devsper 2.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devsper/__init__.py +14 -0
- devsper/agents/a2a/__init__.py +27 -0
- devsper/agents/a2a/client.py +126 -0
- devsper/agents/a2a/discovery.py +24 -0
- devsper/agents/a2a/server.py +128 -0
- devsper/agents/a2a/tool_adapter.py +68 -0
- devsper/agents/a2a/types.py +49 -0
- devsper/agents/agent.py +602 -0
- devsper/agents/critic.py +80 -0
- devsper/agents/message_bus.py +124 -0
- devsper/agents/roles.py +181 -0
- devsper/agents/run_agent.py +78 -0
- devsper/analytics/__init__.py +5 -0
- devsper/analytics/tool_analytics.py +78 -0
- devsper/audit/__init__.py +5 -0
- devsper/audit/logger.py +214 -0
- devsper/bus/__init__.py +29 -0
- devsper/bus/backends/__init__.py +5 -0
- devsper/bus/backends/base.py +38 -0
- devsper/bus/backends/memory.py +55 -0
- devsper/bus/backends/redis.py +146 -0
- devsper/bus/message.py +56 -0
- devsper/bus/schema_version.py +3 -0
- devsper/bus/topics.py +19 -0
- devsper/cache/__init__.py +6 -0
- devsper/cache/embedding_index.py +98 -0
- devsper/cache/hashing.py +24 -0
- devsper/cache/store.py +153 -0
- devsper/cache/task_cache.py +191 -0
- devsper/cli/__init__.py +6 -0
- devsper/cli/commands/reg.py +733 -0
- devsper/cli/github_oauth.py +157 -0
- devsper/cli/init.py +637 -0
- devsper/cli/main.py +2956 -0
- devsper/cli/run_progress.py +103 -0
- devsper/cli/ui/__init__.py +65 -0
- devsper/cli/ui/components.py +94 -0
- devsper/cli/ui/errors.py +104 -0
- devsper/cli/ui/logging.py +120 -0
- devsper/cli/ui/onboarding.py +102 -0
- devsper/cli/ui/progress.py +43 -0
- devsper/cli/ui/run_view.py +308 -0
- devsper/cli/ui/theme.py +40 -0
- devsper/cluster/__init__.py +29 -0
- devsper/cluster/election.py +84 -0
- devsper/cluster/local.py +97 -0
- devsper/cluster/node_info.py +77 -0
- devsper/cluster/registry.py +71 -0
- devsper/cluster/router.py +117 -0
- devsper/cluster/state_backend.py +105 -0
- devsper/compliance/__init__.py +5 -0
- devsper/compliance/pii.py +147 -0
- devsper/config/__init__.py +52 -0
- devsper/config/config_loader.py +121 -0
- devsper/config/defaults.py +77 -0
- devsper/config/resolver.py +342 -0
- devsper/config/schema.py +237 -0
- devsper/credentials/__init__.py +19 -0
- devsper/credentials/cli.py +197 -0
- devsper/credentials/migration.py +124 -0
- devsper/credentials/store.py +142 -0
- devsper/dashboard/__init__.py +9 -0
- devsper/dashboard/dashboard.py +87 -0
- devsper/dev/__init__.py +25 -0
- devsper/dev/builder.py +195 -0
- devsper/dev/debugger.py +95 -0
- devsper/dev/repo_index.py +138 -0
- devsper/dev/sandbox.py +203 -0
- devsper/dev/scaffold.py +122 -0
- devsper/embeddings/__init__.py +5 -0
- devsper/embeddings/service.py +36 -0
- devsper/explainability/__init__.py +14 -0
- devsper/explainability/decision_tree.py +104 -0
- devsper/explainability/rationale.py +38 -0
- devsper/explainability/simulation.py +56 -0
- devsper/hitl/__init__.py +13 -0
- devsper/hitl/approval.py +160 -0
- devsper/hitl/escalation.py +95 -0
- devsper/intelligence/__init__.py +9 -0
- devsper/intelligence/adaptation.py +88 -0
- devsper/intelligence/analysis/__init__.py +19 -0
- devsper/intelligence/analysis/analyzer.py +71 -0
- devsper/intelligence/analysis/cost_estimator.py +66 -0
- devsper/intelligence/analysis/formatter.py +103 -0
- devsper/intelligence/analysis/run_report.py +402 -0
- devsper/intelligence/learning_engine.py +92 -0
- devsper/intelligence/strategies/__init__.py +23 -0
- devsper/intelligence/strategies/base.py +14 -0
- devsper/intelligence/strategies/code_analysis_strategy.py +33 -0
- devsper/intelligence/strategies/data_science_strategy.py +33 -0
- devsper/intelligence/strategies/document_pipeline_strategy.py +33 -0
- devsper/intelligence/strategies/experiment_strategy.py +33 -0
- devsper/intelligence/strategies/research_strategy.py +34 -0
- devsper/intelligence/strategy_selector.py +84 -0
- devsper/intelligence/synthesis.py +132 -0
- devsper/intelligence/task_optimizer.py +92 -0
- devsper/knowledge/__init__.py +5 -0
- devsper/knowledge/extractor.py +204 -0
- devsper/knowledge/knowledge_graph.py +184 -0
- devsper/knowledge/query.py +285 -0
- devsper/memory/__init__.py +35 -0
- devsper/memory/consolidation.py +138 -0
- devsper/memory/embeddings.py +60 -0
- devsper/memory/memory_index.py +97 -0
- devsper/memory/memory_router.py +62 -0
- devsper/memory/memory_store.py +221 -0
- devsper/memory/memory_types.py +54 -0
- devsper/memory/namespaces.py +45 -0
- devsper/memory/scoring.py +77 -0
- devsper/memory/summarizer.py +52 -0
- devsper/nodes/__init__.py +5 -0
- devsper/nodes/controller.py +449 -0
- devsper/nodes/rpc.py +127 -0
- devsper/nodes/single.py +161 -0
- devsper/nodes/worker.py +506 -0
- devsper/orchestration/__init__.py +19 -0
- devsper/orchestration/meta_planner.py +239 -0
- devsper/orchestration/priority_queue.py +61 -0
- devsper/plugins/__init__.py +19 -0
- devsper/plugins/marketplace/__init__.py +0 -0
- devsper/plugins/plugin_loader.py +70 -0
- devsper/plugins/plugin_registry.py +34 -0
- devsper/plugins/registry.py +83 -0
- devsper/protocols/__init__.py +6 -0
- devsper/providers/__init__.py +17 -0
- devsper/providers/anthropic.py +84 -0
- devsper/providers/base.py +75 -0
- devsper/providers/complexity_router.py +94 -0
- devsper/providers/gemini.py +36 -0
- devsper/providers/github.py +180 -0
- devsper/providers/model_router.py +40 -0
- devsper/providers/openai.py +105 -0
- devsper/providers/router/__init__.py +21 -0
- devsper/providers/router/backends/__init__.py +19 -0
- devsper/providers/router/backends/anthropic_backend.py +111 -0
- devsper/providers/router/backends/custom_backend.py +138 -0
- devsper/providers/router/backends/gemini_backend.py +89 -0
- devsper/providers/router/backends/github_backend.py +165 -0
- devsper/providers/router/backends/ollama_backend.py +104 -0
- devsper/providers/router/backends/openai_backend.py +142 -0
- devsper/providers/router/backends/vllm_backend.py +35 -0
- devsper/providers/router/base.py +60 -0
- devsper/providers/router/factory.py +92 -0
- devsper/providers/router/legacy.py +101 -0
- devsper/providers/router/router.py +135 -0
- devsper/reasoning/__init__.py +12 -0
- devsper/reasoning/graph.py +59 -0
- devsper/reasoning/nodes.py +20 -0
- devsper/reasoning/store.py +67 -0
- devsper/runtime/__init__.py +12 -0
- devsper/runtime/health.py +88 -0
- devsper/runtime/replay.py +53 -0
- devsper/runtime/replay_engine.py +142 -0
- devsper/runtime/run_history.py +204 -0
- devsper/runtime/telemetry.py +116 -0
- devsper/runtime/visualize.py +58 -0
- devsper/sandbox/__init__.py +13 -0
- devsper/sandbox/sandbox.py +161 -0
- devsper/swarm/checkpointer.py +65 -0
- devsper/swarm/executor.py +558 -0
- devsper/swarm/map_reduce.py +44 -0
- devsper/swarm/planner.py +197 -0
- devsper/swarm/prefetcher.py +91 -0
- devsper/swarm/scheduler.py +153 -0
- devsper/swarm/speculation.py +47 -0
- devsper/swarm/swarm.py +562 -0
- devsper/tools/__init__.py +33 -0
- devsper/tools/base.py +29 -0
- devsper/tools/code_intelligence/__init__.py +13 -0
- devsper/tools/code_intelligence/api_surface_extractor.py +73 -0
- devsper/tools/code_intelligence/architecture_analyzer.py +65 -0
- devsper/tools/code_intelligence/codebase_indexer.py +71 -0
- devsper/tools/code_intelligence/dependency_graph_builder.py +67 -0
- devsper/tools/code_intelligence/design_pattern_detector.py +62 -0
- devsper/tools/code_intelligence/large_function_detector.py +68 -0
- devsper/tools/code_intelligence/module_responsibility_mapper.py +56 -0
- devsper/tools/code_intelligence/parallel_codebase_analysis.py +44 -0
- devsper/tools/code_intelligence/refactor_candidate_detector.py +81 -0
- devsper/tools/code_intelligence/repository_semantic_index.py +61 -0
- devsper/tools/code_intelligence/test_coverage_estimator.py +62 -0
- devsper/tools/coding/__init__.py +12 -0
- devsper/tools/coding/analyze_code_complexity.py +48 -0
- devsper/tools/coding/dependency_analyzer.py +42 -0
- devsper/tools/coding/extract_functions.py +38 -0
- devsper/tools/coding/format_python.py +50 -0
- devsper/tools/coding/generate_docstrings.py +40 -0
- devsper/tools/coding/generate_unit_tests.py +42 -0
- devsper/tools/coding/lint_python.py +51 -0
- devsper/tools/coding/refactor_function.py +41 -0
- devsper/tools/coding/repo_structure_map.py +54 -0
- devsper/tools/coding/run_python.py +53 -0
- devsper/tools/data/__init__.py +12 -0
- devsper/tools/data/column_type_detection.py +64 -0
- devsper/tools/data/csv_summary.py +52 -0
- devsper/tools/data/dataframe_filter.py +51 -0
- devsper/tools/data/dataframe_groupby.py +47 -0
- devsper/tools/data/dataframe_stats.py +38 -0
- devsper/tools/data/dataset_sampling.py +55 -0
- devsper/tools/data/dataset_schema.py +45 -0
- devsper/tools/data/json_pretty_print.py +37 -0
- devsper/tools/data/json_query.py +46 -0
- devsper/tools/data/missing_value_report.py +47 -0
- devsper/tools/data_science/__init__.py +13 -0
- devsper/tools/data_science/correlation_heatmap.py +72 -0
- devsper/tools/data_science/dataset_bias_detector.py +49 -0
- devsper/tools/data_science/dataset_distribution_report.py +64 -0
- devsper/tools/data_science/dataset_drift_detector.py +64 -0
- devsper/tools/data_science/dataset_outlier_detector.py +65 -0
- devsper/tools/data_science/dataset_profile.py +76 -0
- devsper/tools/data_science/distributed_dataset_processor.py +54 -0
- devsper/tools/data_science/feature_engineering_suggestions.py +69 -0
- devsper/tools/data_science/feature_importance_estimator.py +82 -0
- devsper/tools/data_science/model_input_validator.py +59 -0
- devsper/tools/data_science/time_series_analyzer.py +57 -0
- devsper/tools/documents/__init__.py +11 -0
- devsper/tools/documents/_docproc.py +56 -0
- devsper/tools/documents/document_to_markdown.py +29 -0
- devsper/tools/documents/extract_document_images.py +39 -0
- devsper/tools/documents/extract_document_text.py +29 -0
- devsper/tools/documents/extract_equations.py +36 -0
- devsper/tools/documents/extract_tables.py +47 -0
- devsper/tools/documents/summarize_document.py +42 -0
- devsper/tools/documents/write_latex_document.py +133 -0
- devsper/tools/documents/write_markdown_document.py +89 -0
- devsper/tools/documents/write_word_document.py +149 -0
- devsper/tools/experiments/__init__.py +13 -0
- devsper/tools/experiments/bootstrap_estimator.py +54 -0
- devsper/tools/experiments/experiment_report_generator.py +50 -0
- devsper/tools/experiments/experiment_tracker.py +36 -0
- devsper/tools/experiments/grid_search_runner.py +50 -0
- devsper/tools/experiments/model_benchmark_runner.py +45 -0
- devsper/tools/experiments/monte_carlo_experiment.py +38 -0
- devsper/tools/experiments/parameter_sweep_runner.py +51 -0
- devsper/tools/experiments/result_comparator.py +58 -0
- devsper/tools/experiments/simulation_runner.py +43 -0
- devsper/tools/experiments/statistical_significance_test.py +56 -0
- devsper/tools/experiments/swarm_map_reduce.py +42 -0
- devsper/tools/filesystem/__init__.py +12 -0
- devsper/tools/filesystem/append_file.py +42 -0
- devsper/tools/filesystem/file_hash.py +40 -0
- devsper/tools/filesystem/file_line_count.py +36 -0
- devsper/tools/filesystem/file_metadata.py +38 -0
- devsper/tools/filesystem/file_preview.py +55 -0
- devsper/tools/filesystem/find_large_files.py +50 -0
- devsper/tools/filesystem/list_directory.py +39 -0
- devsper/tools/filesystem/read_file.py +35 -0
- devsper/tools/filesystem/search_files.py +60 -0
- devsper/tools/filesystem/write_file.py +41 -0
- devsper/tools/flagship/__init__.py +15 -0
- devsper/tools/flagship/distributed_document_analysis.py +77 -0
- devsper/tools/flagship/docproc_corpus_pipeline.py +91 -0
- devsper/tools/flagship/repository_semantic_map.py +99 -0
- devsper/tools/flagship/research_graph_builder.py +111 -0
- devsper/tools/flagship/swarm_experiment_runner.py +86 -0
- devsper/tools/knowledge/__init__.py +10 -0
- devsper/tools/knowledge/citation_graph_builder.py +69 -0
- devsper/tools/knowledge/concept_frequency_analyzer.py +74 -0
- devsper/tools/knowledge/corpus_builder.py +66 -0
- devsper/tools/knowledge/cross_document_entity_linker.py +71 -0
- devsper/tools/knowledge/document_corpus_summary.py +68 -0
- devsper/tools/knowledge/document_topic_extractor.py +58 -0
- devsper/tools/knowledge/knowledge_graph_extractor.py +58 -0
- devsper/tools/knowledge/timeline_extractor.py +59 -0
- devsper/tools/math/__init__.py +12 -0
- devsper/tools/math/calculate_expression.py +52 -0
- devsper/tools/math/correlation.py +44 -0
- devsper/tools/math/distribution_summary.py +39 -0
- devsper/tools/math/histogram.py +53 -0
- devsper/tools/math/linear_regression.py +47 -0
- devsper/tools/math/matrix_multiply.py +38 -0
- devsper/tools/math/mean_std.py +35 -0
- devsper/tools/math/monte_carlo_simulation.py +43 -0
- devsper/tools/math/polynomial_fit.py +40 -0
- devsper/tools/math/random_sample.py +36 -0
- devsper/tools/mcp/__init__.py +23 -0
- devsper/tools/mcp/adapter.py +53 -0
- devsper/tools/mcp/client.py +235 -0
- devsper/tools/mcp/discovery.py +53 -0
- devsper/tools/memory/__init__.py +16 -0
- devsper/tools/memory/delete_memory.py +25 -0
- devsper/tools/memory/list_memory.py +34 -0
- devsper/tools/memory/search_memory.py +36 -0
- devsper/tools/memory/store_memory.py +47 -0
- devsper/tools/memory/summarize_memory.py +41 -0
- devsper/tools/memory/tag_memory.py +47 -0
- devsper/tools/pipelines.py +92 -0
- devsper/tools/registry.py +39 -0
- devsper/tools/research/__init__.py +12 -0
- devsper/tools/research/arxiv_download.py +55 -0
- devsper/tools/research/arxiv_search.py +58 -0
- devsper/tools/research/citation_extractor.py +35 -0
- devsper/tools/research/duckduckgo_search.py +42 -0
- devsper/tools/research/paper_metadata_extractor.py +45 -0
- devsper/tools/research/paper_summarizer.py +41 -0
- devsper/tools/research/research_question_generator.py +39 -0
- devsper/tools/research/topic_cluster.py +46 -0
- devsper/tools/research/web_search.py +47 -0
- devsper/tools/research/wikipedia_lookup.py +50 -0
- devsper/tools/research_advanced/__init__.py +14 -0
- devsper/tools/research_advanced/citation_context_extractor.py +60 -0
- devsper/tools/research_advanced/literature_review_generator.py +79 -0
- devsper/tools/research_advanced/methodology_extractor.py +58 -0
- devsper/tools/research_advanced/paper_contribution_extractor.py +50 -0
- devsper/tools/research_advanced/paper_dataset_identifier.py +49 -0
- devsper/tools/research_advanced/paper_method_comparator.py +62 -0
- devsper/tools/research_advanced/paper_similarity_search.py +69 -0
- devsper/tools/research_advanced/paper_trend_analyzer.py +69 -0
- devsper/tools/research_advanced/parallel_document_analyzer.py +56 -0
- devsper/tools/research_advanced/research_gap_finder.py +71 -0
- devsper/tools/research_advanced/research_topic_mapper.py +69 -0
- devsper/tools/research_advanced/swarm_literature_review.py +58 -0
- devsper/tools/scoring/__init__.py +52 -0
- devsper/tools/scoring/report.py +44 -0
- devsper/tools/scoring/scorer.py +39 -0
- devsper/tools/scoring/selector.py +61 -0
- devsper/tools/scoring/store.py +267 -0
- devsper/tools/selector.py +130 -0
- devsper/tools/system/__init__.py +12 -0
- devsper/tools/system/cpu_usage.py +22 -0
- devsper/tools/system/disk_usage.py +35 -0
- devsper/tools/system/environment_variables.py +29 -0
- devsper/tools/system/memory_usage.py +23 -0
- devsper/tools/system/pip_install.py +44 -0
- devsper/tools/system/pip_search.py +29 -0
- devsper/tools/system/process_list.py +34 -0
- devsper/tools/system/python_package_list.py +40 -0
- devsper/tools/system/run_shell_command.py +51 -0
- devsper/tools/system/system_info.py +26 -0
- devsper/tools/tool_runner.py +122 -0
- devsper/tui/__init__.py +5 -0
- devsper/tui/activity_feed_view.py +73 -0
- devsper/tui/adaptive_tasks_view.py +75 -0
- devsper/tui/agent_role_view.py +35 -0
- devsper/tui/app.py +395 -0
- devsper/tui/dashboard_screen.py +290 -0
- devsper/tui/dev_view.py +99 -0
- devsper/tui/inject_screen.py +73 -0
- devsper/tui/knowledge_graph_view.py +46 -0
- devsper/tui/layout.py +43 -0
- devsper/tui/logs_view.py +83 -0
- devsper/tui/memory_view.py +58 -0
- devsper/tui/performance_view.py +33 -0
- devsper/tui/reasoning_graph_view.py +39 -0
- devsper/tui/results_view.py +139 -0
- devsper/tui/swarm_view.py +37 -0
- devsper/tui/task_detail_screen.py +55 -0
- devsper/tui/task_view.py +103 -0
- devsper/types/event.py +97 -0
- devsper/types/exceptions.py +21 -0
- devsper/types/swarm.py +41 -0
- devsper/types/task.py +80 -0
- devsper/upgrade/__init__.py +21 -0
- devsper/upgrade/changelog.py +124 -0
- devsper/upgrade/cli.py +145 -0
- devsper/upgrade/installer.py +103 -0
- devsper/upgrade/notifier.py +52 -0
- devsper/upgrade/version_check.py +121 -0
- devsper/utils/event_logger.py +88 -0
- devsper/utils/http.py +43 -0
- devsper/utils/models.py +54 -0
- devsper/visualization/__init__.py +5 -0
- devsper/visualization/dag_export.py +67 -0
- devsper/workflow/__init__.py +18 -0
- devsper/workflow/conditions.py +157 -0
- devsper/workflow/context.py +108 -0
- devsper/workflow/loader.py +156 -0
- devsper/workflow/resolver.py +109 -0
- devsper/workflow/runner.py +562 -0
- devsper/workflow/schema.py +63 -0
- devsper/workflow/validator.py +128 -0
- devsper-2.1.6.dist-info/METADATA +346 -0
- devsper-2.1.6.dist-info/RECORD +375 -0
- devsper-2.1.6.dist-info/WHEEL +4 -0
- devsper-2.1.6.dist-info/entry_points.txt +3 -0
- devsper-2.1.6.dist-info/licenses/LICENSE +639 -0
devsper/agents/agent.py
ADDED
|
@@ -0,0 +1,602 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
4
|
+
import re
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from datetime import datetime, timezone
|
|
7
|
+
|
|
8
|
+
from devsper.types.task import Task, TaskStatus
|
|
9
|
+
from devsper.types.event import Event, events
|
|
10
|
+
from devsper.utils.event_logger import EventLog
|
|
11
|
+
from devsper.utils.models import generate
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class AgentRequest:
    """Input bundle for ``Agent.run``; round-trips through a plain dict."""

    task: Task
    memory_context: str
    tools: list[str]  # tool names only
    model: str
    system_prompt: str
    prefetch_used: bool

    def to_dict(self) -> dict:
        """Return a dict snapshot of the request.

        The task is serialized through its own ``to_dict``; the tool-name list
        is copied so the returned dict does not alias ``self.tools``.
        """
        serialized_task = self.task.to_dict()
        tool_names = list(self.tools)
        return {
            "task": serialized_task,
            "memory_context": self.memory_context,
            "tools": tool_names,
            "model": self.model,
            "system_prompt": self.system_prompt,
            "prefetch_used": self.prefetch_used,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "AgentRequest":
        """Rebuild a request from a dict such as the one ``to_dict`` produces.

        ``"task"`` is required (``KeyError`` when absent); every other key
        falls back to a default when missing.
        """
        lookup = data.get
        return cls(
            task=Task.from_dict(data["task"]),
            memory_context=lookup("memory_context", ""),
            tools=list(lookup("tools", [])),
            model=lookup("model", "mock"),
            system_prompt=lookup("system_prompt", ""),
            prefetch_used=lookup("prefetch_used", False),
        )
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
|
|
47
|
+
class AgentResponse:
|
|
48
|
+
"""Serializable output from Agent.run."""
|
|
49
|
+
task_id: str
|
|
50
|
+
result: str
|
|
51
|
+
tools_called: list[str]
|
|
52
|
+
broadcasts: list[str]
|
|
53
|
+
tokens_used: int | None
|
|
54
|
+
duration_seconds: float
|
|
55
|
+
error: str | None
|
|
56
|
+
success: bool
|
|
57
|
+
|
|
58
|
+
def to_dict(self) -> dict:
|
|
59
|
+
return {
|
|
60
|
+
"task_id": self.task_id,
|
|
61
|
+
"result": self.result if self.result is not None else "",
|
|
62
|
+
"tools_called": list(self.tools_called),
|
|
63
|
+
"broadcasts": list(self.broadcasts),
|
|
64
|
+
"tokens_used": self.tokens_used,
|
|
65
|
+
"duration_seconds": self.duration_seconds,
|
|
66
|
+
"error": self.error,
|
|
67
|
+
"success": self.success,
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
@classmethod
|
|
71
|
+
def from_dict(cls, data: dict) -> "AgentResponse":
|
|
72
|
+
return cls(
|
|
73
|
+
task_id=data["task_id"],
|
|
74
|
+
result=data.get("result", ""),
|
|
75
|
+
tools_called=list(data.get("tools_called", [])),
|
|
76
|
+
broadcasts=list(data.get("broadcasts", [])),
|
|
77
|
+
tokens_used=data.get("tokens_used"),
|
|
78
|
+
duration_seconds=float(data.get("duration_seconds", 0.0)),
|
|
79
|
+
error=data.get("error"),
|
|
80
|
+
success=bool(data.get("success", False)),
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
BROADCAST_PREFIX = re.compile(r"^\s*BROADCAST:\s*(.+?)(?=\n\n|\n[A-Z]|\Z)", re.DOTALL | re.IGNORECASE)
|
|
84
|
+
|
|
85
|
+
PROMPT_TEMPLATE = """{role_prefix}
|
|
86
|
+
|
|
87
|
+
Task:
|
|
88
|
+
{task_description}
|
|
89
|
+
{memory_section}
|
|
90
|
+
{message_bus_section}
|
|
91
|
+
|
|
92
|
+
Produce the best possible output. Output only the requested content; do not describe your role or other projects."""
|
|
93
|
+
|
|
94
|
+
PROMPT_TEMPLATE_WITH_TOOLS = """{role_prefix} You may use tools.
|
|
95
|
+
|
|
96
|
+
Task:
|
|
97
|
+
{task_description}
|
|
98
|
+
{memory_section}
|
|
99
|
+
{message_bus_section}
|
|
100
|
+
|
|
101
|
+
Output only the requested content; do not describe your role or other projects.
|
|
102
|
+
|
|
103
|
+
AVAILABLE TOOLS:
|
|
104
|
+
{tools_section}
|
|
105
|
+
|
|
106
|
+
To call a tool, output exactly:
|
|
107
|
+
TOOL: <tool_name>
|
|
108
|
+
INPUT: <json object with arguments>
|
|
109
|
+
|
|
110
|
+
When the task requires listing files, reading/writing files, or running commands, you MUST use the appropriate tool above (output TOOL: and INPUT:). Do not describe what you would do or say you cannot do it—call the tool and use its result. Use the exact tool name as shown in the list (e.g. filesystem.list_dir for listing a directory).
|
|
111
|
+
|
|
112
|
+
You are in an automated workflow. Do not ask the user for their OS, environment, or to specify paths—use the task description and call tools with the paths/data given there.
|
|
113
|
+
|
|
114
|
+
If you do not need a tool, respond with your final answer only (no TOOL: line).
|
|
115
|
+
"""
|
|
116
|
+
|
|
117
|
+
BROADCAST_INSTRUCTION = """
|
|
118
|
+
If you discover a fact, constraint, or finding that would help other agents working on related tasks, begin your response with:
|
|
119
|
+
BROADCAST: <one sentence finding>
|
|
120
|
+
Your actual response follows on the next line.
|
|
121
|
+
"""
|
|
122
|
+
|
|
123
|
+
TOOL_NAME_PATTERN = re.compile(r"TOOL:\s*(\S+)", re.IGNORECASE)
|
|
124
|
+
INPUT_PREFIX = re.compile(r"INPUT:\s*", re.IGNORECASE)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _format_tools_section(tools: list | None = None) -> str:
|
|
128
|
+
if tools is None:
|
|
129
|
+
from devsper.tools.registry import list_tools
|
|
130
|
+
tools = list_tools()
|
|
131
|
+
lines = []
|
|
132
|
+
for t in tools:
|
|
133
|
+
lines.append(f"- {t.name}: {t.description}")
|
|
134
|
+
lines.append(f" input_schema: {json.dumps(t.input_schema)}")
|
|
135
|
+
return "\n".join(lines)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _get_tools_by_names(names: list[str]) -> list:
|
|
139
|
+
"""Resolve tool names to tool objects from registry."""
|
|
140
|
+
from devsper.tools.registry import get
|
|
141
|
+
out = []
|
|
142
|
+
for n in names:
|
|
143
|
+
t = get(n)
|
|
144
|
+
if t is not None:
|
|
145
|
+
out.append(t)
|
|
146
|
+
return out
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _parse_tool_call(text: str) -> tuple[str | None, dict | None]:
|
|
150
|
+
"""Return (tool_name, args) if a tool call is found, else (None, None)."""
|
|
151
|
+
name_m = TOOL_NAME_PATTERN.search(text)
|
|
152
|
+
if not name_m:
|
|
153
|
+
return None, None
|
|
154
|
+
name = name_m.group(1).strip()
|
|
155
|
+
after_name = text[name_m.end() :]
|
|
156
|
+
input_m = INPUT_PREFIX.search(after_name)
|
|
157
|
+
if not input_m:
|
|
158
|
+
return None, None
|
|
159
|
+
start = input_m.end()
|
|
160
|
+
rest = after_name[start:].lstrip()
|
|
161
|
+
if not rest.startswith("{"):
|
|
162
|
+
return name, {}
|
|
163
|
+
depth = 0
|
|
164
|
+
end = 0
|
|
165
|
+
for i, c in enumerate(rest):
|
|
166
|
+
if c == "{":
|
|
167
|
+
depth += 1
|
|
168
|
+
elif c == "}":
|
|
169
|
+
depth -= 1
|
|
170
|
+
if depth == 0:
|
|
171
|
+
end = i + 1
|
|
172
|
+
break
|
|
173
|
+
if end == 0:
|
|
174
|
+
return name, {}
|
|
175
|
+
try:
|
|
176
|
+
args = json.loads(rest[:end])
|
|
177
|
+
except json.JSONDecodeError:
|
|
178
|
+
return name, {}
|
|
179
|
+
return name, args if isinstance(args, dict) else {}
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _parse_all_tool_calls(text: str) -> list[tuple[str, dict]]:
|
|
183
|
+
"""Return all (tool_name, args) pairs found in text (multiple TOOL:/INPUT: blocks)."""
|
|
184
|
+
out: list[tuple[str, dict]] = []
|
|
185
|
+
rest = text
|
|
186
|
+
while True:
|
|
187
|
+
name_m = TOOL_NAME_PATTERN.search(rest)
|
|
188
|
+
if not name_m:
|
|
189
|
+
break
|
|
190
|
+
name = name_m.group(1).strip()
|
|
191
|
+
after_name = rest[name_m.end() :]
|
|
192
|
+
input_m = INPUT_PREFIX.search(after_name)
|
|
193
|
+
if not input_m:
|
|
194
|
+
break
|
|
195
|
+
start = input_m.end()
|
|
196
|
+
rest = after_name[start:].lstrip()
|
|
197
|
+
if not rest.startswith("{"):
|
|
198
|
+
out.append((name, {}))
|
|
199
|
+
continue
|
|
200
|
+
depth = 0
|
|
201
|
+
end = 0
|
|
202
|
+
for i, c in enumerate(rest):
|
|
203
|
+
if c == "{":
|
|
204
|
+
depth += 1
|
|
205
|
+
elif c == "}":
|
|
206
|
+
depth -= 1
|
|
207
|
+
if depth == 0:
|
|
208
|
+
end = i + 1
|
|
209
|
+
break
|
|
210
|
+
if end == 0:
|
|
211
|
+
out.append((name, {}))
|
|
212
|
+
continue
|
|
213
|
+
try:
|
|
214
|
+
args = json.loads(rest[:end])
|
|
215
|
+
except json.JSONDecodeError:
|
|
216
|
+
args = {}
|
|
217
|
+
out.append((name, args if isinstance(args, dict) else {}))
|
|
218
|
+
rest = rest[end:].lstrip()
|
|
219
|
+
return out
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
class Agent:
|
|
223
|
+
def __init__(
    self,
    model_name: str = "gpt-4o",
    event_log: EventLog | None = None,
    use_tools: bool = False,
    max_tool_iterations: int = 5,
    memory_router=None,
    store_result_to_memory: bool = False,
    reasoning_store=None,
    user_task: str | None = None,
    parallel_tools: bool = True,
    message_bus=None,
    audit_logger=None,
    audit_run_id: str = "",
):
    """Store the agent's configuration and collaborators.

    Args:
        model_name: Model used when a request is built without an override.
        event_log: Destination for emitted events; a new ``EventLog`` is
            created only when this is ``None``.
        use_tools: Whether tool selection and tool calls are enabled.
        max_tool_iterations: Stored limit for the tool loop (the loop itself
            is not visible here).
        memory_router: Optional object queried via ``get_memory_context`` and,
            if it exposes ``store``, used to persist results.
        store_result_to_memory: Whether ``run`` persists non-empty results.
        reasoning_store: Optional store receiving a node per non-empty result.
        user_task: Optional top-level task text prefixed to memory queries.
        parallel_tools: Requested parallel tool execution; forced off when the
            ``DEVSPER_DISABLE_PARALLEL_TOOLS`` environment variable is ``"1"``.
        message_bus: Optional bus queried via ``get_context_sync``.
        audit_logger: Stored as-is (not used in the visible code).
        audit_run_id: Stored as-is; ``None`` or empty becomes ``""``.
    """
    self.model_name = model_name
    # Test against None rather than truthiness: a caller-supplied log that
    # happens to be falsy (e.g. an empty container) must not be replaced.
    self.event_log = EventLog() if event_log is None else event_log
    self.use_tools = use_tools
    self.max_tool_iterations = max_tool_iterations
    self.memory_router = memory_router
    self.store_result_to_memory = store_result_to_memory
    self.reasoning_store = reasoning_store
    self.user_task = user_task
    self.parallel_tools = parallel_tools and os.environ.get("DEVSPER_DISABLE_PARALLEL_TOOLS", "").strip() != "1"
    self.message_bus = message_bus
    self.audit_logger = audit_logger
    self.audit_run_id = audit_run_id or ""
|
|
250
|
+
|
|
251
|
+
def run(self, request: AgentRequest) -> AgentResponse:
    """Execute one request and report the outcome as an ``AgentResponse``.

    All inputs come from ``request``. Any exception raised while running is
    caught and returned as a response with ``success=False`` and ``error``
    set to the exception text, after emitting ``TASK_FAILED``.
    """
    import time

    started = time.perf_counter()
    task_id = request.task.id
    try:
        self._emit(events.AGENT_STARTED, {"task_id": task_id})
        self._emit(events.TASK_STARTED, {"task_id": task_id})

        # Memory is only given a header when there is something to show.
        memory_section = (
            "\n\nRELEVANT MEMORY\n(previous research notes etc.)\n\n" + request.memory_context
            if request.memory_context
            else ""
        )

        if not (self.use_tools and request.tools):
            # Plain single-shot generation, no tool loop.
            prompt = PROMPT_TEMPLATE.format(
                role_prefix=request.system_prompt,
                task_description=request.task.description,
                memory_section=memory_section,
                message_bus_section="",
            )
            text = generate(request.model, prompt)
            tools_called = []
        else:
            resolved_tools = _get_tools_by_names(request.tools)
            text, tools_called = self._run_with_tools_for_request(
                request, memory_section, resolved_tools
            )

        text, broadcasts = self._strip_broadcast_and_collect(task_id, text)

        can_store = getattr(self.memory_router, "store", None)
        if self.store_result_to_memory and text and can_store:
            self._store_result_to_memory(request.task, text)

        if self.reasoning_store and text:
            try:
                agent_id = getattr(request.task, "role", "") or "agent"
                node = self.reasoning_store.add_node(
                    agent_id=agent_id,
                    task_id=task_id,
                    content=text[:10000],
                )
                self._emit(events.REASONING_NODE_ADDED, {"node_id": node.id, "task_id": task_id})
            except Exception:
                # Recording the reasoning node is best-effort.
                pass

        self._emit(events.TASK_COMPLETED, {"task_id": task_id})
        self._emit(events.AGENT_FINISHED, {"task_id": task_id})

        elapsed = time.perf_counter() - started
        return AgentResponse(
            task_id=task_id,
            result=text,
            tools_called=tools_called,
            broadcasts=broadcasts,
            tokens_used=None,
            duration_seconds=elapsed,
            error=None,
            success=True,
        )
    except Exception as exc:
        elapsed = time.perf_counter() - started
        self._emit(events.TASK_FAILED, {"task_id": task_id, "error": str(exc)})
        return AgentResponse(
            task_id=task_id,
            result="",
            tools_called=[],
            broadcasts=[],
            tokens_used=None,
            duration_seconds=elapsed,
            error=str(exc),
            success=False,
        )
|
|
320
|
+
|
|
321
|
+
def build_request(
    self,
    task: Task,
    model_override: str | None = None,
    prefetch_result=None,
) -> AgentRequest:
    """Build AgentRequest for this task (for use with sandbox or external runner).

    Args:
        task: Task to run; its ``description`` and optional ``role`` drive
            memory lookup, role prompt and tool selection.
        model_override: Model to use instead of ``self.model_name`` when truthy.
        prefetch_result: Optional object whose ``memory_context`` and ``tools``
            attributes, when truthy, replace the memory lookup and the tool
            selection respectively.

    Returns:
        The assembled request. ``prefetch_used`` is true whenever
        ``prefetch_result`` is not ``None``.

    Raises:
        ImportError: If tools are enabled and the tool selector module cannot
            be imported. Score-store failures are tolerated instead.
    """
    memory_section = ""
    if prefetch_result and getattr(prefetch_result, "memory_context", None):
        memory_section = prefetch_result.memory_context or ""
    elif self.memory_router and task.description:
        try:
            query = task.description
            if self.user_task and self.user_task.strip():
                query = f"{self.user_task.strip()} {task.description}".strip()
            memory_section = self.memory_router.get_memory_context(query) or ""
        except Exception:
            # Best-effort: a failing memory lookup leaves the context empty.
            pass

    message_bus_section = ""
    if self.message_bus:
        message_bus_section = self.message_bus.get_context_sync(task.id) or ""
    if message_bus_section:
        # Bus context travels inside memory_context, after the memory text.
        memory_section = (memory_section + "\n\n" + message_bus_section).strip()

    from devsper.agents.roles import get_role_config

    role_config = get_role_config(getattr(task, "role", None))
    # The broadcast instruction is only added when the bus supplied context.
    broadcast_instruction = BROADCAST_INSTRUCTION if (self.message_bus and message_bus_section) else ""
    system_prompt = (
        role_config.prompt_prefix + broadcast_instruction
        if broadcast_instruction
        else role_config.prompt_prefix
    )

    tools_names: list[str] = []
    if self.use_tools:
        if prefetch_result and getattr(prefetch_result, "tools", None):
            tools_names = [t.name for t in prefetch_result.tools]
        else:
            # The selector is required, so import it outside the guarded
            # block: inside it, a failed import was swallowed and surfaced
            # later as an UnboundLocalError at the call below.
            from devsper.tools.selector import get_tools_for_task

            try:
                from devsper.tools.scoring import get_default_score_store

                score_store = get_default_score_store()
            except Exception:
                # Scoring is optional; select tools without it.
                score_store = None
            tools = get_tools_for_task(
                task.description or "",
                role=getattr(task, "role", None),
                score_store=score_store,
            )
            tools_names = [t.name for t in tools]

    model = model_override if model_override else self.model_name
    return AgentRequest(
        task=task,
        memory_context=memory_section,
        tools=tools_names,
        model=model,
        system_prompt=system_prompt,
        prefetch_used=prefetch_result is not None,
    )
|
|
375
|
+
|
|
376
|
+
def apply_response(self, task: Task, response: AgentResponse) -> None:
    """Copy the outcome of ``response`` onto ``task``.

    Sets ``task.status`` from ``response.success``, always overwrites
    ``task.result``, and sets ``task.error`` only when the response carries
    a truthy error.
    """
    if response.success:
        task.status = TaskStatus.COMPLETED
    else:
        task.status = TaskStatus.FAILED
    task.result = response.result
    failure = response.error
    if failure:
        task.error = failure
|
|
382
|
+
|
|
383
|
+
def run_task(
    self,
    task: Task,
    model_override: str | None = None,
    prefetch_result=None,
) -> str:
    """Backward-compatible entry point: build a request, run it, update the task.

    The task is mutated through ``apply_response``; the response's ``result``
    is returned.
    """
    response = self.run(
        self.build_request(
            task,
            model_override=model_override,
            prefetch_result=prefetch_result,
        )
    )
    self.apply_response(task, response)
    return response.result
|
|
394
|
+
|
|
395
|
+
def _strip_broadcast_and_collect(self, task_id: str, text: str) -> tuple[str, list[str]]:
    """Peel leading BROADCAST: entries off ``text``.

    Each leading match of ``BROADCAST_PREFIX`` contributes its first capture
    group (stripped) as a finding; when a message bus is configured the
    finding is also broadcast for ``task_id``. Returns the remaining text and
    the findings in the order they were found.
    """
    findings: list[str] = []
    remaining = text
    while remaining and (hit := BROADCAST_PREFIX.match(remaining)) is not None:
        note = hit.group(1).strip()
        findings.append(note)
        if self.message_bus:
            self.message_bus.broadcast_sync(task_id, note, tags=[])
        # Drop the consumed prefix plus any whitespace before the next entry.
        remaining = remaining[hit.end():].lstrip()
    return (remaining, findings)
|
|
409
|
+
|
|
410
|
+
def _strip_broadcast_and_emit(self, task: Task, text: str) -> str:
    """Strip leading BROADCAST: entries from ``text`` and return what is left.

    Delegates to ``_strip_broadcast_and_collect`` (which does the emitting)
    and discards the collected findings. A falsy ``text`` is treated as "".
    """
    return self._strip_broadcast_and_collect(task.id, text or "")[0]
|
|
414
|
+
|
|
415
|
+
def _store_result_to_memory(self, task: Task, text: str) -> None:
    """Persist a task result as a semantic memory record.

    Does nothing unless the memory router exposes a ``MemoryStore`` as
    ``store``. The stored content is capped at 10000 characters, and an
    embedding is attached when the router exposes a ``MemoryIndex`` as ``index``.
    """
    from devsper.memory.memory_store import MemoryStore, generate_memory_id
    from devsper.memory.memory_types import MemoryRecord, MemoryType
    from devsper.memory.memory_index import MemoryIndex

    backing_store = getattr(self.memory_router, "store", None)
    if not isinstance(backing_store, MemoryStore):
        return

    entry = MemoryRecord(
        id=generate_memory_id(),
        memory_type=MemoryType.SEMANTIC,
        source_task=task.id,
        content=text[:10000],
        tags=["agent_result", task.id],
    )
    embedder = getattr(self.memory_router, "index", None)
    if isinstance(embedder, MemoryIndex):
        entry = embedder.ensure_embedding(entry)
    backing_store.store(entry)
|
|
435
|
+
|
|
436
|
+
def _run_with_tools_for_request(
    self,
    request: AgentRequest,
    memory_section: str,
    tools_list: list,
) -> tuple[str, list[str]]:
    """Run the tool loop for a request.

    The model is prompted repeatedly (at most ``self.max_tool_iterations``
    times). A response without tool calls ends the loop and becomes the
    result. Otherwise tools are run and their output is appended to the
    conversation for the next round. Only the first call is run when the
    response holds a single call or ``self.parallel_tools`` is off; otherwise
    all calls run in parallel.

    Args:
        request: Request supplying the task, model and system prompt.
        memory_section: Context text inserted into the prompt.
        tools_list: Tools advertised to the model in the prompt.

    Returns:
        ``(result_text, tools_called)`` where ``tools_called`` lists the names
        of the tools that were actually executed, in execution order.
    """
    from devsper.tools.tool_runner import run_tool

    task = request.task
    task_type = getattr(task, "role", None) or "general"
    tools_section = _format_tools_section(tools_list)
    prompt = PROMPT_TEMPLATE_WITH_TOOLS.format(
        role_prefix=request.system_prompt,
        task_description=task.description,
        memory_section=memory_section,
        message_bus_section="",
        tools_section=tools_section,
    )
    conversation = [prompt]
    tools_called: list[str] = []
    for _ in range(self.max_tool_iterations):
        response = generate(request.model, "\n\n".join(conversation))
        tool_calls = _parse_all_tool_calls(response)
        if not tool_calls:
            return (response.strip(), tools_called)

        if len(tool_calls) == 1 or not self.parallel_tools:
            # Sequential mode executes only the first requested call.
            executed = [tool_calls[0]]
            first_name, first_args = executed[0]
            results = [run_tool(first_name, first_args, task_type=task_type)]
        else:
            executed = tool_calls
            results = self._run_tools_parallel_sync(tool_calls, task_type, task)

        conversation.append(f"Response:\n{response}")
        for (tool_name, _args), result in zip(executed, results):
            # Record only tools that really ran, so skipped calls in
            # sequential mode are not reported as executed.
            tools_called.append(tool_name)
            self._emit_tool_called_audit(task.id, tool_name, result)
            self._emit(
                events.TOOL_CALLED,
                {"task_id": task.id, "tool": tool_name, "result_preview": (result or "")[:200]},
            )
            conversation.append(f"Tool result ({tool_name}):\n{result or ''}")

    # Iteration budget exhausted: fall back to the last tool output. If the
    # loop never ran, the conversation only holds the prompt, which must not
    # be echoed back as the result.
    last_entry = conversation[-1].strip() if len(conversation) > 1 else ""
    return (last_entry or "Max tool iterations reached.", tools_called)
|
|
486
|
+
|
|
487
|
+
def _run_with_tools(
    self,
    task: Task,
    memory_section: str = "",
    role_prefix: str = "",
    model_name: str | None = None,
    tools_list: list | None = None,
    message_bus_section: str = "",
) -> str:
    """Run the tool loop for ``task`` and return the final text.

    The model is prompted repeatedly (at most ``self.max_tool_iterations``
    times). A response without tool calls ends the loop and is returned
    stripped. Otherwise tools are run and their output is appended to the
    conversation. Only the first call is run when the response holds a single
    call or ``self.parallel_tools`` is off; otherwise all calls run in parallel.

    Args:
        task: Task being executed; ``description``, ``id`` and the optional
            ``role`` attribute are read.
        memory_section: Context text inserted into the prompt.
        role_prefix: Role/system text inserted into the prompt.
        model_name: Model to use; defaults to ``self.model_name``.
        tools_list: Tools to advertise. When ``None`` they are selected from
            the task description and role.
        message_bus_section: Message-bus text inserted into the prompt.

    Returns:
        The model's final response, or the last tool output when the
        iteration budget runs out.
    """
    from devsper.tools.selector import get_tools_for_task
    from devsper.tools.tool_runner import run_tool

    model = model_name or self.model_name
    role = getattr(task, "role", None)
    task_type = role or "general"
    if tools_list is not None:
        tools = tools_list
    else:
        try:
            from devsper.tools.scoring import get_default_score_store

            score_store = get_default_score_store()
        except Exception:
            # Scoring is optional; select tools without a score store.
            score_store = None
        tools = get_tools_for_task(
            task.description if task else "",
            role=role,
            score_store=score_store,
        )
    tools_section = _format_tools_section(tools)
    prompt = PROMPT_TEMPLATE_WITH_TOOLS.format(
        role_prefix=role_prefix,
        task_description=task.description,
        memory_section=memory_section,
        message_bus_section=message_bus_section,
        tools_section=tools_section,
    )
    conversation = [prompt]
    for _ in range(self.max_tool_iterations):
        response = generate(model, "\n\n".join(conversation))
        tool_calls = _parse_all_tool_calls(response)
        if not tool_calls:
            return response.strip()

        if len(tool_calls) == 1 or not self.parallel_tools:
            # Sequential mode executes only the first requested call.
            executed = [tool_calls[0]]
            first_name, first_args = executed[0]
            results = [run_tool(first_name, first_args, task_type=task_type)]
        else:
            executed = tool_calls
            results = self._run_tools_parallel_sync(tool_calls, task_type, task)

        conversation.append(f"Response:\n{response}")
        for (tool_name, _args), result in zip(executed, results):
            self._emit_tool_called_audit(task.id, tool_name, result)
            self._emit(
                events.TOOL_CALLED,
                {"task_id": task.id, "tool": tool_name, "result_preview": (result or "")[:200]},
            )
            conversation.append(f"Tool result ({tool_name}):\n{result or ''}")

    # Iteration budget exhausted: fall back to the last tool output. If the
    # loop never ran, the conversation only holds the prompt, which must not
    # be echoed back as the result.
    last_entry = conversation[-1].strip() if len(conversation) > 1 else ""
    return last_entry or "Max tool iterations reached."
|
|
552
|
+
|
|
553
|
+
def _run_tools_parallel_sync(
    self,
    tool_calls: list[tuple[str, dict]],
    task_type: str,
    task: Task,
) -> list[str]:
    """Run several tool calls concurrently from synchronous code.

    A private event loop is created for the call and closed afterwards; each
    tool runs through the loop's default executor. Results come back in the
    order of ``tool_calls``. A call that raised an ``Exception`` is reported
    as a ``"Tool error: ..."`` string, and a falsy result becomes "".
    """
    from devsper.tools.tool_runner import run_tool

    event_loop = asyncio.new_event_loop()

    async def _dispatch(name: str, args: dict) -> str:
        return await event_loop.run_in_executor(
            None, lambda: run_tool(name, args, task_type=task_type)
        )

    async def _gather_all() -> list[str]:
        pending = [_dispatch(name, args) for name, args in tool_calls]
        return list(await asyncio.gather(*pending, return_exceptions=True))

    try:
        outcomes = event_loop.run_until_complete(_gather_all())
        return [
            f"Tool error: {type(item).__name__}: {item}"
            if isinstance(item, Exception)
            else (item or "")
            for item in outcomes
        ]
    finally:
        event_loop.close()
|
|
580
|
+
|
|
581
|
+
def _emit_tool_called_audit(self, task_id: str, tool_name: str, result: str) -> None:
    """Write a TOOL_CALLED audit record, if auditing is configured.

    Nothing happens unless both ``self.audit_logger`` and
    ``self.audit_run_id`` are truthy. The tool output is capped at 2000
    characters. Any exception raised while building or logging the record is
    swallowed.
    """
    if not (self.audit_logger and self.audit_run_id):
        return
    try:
        from devsper.audit.logger import make_audit_record

        self.audit_logger.log(
            make_audit_record(
                run_id=self.audit_run_id,
                task_id=task_id,
                event_type="TOOL_CALLED",
                actor=task_id,
                resource=tool_name,
                input_text=tool_name,
                output_text=(result or "")[:2000],
            )
        )
    except Exception:
        pass
|
|
598
|
+
|
|
599
|
+
def _emit(self, event_type: events, payload: dict) -> None:
    """Append an event of ``event_type`` with ``payload`` to the event log.

    The event is stamped with the current UTC time.
    """
    stamped = Event(
        timestamp=datetime.now(timezone.utc),
        type=event_type,
        payload=payload,
    )
    self.event_log.append_event(stamped)
|
devsper/agents/critic.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Critic agent: lightweight second-pass reviewer that scores task results and optionally requests retry.
|
|
3
|
+
Always runs on a fast/cheap model. v1.7.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
import re
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
|
|
10
|
+
from devsper.types.task import Task
|
|
11
|
+
from devsper.utils.models import generate
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class CritiqueResult:
    """Parsed outcome of one critic review of a task result."""

    # Quality score reported by the critic (the critic prompt asks for 0.0-1.0).
    score: float
    # Problems the critic listed for the reviewed result.
    issues: list[str]
    # Whether the critic asked for the task to be retried.
    retry: bool
    # Critic response text the fields above were parsed from.
    raw: str
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class CriticAgent:
    """
    Lightweight second-pass agent that scores a task result and optionally
    requests a retry. Always runs on a fast/cheap model.
    """

    ELIGIBLE_ROLES = {"research", "analysis", "code", "backend", "frontend"}
    SCORE_THRESHOLD = 0.70  # below this, request retry
    MAX_CRITIQUES = 1  # never critique more than once per task

    CRITIC_SYSTEM = """You are a quality reviewer. Score this task result 0.0-1.0 on:
- completeness (did it address the task fully?),
- accuracy (are claims reasonable?),
- actionability (is the output usable?).

Respond ONLY with JSON: {"score": 0.0-1.0, "issues": ["...", ...], "retry": true/false}
Set retry=true only if score < 0.70 AND there are fixable issues."""

    def __init__(self, event_log=None):
        """Store the optional event log; it is not used by the methods below."""
        self.event_log = event_log

    async def critique(self, task: Task, result: str, model: str) -> CritiqueResult:
        """Ask ``model`` to review ``result`` for ``task`` and parse its verdict.

        Only the first 8000 characters of the result are sent; a falsy result
        is sent as an empty string.
        """
        user_part = f"Task: {task.description}\n\nResult: {(result or '')[:8000]}"
        full_prompt = f"{self.CRITIC_SYSTEM}\n\nUser:\n{user_part}"
        # NOTE(review): generate() is called without await, so it presumably
        # blocks the event loop while the model responds — confirm.
        raw = generate(model, full_prompt)
        return self._parse_critique(raw or "{}")

    def _parse_critique(self, raw: str) -> CritiqueResult:
        """Parse the critic's response into a CritiqueResult.

        The first JSON object found in ``raw`` (one level of nesting allowed,
        so a surrounding markdown fence is tolerated) is decoded. A response
        that is not valid JSON, is not a JSON object, or carries a non-numeric
        score is treated as a pass: score 1.0, no issues, no retry.
        """
        passthrough = CritiqueResult(score=1.0, issues=[], retry=False, raw=raw)
        try:
            # Extract JSON from response (allow markdown code fence)
            stripped = raw.strip()
            json_match = re.search(r"\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}", stripped, re.DOTALL)
            data = json.loads(json_match.group(0) if json_match else stripped)
        except (json.JSONDecodeError, ValueError):
            return passthrough
        if not isinstance(data, dict):
            # Valid JSON that is not an object (e.g. a bare number or list).
            return passthrough
        try:
            score = float(data.get("score", 1.0))
        except (TypeError, ValueError):
            # e.g. "score": "high" or "score": null
            return passthrough
        raw_issues = data.get("issues")
        issues = list(raw_issues) if isinstance(raw_issues, list) else []
        retry_value = data.get("retry", False)
        if isinstance(retry_value, str):
            # bool("false") is True, so string booleans are read by content.
            retry = retry_value.strip().lower() in {"true", "yes", "1"}
        else:
            retry = bool(retry_value)
        return CritiqueResult(score=score, issues=issues, retry=retry, raw=raw)

    async def get_retry_prompt(
        self, task: Task, result: str, critique: CritiqueResult
    ) -> str:
        """Build retry prompt: original task + critique feedback.

        At most 10 issues and the first 2000 characters of the previous
        result are included; a falsy result is included as an empty string.
        """
        if critique.issues:
            feedback = "\n".join(f"- {i}" for i in critique.issues[:10])
        else:
            feedback = "Quality issues identified."
        previous = (result or "")[:2000]
        return (
            f"{task.description}\n\n"
            f"--- Critique (score {critique.score:.2f}) ---\n"
            f"{feedback}\n\n"
            f"Please improve your response addressing the above. Your previous attempt:\n{previous}"
        )
|