devsper 2.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devsper/__init__.py +14 -0
- devsper/agents/a2a/__init__.py +27 -0
- devsper/agents/a2a/client.py +126 -0
- devsper/agents/a2a/discovery.py +24 -0
- devsper/agents/a2a/server.py +128 -0
- devsper/agents/a2a/tool_adapter.py +68 -0
- devsper/agents/a2a/types.py +49 -0
- devsper/agents/agent.py +602 -0
- devsper/agents/critic.py +80 -0
- devsper/agents/message_bus.py +124 -0
- devsper/agents/roles.py +181 -0
- devsper/agents/run_agent.py +78 -0
- devsper/analytics/__init__.py +5 -0
- devsper/analytics/tool_analytics.py +78 -0
- devsper/audit/__init__.py +5 -0
- devsper/audit/logger.py +214 -0
- devsper/bus/__init__.py +29 -0
- devsper/bus/backends/__init__.py +5 -0
- devsper/bus/backends/base.py +38 -0
- devsper/bus/backends/memory.py +55 -0
- devsper/bus/backends/redis.py +146 -0
- devsper/bus/message.py +56 -0
- devsper/bus/schema_version.py +3 -0
- devsper/bus/topics.py +19 -0
- devsper/cache/__init__.py +6 -0
- devsper/cache/embedding_index.py +98 -0
- devsper/cache/hashing.py +24 -0
- devsper/cache/store.py +153 -0
- devsper/cache/task_cache.py +191 -0
- devsper/cli/__init__.py +6 -0
- devsper/cli/commands/reg.py +733 -0
- devsper/cli/github_oauth.py +157 -0
- devsper/cli/init.py +637 -0
- devsper/cli/main.py +2956 -0
- devsper/cli/run_progress.py +103 -0
- devsper/cli/ui/__init__.py +65 -0
- devsper/cli/ui/components.py +94 -0
- devsper/cli/ui/errors.py +104 -0
- devsper/cli/ui/logging.py +120 -0
- devsper/cli/ui/onboarding.py +102 -0
- devsper/cli/ui/progress.py +43 -0
- devsper/cli/ui/run_view.py +308 -0
- devsper/cli/ui/theme.py +40 -0
- devsper/cluster/__init__.py +29 -0
- devsper/cluster/election.py +84 -0
- devsper/cluster/local.py +97 -0
- devsper/cluster/node_info.py +77 -0
- devsper/cluster/registry.py +71 -0
- devsper/cluster/router.py +117 -0
- devsper/cluster/state_backend.py +105 -0
- devsper/compliance/__init__.py +5 -0
- devsper/compliance/pii.py +147 -0
- devsper/config/__init__.py +52 -0
- devsper/config/config_loader.py +121 -0
- devsper/config/defaults.py +77 -0
- devsper/config/resolver.py +342 -0
- devsper/config/schema.py +237 -0
- devsper/credentials/__init__.py +19 -0
- devsper/credentials/cli.py +197 -0
- devsper/credentials/migration.py +124 -0
- devsper/credentials/store.py +142 -0
- devsper/dashboard/__init__.py +9 -0
- devsper/dashboard/dashboard.py +87 -0
- devsper/dev/__init__.py +25 -0
- devsper/dev/builder.py +195 -0
- devsper/dev/debugger.py +95 -0
- devsper/dev/repo_index.py +138 -0
- devsper/dev/sandbox.py +203 -0
- devsper/dev/scaffold.py +122 -0
- devsper/embeddings/__init__.py +5 -0
- devsper/embeddings/service.py +36 -0
- devsper/explainability/__init__.py +14 -0
- devsper/explainability/decision_tree.py +104 -0
- devsper/explainability/rationale.py +38 -0
- devsper/explainability/simulation.py +56 -0
- devsper/hitl/__init__.py +13 -0
- devsper/hitl/approval.py +160 -0
- devsper/hitl/escalation.py +95 -0
- devsper/intelligence/__init__.py +9 -0
- devsper/intelligence/adaptation.py +88 -0
- devsper/intelligence/analysis/__init__.py +19 -0
- devsper/intelligence/analysis/analyzer.py +71 -0
- devsper/intelligence/analysis/cost_estimator.py +66 -0
- devsper/intelligence/analysis/formatter.py +103 -0
- devsper/intelligence/analysis/run_report.py +402 -0
- devsper/intelligence/learning_engine.py +92 -0
- devsper/intelligence/strategies/__init__.py +23 -0
- devsper/intelligence/strategies/base.py +14 -0
- devsper/intelligence/strategies/code_analysis_strategy.py +33 -0
- devsper/intelligence/strategies/data_science_strategy.py +33 -0
- devsper/intelligence/strategies/document_pipeline_strategy.py +33 -0
- devsper/intelligence/strategies/experiment_strategy.py +33 -0
- devsper/intelligence/strategies/research_strategy.py +34 -0
- devsper/intelligence/strategy_selector.py +84 -0
- devsper/intelligence/synthesis.py +132 -0
- devsper/intelligence/task_optimizer.py +92 -0
- devsper/knowledge/__init__.py +5 -0
- devsper/knowledge/extractor.py +204 -0
- devsper/knowledge/knowledge_graph.py +184 -0
- devsper/knowledge/query.py +285 -0
- devsper/memory/__init__.py +35 -0
- devsper/memory/consolidation.py +138 -0
- devsper/memory/embeddings.py +60 -0
- devsper/memory/memory_index.py +97 -0
- devsper/memory/memory_router.py +62 -0
- devsper/memory/memory_store.py +221 -0
- devsper/memory/memory_types.py +54 -0
- devsper/memory/namespaces.py +45 -0
- devsper/memory/scoring.py +77 -0
- devsper/memory/summarizer.py +52 -0
- devsper/nodes/__init__.py +5 -0
- devsper/nodes/controller.py +449 -0
- devsper/nodes/rpc.py +127 -0
- devsper/nodes/single.py +161 -0
- devsper/nodes/worker.py +506 -0
- devsper/orchestration/__init__.py +19 -0
- devsper/orchestration/meta_planner.py +239 -0
- devsper/orchestration/priority_queue.py +61 -0
- devsper/plugins/__init__.py +19 -0
- devsper/plugins/marketplace/__init__.py +0 -0
- devsper/plugins/plugin_loader.py +70 -0
- devsper/plugins/plugin_registry.py +34 -0
- devsper/plugins/registry.py +83 -0
- devsper/protocols/__init__.py +6 -0
- devsper/providers/__init__.py +17 -0
- devsper/providers/anthropic.py +84 -0
- devsper/providers/base.py +75 -0
- devsper/providers/complexity_router.py +94 -0
- devsper/providers/gemini.py +36 -0
- devsper/providers/github.py +180 -0
- devsper/providers/model_router.py +40 -0
- devsper/providers/openai.py +105 -0
- devsper/providers/router/__init__.py +21 -0
- devsper/providers/router/backends/__init__.py +19 -0
- devsper/providers/router/backends/anthropic_backend.py +111 -0
- devsper/providers/router/backends/custom_backend.py +138 -0
- devsper/providers/router/backends/gemini_backend.py +89 -0
- devsper/providers/router/backends/github_backend.py +165 -0
- devsper/providers/router/backends/ollama_backend.py +104 -0
- devsper/providers/router/backends/openai_backend.py +142 -0
- devsper/providers/router/backends/vllm_backend.py +35 -0
- devsper/providers/router/base.py +60 -0
- devsper/providers/router/factory.py +92 -0
- devsper/providers/router/legacy.py +101 -0
- devsper/providers/router/router.py +135 -0
- devsper/reasoning/__init__.py +12 -0
- devsper/reasoning/graph.py +59 -0
- devsper/reasoning/nodes.py +20 -0
- devsper/reasoning/store.py +67 -0
- devsper/runtime/__init__.py +12 -0
- devsper/runtime/health.py +88 -0
- devsper/runtime/replay.py +53 -0
- devsper/runtime/replay_engine.py +142 -0
- devsper/runtime/run_history.py +204 -0
- devsper/runtime/telemetry.py +116 -0
- devsper/runtime/visualize.py +58 -0
- devsper/sandbox/__init__.py +13 -0
- devsper/sandbox/sandbox.py +161 -0
- devsper/swarm/checkpointer.py +65 -0
- devsper/swarm/executor.py +558 -0
- devsper/swarm/map_reduce.py +44 -0
- devsper/swarm/planner.py +197 -0
- devsper/swarm/prefetcher.py +91 -0
- devsper/swarm/scheduler.py +153 -0
- devsper/swarm/speculation.py +47 -0
- devsper/swarm/swarm.py +562 -0
- devsper/tools/__init__.py +33 -0
- devsper/tools/base.py +29 -0
- devsper/tools/code_intelligence/__init__.py +13 -0
- devsper/tools/code_intelligence/api_surface_extractor.py +73 -0
- devsper/tools/code_intelligence/architecture_analyzer.py +65 -0
- devsper/tools/code_intelligence/codebase_indexer.py +71 -0
- devsper/tools/code_intelligence/dependency_graph_builder.py +67 -0
- devsper/tools/code_intelligence/design_pattern_detector.py +62 -0
- devsper/tools/code_intelligence/large_function_detector.py +68 -0
- devsper/tools/code_intelligence/module_responsibility_mapper.py +56 -0
- devsper/tools/code_intelligence/parallel_codebase_analysis.py +44 -0
- devsper/tools/code_intelligence/refactor_candidate_detector.py +81 -0
- devsper/tools/code_intelligence/repository_semantic_index.py +61 -0
- devsper/tools/code_intelligence/test_coverage_estimator.py +62 -0
- devsper/tools/coding/__init__.py +12 -0
- devsper/tools/coding/analyze_code_complexity.py +48 -0
- devsper/tools/coding/dependency_analyzer.py +42 -0
- devsper/tools/coding/extract_functions.py +38 -0
- devsper/tools/coding/format_python.py +50 -0
- devsper/tools/coding/generate_docstrings.py +40 -0
- devsper/tools/coding/generate_unit_tests.py +42 -0
- devsper/tools/coding/lint_python.py +51 -0
- devsper/tools/coding/refactor_function.py +41 -0
- devsper/tools/coding/repo_structure_map.py +54 -0
- devsper/tools/coding/run_python.py +53 -0
- devsper/tools/data/__init__.py +12 -0
- devsper/tools/data/column_type_detection.py +64 -0
- devsper/tools/data/csv_summary.py +52 -0
- devsper/tools/data/dataframe_filter.py +51 -0
- devsper/tools/data/dataframe_groupby.py +47 -0
- devsper/tools/data/dataframe_stats.py +38 -0
- devsper/tools/data/dataset_sampling.py +55 -0
- devsper/tools/data/dataset_schema.py +45 -0
- devsper/tools/data/json_pretty_print.py +37 -0
- devsper/tools/data/json_query.py +46 -0
- devsper/tools/data/missing_value_report.py +47 -0
- devsper/tools/data_science/__init__.py +13 -0
- devsper/tools/data_science/correlation_heatmap.py +72 -0
- devsper/tools/data_science/dataset_bias_detector.py +49 -0
- devsper/tools/data_science/dataset_distribution_report.py +64 -0
- devsper/tools/data_science/dataset_drift_detector.py +64 -0
- devsper/tools/data_science/dataset_outlier_detector.py +65 -0
- devsper/tools/data_science/dataset_profile.py +76 -0
- devsper/tools/data_science/distributed_dataset_processor.py +54 -0
- devsper/tools/data_science/feature_engineering_suggestions.py +69 -0
- devsper/tools/data_science/feature_importance_estimator.py +82 -0
- devsper/tools/data_science/model_input_validator.py +59 -0
- devsper/tools/data_science/time_series_analyzer.py +57 -0
- devsper/tools/documents/__init__.py +11 -0
- devsper/tools/documents/_docproc.py +56 -0
- devsper/tools/documents/document_to_markdown.py +29 -0
- devsper/tools/documents/extract_document_images.py +39 -0
- devsper/tools/documents/extract_document_text.py +29 -0
- devsper/tools/documents/extract_equations.py +36 -0
- devsper/tools/documents/extract_tables.py +47 -0
- devsper/tools/documents/summarize_document.py +42 -0
- devsper/tools/documents/write_latex_document.py +133 -0
- devsper/tools/documents/write_markdown_document.py +89 -0
- devsper/tools/documents/write_word_document.py +149 -0
- devsper/tools/experiments/__init__.py +13 -0
- devsper/tools/experiments/bootstrap_estimator.py +54 -0
- devsper/tools/experiments/experiment_report_generator.py +50 -0
- devsper/tools/experiments/experiment_tracker.py +36 -0
- devsper/tools/experiments/grid_search_runner.py +50 -0
- devsper/tools/experiments/model_benchmark_runner.py +45 -0
- devsper/tools/experiments/monte_carlo_experiment.py +38 -0
- devsper/tools/experiments/parameter_sweep_runner.py +51 -0
- devsper/tools/experiments/result_comparator.py +58 -0
- devsper/tools/experiments/simulation_runner.py +43 -0
- devsper/tools/experiments/statistical_significance_test.py +56 -0
- devsper/tools/experiments/swarm_map_reduce.py +42 -0
- devsper/tools/filesystem/__init__.py +12 -0
- devsper/tools/filesystem/append_file.py +42 -0
- devsper/tools/filesystem/file_hash.py +40 -0
- devsper/tools/filesystem/file_line_count.py +36 -0
- devsper/tools/filesystem/file_metadata.py +38 -0
- devsper/tools/filesystem/file_preview.py +55 -0
- devsper/tools/filesystem/find_large_files.py +50 -0
- devsper/tools/filesystem/list_directory.py +39 -0
- devsper/tools/filesystem/read_file.py +35 -0
- devsper/tools/filesystem/search_files.py +60 -0
- devsper/tools/filesystem/write_file.py +41 -0
- devsper/tools/flagship/__init__.py +15 -0
- devsper/tools/flagship/distributed_document_analysis.py +77 -0
- devsper/tools/flagship/docproc_corpus_pipeline.py +91 -0
- devsper/tools/flagship/repository_semantic_map.py +99 -0
- devsper/tools/flagship/research_graph_builder.py +111 -0
- devsper/tools/flagship/swarm_experiment_runner.py +86 -0
- devsper/tools/knowledge/__init__.py +10 -0
- devsper/tools/knowledge/citation_graph_builder.py +69 -0
- devsper/tools/knowledge/concept_frequency_analyzer.py +74 -0
- devsper/tools/knowledge/corpus_builder.py +66 -0
- devsper/tools/knowledge/cross_document_entity_linker.py +71 -0
- devsper/tools/knowledge/document_corpus_summary.py +68 -0
- devsper/tools/knowledge/document_topic_extractor.py +58 -0
- devsper/tools/knowledge/knowledge_graph_extractor.py +58 -0
- devsper/tools/knowledge/timeline_extractor.py +59 -0
- devsper/tools/math/__init__.py +12 -0
- devsper/tools/math/calculate_expression.py +52 -0
- devsper/tools/math/correlation.py +44 -0
- devsper/tools/math/distribution_summary.py +39 -0
- devsper/tools/math/histogram.py +53 -0
- devsper/tools/math/linear_regression.py +47 -0
- devsper/tools/math/matrix_multiply.py +38 -0
- devsper/tools/math/mean_std.py +35 -0
- devsper/tools/math/monte_carlo_simulation.py +43 -0
- devsper/tools/math/polynomial_fit.py +40 -0
- devsper/tools/math/random_sample.py +36 -0
- devsper/tools/mcp/__init__.py +23 -0
- devsper/tools/mcp/adapter.py +53 -0
- devsper/tools/mcp/client.py +235 -0
- devsper/tools/mcp/discovery.py +53 -0
- devsper/tools/memory/__init__.py +16 -0
- devsper/tools/memory/delete_memory.py +25 -0
- devsper/tools/memory/list_memory.py +34 -0
- devsper/tools/memory/search_memory.py +36 -0
- devsper/tools/memory/store_memory.py +47 -0
- devsper/tools/memory/summarize_memory.py +41 -0
- devsper/tools/memory/tag_memory.py +47 -0
- devsper/tools/pipelines.py +92 -0
- devsper/tools/registry.py +39 -0
- devsper/tools/research/__init__.py +12 -0
- devsper/tools/research/arxiv_download.py +55 -0
- devsper/tools/research/arxiv_search.py +58 -0
- devsper/tools/research/citation_extractor.py +35 -0
- devsper/tools/research/duckduckgo_search.py +42 -0
- devsper/tools/research/paper_metadata_extractor.py +45 -0
- devsper/tools/research/paper_summarizer.py +41 -0
- devsper/tools/research/research_question_generator.py +39 -0
- devsper/tools/research/topic_cluster.py +46 -0
- devsper/tools/research/web_search.py +47 -0
- devsper/tools/research/wikipedia_lookup.py +50 -0
- devsper/tools/research_advanced/__init__.py +14 -0
- devsper/tools/research_advanced/citation_context_extractor.py +60 -0
- devsper/tools/research_advanced/literature_review_generator.py +79 -0
- devsper/tools/research_advanced/methodology_extractor.py +58 -0
- devsper/tools/research_advanced/paper_contribution_extractor.py +50 -0
- devsper/tools/research_advanced/paper_dataset_identifier.py +49 -0
- devsper/tools/research_advanced/paper_method_comparator.py +62 -0
- devsper/tools/research_advanced/paper_similarity_search.py +69 -0
- devsper/tools/research_advanced/paper_trend_analyzer.py +69 -0
- devsper/tools/research_advanced/parallel_document_analyzer.py +56 -0
- devsper/tools/research_advanced/research_gap_finder.py +71 -0
- devsper/tools/research_advanced/research_topic_mapper.py +69 -0
- devsper/tools/research_advanced/swarm_literature_review.py +58 -0
- devsper/tools/scoring/__init__.py +52 -0
- devsper/tools/scoring/report.py +44 -0
- devsper/tools/scoring/scorer.py +39 -0
- devsper/tools/scoring/selector.py +61 -0
- devsper/tools/scoring/store.py +267 -0
- devsper/tools/selector.py +130 -0
- devsper/tools/system/__init__.py +12 -0
- devsper/tools/system/cpu_usage.py +22 -0
- devsper/tools/system/disk_usage.py +35 -0
- devsper/tools/system/environment_variables.py +29 -0
- devsper/tools/system/memory_usage.py +23 -0
- devsper/tools/system/pip_install.py +44 -0
- devsper/tools/system/pip_search.py +29 -0
- devsper/tools/system/process_list.py +34 -0
- devsper/tools/system/python_package_list.py +40 -0
- devsper/tools/system/run_shell_command.py +51 -0
- devsper/tools/system/system_info.py +26 -0
- devsper/tools/tool_runner.py +122 -0
- devsper/tui/__init__.py +5 -0
- devsper/tui/activity_feed_view.py +73 -0
- devsper/tui/adaptive_tasks_view.py +75 -0
- devsper/tui/agent_role_view.py +35 -0
- devsper/tui/app.py +395 -0
- devsper/tui/dashboard_screen.py +290 -0
- devsper/tui/dev_view.py +99 -0
- devsper/tui/inject_screen.py +73 -0
- devsper/tui/knowledge_graph_view.py +46 -0
- devsper/tui/layout.py +43 -0
- devsper/tui/logs_view.py +83 -0
- devsper/tui/memory_view.py +58 -0
- devsper/tui/performance_view.py +33 -0
- devsper/tui/reasoning_graph_view.py +39 -0
- devsper/tui/results_view.py +139 -0
- devsper/tui/swarm_view.py +37 -0
- devsper/tui/task_detail_screen.py +55 -0
- devsper/tui/task_view.py +103 -0
- devsper/types/event.py +97 -0
- devsper/types/exceptions.py +21 -0
- devsper/types/swarm.py +41 -0
- devsper/types/task.py +80 -0
- devsper/upgrade/__init__.py +21 -0
- devsper/upgrade/changelog.py +124 -0
- devsper/upgrade/cli.py +145 -0
- devsper/upgrade/installer.py +103 -0
- devsper/upgrade/notifier.py +52 -0
- devsper/upgrade/version_check.py +121 -0
- devsper/utils/event_logger.py +88 -0
- devsper/utils/http.py +43 -0
- devsper/utils/models.py +54 -0
- devsper/visualization/__init__.py +5 -0
- devsper/visualization/dag_export.py +67 -0
- devsper/workflow/__init__.py +18 -0
- devsper/workflow/conditions.py +157 -0
- devsper/workflow/context.py +108 -0
- devsper/workflow/loader.py +156 -0
- devsper/workflow/resolver.py +109 -0
- devsper/workflow/runner.py +562 -0
- devsper/workflow/schema.py +63 -0
- devsper/workflow/validator.py +128 -0
- devsper-2.1.6.dist-info/METADATA +346 -0
- devsper-2.1.6.dist-info/RECORD +375 -0
- devsper-2.1.6.dist-info/WHEEL +4 -0
- devsper-2.1.6.dist-info/entry_points.txt +3 -0
- devsper-2.1.6.dist-info/licenses/LICENSE +639 -0
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""Research strategy: DAG for literature review (corpus -> topic -> citation -> review)."""
|
|
2
|
+
|
|
3
|
+
import secrets
|
|
4
|
+
|
|
5
|
+
from devsper.types.task import Task
|
|
6
|
+
|
|
7
|
+
from devsper.intelligence.strategies.base import Strategy
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _short_id() -> str:
|
|
11
|
+
return secrets.token_hex(4)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ResearchStrategy(Strategy):
    """Research pipeline: corpus_builder -> topic_extraction -> citation_graph -> literature_review."""

    def plan(self, root_task: Task) -> list[Task]:
        """
        Expand *root_task* into a strictly linear 4-step research DAG.

        Each step depends on the step before it; the root task's description is
        prepended to the first step only, so later steps stay focused on their
        own instruction.

        Returns the list of generated ``Task`` objects in execution order.
        """
        steps = [
            ("corpus_builder", "Build a corpus of relevant papers and sources for the topic."),
            ("topic_extraction", "Extract main topics and themes from the corpus."),
            ("citation_graph", "Build citation graph and identify key references."),
            ("literature_review", "Write a structured literature review synthesizing findings."),
        ]
        tasks: list[Task] = []
        prev_id: str | None = None
        for i, (_step_id, desc) in enumerate(steps):
            tid = _short_id()
            # Chain each task to the previous one (linear pipeline).
            deps = [prev_id] if prev_id is not None else []
            # Include root context in the first step only.
            description = f"{root_task.description}\n\nStep: {desc}" if i == 0 else desc
            tasks.append(Task(id=tid, description=description, dependencies=deps))
            prev_id = tid
        return tasks
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Strategy selector: choose execution strategy (research, code analysis, data analysis).
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from enum import Enum
|
|
7
|
+
|
|
8
|
+
from devsper.types.task import Task
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ExecutionStrategy(str, Enum):
    """Closed set of high-level execution strategies the selector can choose.

    Mixes in ``str`` so members compare and serialize as their plain string
    values.
    """

    RESEARCH = "research"            # literature / paper-oriented work
    CODE_ANALYSIS = "code_analysis"  # repository and source-code tasks
    DATA_ANALYSIS = "data_analysis"  # datasets, stats, pipelines
    DOCUMENT = "document"            # document ingest / extraction / writing
    EXPERIMENT = "experiment"        # parameter sweeps, benchmarks, runs
    GENERAL = "general"              # fallback when no keyword matches
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Keyword lists used by StrategySelector for naive substring scoring of a task
# description: each keyword found in the lowered text adds 1 to that
# strategy's score.
# NOTE: some keywords (e.g. "paper", "experiment", "metric") appear in more
# than one list, so a single description can score for several strategies.
RESEARCH_KEYWORDS = [
    "research", "paper", "literature", "survey", "cite", "citation",
    "diffusion", "transformer", "methodology", "findings", "review",
]
CODE_KEYWORDS = [
    "code", "codebase", "repository", "refactor", "lint", "test",
    "api", "architecture", "module", "function", "class", "implement",
]
DATA_KEYWORDS = [
    "data", "dataset", "csv", "analysis", "experiment", "metric",
    "statistic", "plot", "visualization", "pipeline", "training",
]
DOCUMENT_KEYWORDS = [
    "document", "pdf", "docx", "ingest", "extract", "corpus",
    "paper", "report", "write", "summary",
]
EXPERIMENT_KEYWORDS = [
    "experiment", "sweep", "parameter", "benchmark", "ablation",
    "compare", "metric", "evaluate", "run",
]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class StrategySelector:
    """
    Select execution strategy from a root task description to tune planner/executor behavior.

    Scoring is a naive keyword count: every keyword that occurs as a substring
    of the lowered description adds 1 to that strategy's score.
    """

    def __init__(self) -> None:
        # Keyword lists per concrete strategy; GENERAL is the no-match
        # fallback and deliberately has no keyword list.
        self._keyword_lists: dict[ExecutionStrategy, list[str]] = {
            ExecutionStrategy.RESEARCH: RESEARCH_KEYWORDS,
            ExecutionStrategy.CODE_ANALYSIS: CODE_KEYWORDS,
            ExecutionStrategy.DATA_ANALYSIS: DATA_KEYWORDS,
            ExecutionStrategy.DOCUMENT: DOCUMENT_KEYWORDS,
            ExecutionStrategy.EXPERIMENT: EXPERIMENT_KEYWORDS,
        }

    def select(self, task: Task | str) -> ExecutionStrategy:
        """
        Return the best strategy for the given task (or task description string).

        Ties break in declaration order of the keyword map (which mirrors the
        enum declaration order). GENERAL is returned only when no keyword
        matches at all.
        """
        text = task.description if isinstance(task, Task) else str(task)
        text = (text or "").lower()
        # NOTE: plain substring matching — "test" also hits "latest", etc.
        # Acceptable for coarse routing.
        scores = {
            strategy: sum(1 for kw in keywords if kw in text)
            for strategy, keywords in self._keyword_lists.items()
        }
        best = max(scores, key=scores.__getitem__)
        return best if scores[best] > 0 else ExecutionStrategy.GENERAL

    def suggest_planner_prompt_suffix(self, strategy: ExecutionStrategy) -> str:
        """Return optional prompt suffix to bias the planner for this strategy."""
        suffixes = {
            ExecutionStrategy.RESEARCH: " Focus on: literature search, paper summaries, citation context, methodology comparison.",
            ExecutionStrategy.CODE_ANALYSIS: " Focus on: structure, dependencies, tests, refactors, documentation.",
            ExecutionStrategy.DATA_ANALYSIS: " Focus on: data loading, stats, visualizations, experiments, metrics.",
            ExecutionStrategy.DOCUMENT: " Focus on: document ingest, extraction, linking, and reporting.",
            ExecutionStrategy.EXPERIMENT: " Focus on: setup, run, comparison, and experiment reporting.",
        }
        return suffixes.get(strategy, "")
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"""
|
|
2
|
+
v1.8: Cross-run synthesis — answer questions by querying across ALL memory (all runs).
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from collections.abc import Iterator
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
|
|
8
|
+
from devsper.knowledge.query import query_for_planning, format_planning_context, PlanningContext
|
|
9
|
+
from devsper.memory.memory_index import MemoryIndex
|
|
10
|
+
from devsper.memory.memory_types import MemoryRecord
|
|
11
|
+
from devsper.knowledge.knowledge_graph import KnowledgeGraph
|
|
12
|
+
from devsper.utils.models import generate
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _cosine_sim(a: list[float], b: list[float]) -> float:
|
|
16
|
+
if not a or not b or len(a) != len(b):
|
|
17
|
+
return 0.0
|
|
18
|
+
dot = sum(x * y for x, y in zip(a, b))
|
|
19
|
+
na = sum(x * x for x in a) ** 0.5
|
|
20
|
+
nb = sum(x * x for x in b) ** 0.5
|
|
21
|
+
if na == 0 or nb == 0:
|
|
22
|
+
return 0.0
|
|
23
|
+
return dot / (na * nb)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _deduplicate_by_similarity(records: list[MemoryRecord], threshold: float = 0.95) -> list[MemoryRecord]:
    """Remove records with cosine similarity > threshold to an already-kept record.

    Records without an embedding are always kept: there is nothing to compare
    them against. Order of surviving records is preserved.
    """
    kept: list[MemoryRecord] = []
    for record in records:
        emb = record.embedding
        if emb is None:
            kept.append(record)
            continue
        is_duplicate = any(
            prior.embedding is not None and _cosine_sim(emb, prior.embedding) > threshold
            for prior in kept
        )
        if not is_duplicate:
            kept.append(record)
    return kept
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _short_run_id(run_id: str) -> str:
|
|
48
|
+
"""Short run id for citations (e.g. events_2025-03-09... -> 2025-03-09)."""
|
|
49
|
+
if not run_id:
|
|
50
|
+
return "unknown"
|
|
51
|
+
if "_" in run_id:
|
|
52
|
+
parts = run_id.split("_")
|
|
53
|
+
if len(parts) >= 2:
|
|
54
|
+
return parts[1][:12] if len(parts[1]) > 12 else parts[1]
|
|
55
|
+
return run_id[:12] if len(run_id) > 12 else run_id
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class CrossRunSynthesizer:
    """
    Answers questions by querying across ALL memory (all runs),
    not just the current session.
    """

    def __init__(
        self,
        memory_index: MemoryIndex,
        knowledge_graph: KnowledgeGraph | None,
        worker_model: str,
    ):
        self.memory_index = memory_index
        self.knowledge_graph = knowledge_graph
        self.worker_model = worker_model

    def synthesize(
        self,
        query: str,
        max_sources: int = 20,
        stream: bool = True,
        use_kg: bool = True,
        since: datetime | None = None,
    ) -> Iterator[str] | str:
        """
        Answer *query* from memories gathered across all runs.

        1. Query memory_index across all runs (no run_id filter): top-N by similarity
        2. Optionally query knowledge graph: query_for_planning(kg, query)
        3. Deduplicate: remove memory records with cosine similarity > 0.95 to each other
        4. Build synthesis prompt with all sources
        5. Stream LLM response (or return the full string if stream=False)

        BUG FIX: the original body contained both ``yield`` and ``return value``
        in the same function, which made the whole method a generator — the
        ``stream=False`` branch's string was silently lost as
        ``StopIteration.value``. Streaming is now delegated to a helper
        generator so this method can genuinely return a ``str``.
        """
        memories = self.memory_index.query_across_runs(query, top_k=max_sources)
        if since is not None:
            # Keep only memories recorded at or after the cutoff.
            memories = [m for m in memories if m.timestamp >= since]
        memories = _deduplicate_by_similarity(memories, threshold=0.95)
        memories = memories[:max_sources]

        kg_ctx: PlanningContext | None = None
        if use_kg and self.knowledge_graph is not None:
            kg_ctx = query_for_planning(self.knowledge_graph, query)

        prompt = self._build_synthesis_prompt(query, memories, kg_ctx)
        if stream:
            return self._stream_response(prompt)
        return generate(self.worker_model, prompt, stream=False) or ""

    def _stream_response(self, prompt: str) -> Iterator[str]:
        """Yield model response chunks for *prompt* (streaming path)."""
        yield from generate(self.worker_model, prompt, stream=True)

    def _build_synthesis_prompt(
        self,
        query: str,
        memories: list[MemoryRecord],
        kg_ctx: PlanningContext | None,
    ) -> str:
        """Build system + user prompt for synthesis."""
        kg_block = ""
        if kg_ctx and (kg_ctx.relevant_concepts or kg_ctx.prior_findings or kg_ctx.related_methods):
            kg_block = "Knowledge Graph Facts:\n" + format_planning_context(kg_ctx) + "\n\n"
        # Count distinct run ids in first-seen order (used only for the header).
        unique_runs = list(dict.fromkeys(getattr(m, "run_id", "") or "" for m in memories))
        unique_runs = [r for r in unique_runs if r]
        # One line per memory, truncated to 300 chars, tagged with a short run id.
        memory_block = "\n".join(
            f"[run:{_short_run_id(getattr(m, 'run_id', ''))}] {(m.content or '')[:300]}"
            for m in memories
        )
        user = f"""Query: {query}

{kg_block}Memory Sources ({len(memories)} records across {len(unique_runs)} runs):
{memory_block}"""
        system = """You are synthesizing research findings across multiple past sessions.
Answer the query using ONLY the provided sources.
Cite sources as [run:RUN_ID_SHORT] inline.
If sources conflict, note the conflict explicitly.
Do not speculate beyond the sources."""
        return f"System:\n{system}\n\nUser:\n{user}"
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Task optimizer: merge redundant tasks, detect parallel opportunities, remove unnecessary tasks.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from devsper.types.task import Task
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _normalize_for_sim(s: str) -> str:
|
|
10
|
+
return re.sub(r"\s+", " ", (s or "").lower().strip())
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class TaskOptimizer:
    """
    Optimize a task graph: merge redundant tasks, identify tasks that can run in parallel,
    drop no-op or duplicate steps.
    """

    def __init__(self, min_similarity_chars: int = 20) -> None:
        # Normalized descriptions shorter than this are considered too generic
        # to deduplicate on.
        self.min_similarity_chars = min_similarity_chars

    def optimize(self, tasks: "list[Task]") -> "list[Task]":
        """
        Return a new list of tasks with redundancies merged, dependencies updated,
        and optional parallelization hints. Does not mutate input tasks.

        BUG FIX: the original keyed a dict on the normalized description, so a
        *short* duplicate (below min_similarity_chars and therefore exempt from
        dedup) still overwrote the earlier task sharing its key and silently
        dropped it. A seen-set plus an ordered list keeps every task we intend
        to keep.
        """
        if not tasks:
            return []
        deduped: list = []
        seen: set[str] = set()
        for t in tasks:
            norm = _normalize_for_sim(t.description)[:200]
            # Only long-enough normalized descriptions count as duplicates.
            if norm in seen and len(norm) >= self.min_similarity_chars:
                continue
            seen.add(norm)
            deduped.append(t)
        merged = self._merge_trivial(deduped)
        return self._rebuild_deps(merged)

    def _merge_trivial(self, tasks: "list[Task]") -> "list[Task]":
        """Fold very short (<15 char) descriptions into the preceding task."""
        out: list = []
        for t in tasks:
            desc = (t.description or "").strip()
            if len(desc) < 15 and out:
                prev = out[-1]
                # Replace the previous entry with a merged copy (inputs untouched).
                out[-1] = Task(
                    id=prev.id,
                    description=prev.description + "; " + desc,
                    dependencies=prev.dependencies,
                )
            else:
                out.append(t)
        return out

    def _rebuild_deps(self, tasks: "list[Task]") -> "list[Task]":
        """Rebuild task list with sequential ids and deps (task_1 -> task_2 -> ...)."""
        id_map = {t.id: i for i, t in enumerate(tasks, start=1)}
        result: list = []
        for i, t in enumerate(tasks, start=1):
            new_id = f"task_{i}"
            # Remap surviving dependencies; references to dropped tasks vanish.
            deps = [f"task_{id_map[d]}" for d in t.dependencies if d in id_map]
            if not deps and i > 1:
                # Keep the chain connected: default to the previous task.
                deps = [f"task_{i - 1}"]
            result.append(
                Task(id=new_id, description=t.description, dependencies=deps)
            )
        return result

    def detect_parallel_opportunities(self, tasks: "list[Task]") -> list[list[str]]:
        """
        Return groups of task ids that could run in parallel (same dependencies).
        Each group is a list of task ids that all depend on the same set and could be run together.
        """
        by_dep_key: dict[tuple, list[str]] = {}
        for t in tasks:
            key = tuple(sorted(t.dependencies))
            by_dep_key.setdefault(key, []).append(t.id)
        return [ids for ids in by_dep_key.values() if len(ids) > 1]

    def remove_unnecessary(self, tasks: "list[Task]", predicate=None) -> "list[Task]":
        """
        Remove tasks for which predicate(task) is True (e.g. no-op descriptions).
        predicate defaults to: description too short or placeholder-like.
        """
        if predicate is None:
            def predicate(t) -> bool:
                d = (t.description or "").strip().lower()
                if len(d) < 5:
                    return True
                if d in ("n/a", "none", "todo", "tbd", "-", "..."):
                    return True
                return False
        return [t for t in tasks if not predicate(t)]
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
"""
|
|
2
|
+
v1.8: Post-run knowledge extraction from task results into the knowledge graph.
|
|
3
|
+
Heuristic extraction (no LLM); fast, non-blocking.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import re
|
|
7
|
+
import time
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from datetime import datetime, timezone
|
|
10
|
+
|
|
11
|
+
from devsper.knowledge.knowledge_graph import (
|
|
12
|
+
KnowledgeGraph,
|
|
13
|
+
NODE_CONCEPT,
|
|
14
|
+
NODE_DATASET,
|
|
15
|
+
NODE_DOCUMENT,
|
|
16
|
+
NODE_METHOD,
|
|
17
|
+
EDGE_USES,
|
|
18
|
+
EDGE_EXTENDS,
|
|
19
|
+
EDGE_OUTPERFORMS,
|
|
20
|
+
EDGE_CITES,
|
|
21
|
+
)
|
|
22
|
+
from devsper.types.task import Task, TaskStatus
|
|
23
|
+
from devsper.types.event import Event, events
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass
class KGNode:
    """Lightweight node for extractor output."""
    # Stable identifier "<kind>:<whitespace-normalized label>" (built by _normalize_id).
    id: str
    # Node category: one of the NODE_* constants (concept/dataset/document/method).
    kind: str
    # Human-readable surface form as found in the source text.
    label: str
    # Heuristic extraction confidence; fixed per pattern class (0.60–0.95).
    confidence: float
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class KGEdge:
    """Lightweight edge for extractor output."""
    # Source node id (a KGNode.id).
    from_id: str
    # Target node id (a KGNode.id).
    to_id: str
    # One of the EDGE_* constants (uses/extends/outperforms/cites).
    edge_type: str
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# Patterns for documents: explicit citations ("according to X", "paper: X",
# "article: X", "from X", terminated by a period or end of line) or a bare
# http(s) URL. Group 1 = citation text, group 2 = URL.
# NOTE(review): the "from" alternative under IGNORECASE is broad and may match
# ordinary prose ("from the results ...") — confirm false-positive rate.
_DOC_PATTERN = re.compile(
    r"(?:according to|paper:|article:|from)\s*[:\s]*([^\n.]+?)(?:\.|$)|(https?://[^\s]+)",
    re.IGNORECASE,
)
# Capitalized multi-word phrase (2-3 words), e.g. "Neural Networks".
_CONCEPT_PATTERN = re.compile(r"\b([A-Z][a-z]+(?:\s+[A-Z][a-z0-9]+){1,2})\b")
# Technical terms: camelCase or PascalCase-with-interior-capital identifiers.
_TECH_PATTERN = re.compile(r"\b([a-z]+[A-Z][a-zA-Z0-9]*|[A-Z][a-z]+[A-Z][a-zA-Z0-9]*)\b")
# Dataset/corpus/benchmark either before or after the name:
# "dataset [X]" (name in group 2) or "X dataset" (name in group 3).
_DATASET_PATTERN = re.compile(
    r"\b(dataset|corpus|benchmark)\s+[\[']?([A-Za-z0-9\-_]+)[\]']?|\b([A-Z][A-Za-z0-9\-]+(?:-\d+)?)\s+(?:dataset|corpus|benchmark)\b",
    re.IGNORECASE,
)
# Method mentions introduced by "using X", "via X", "with X"; the lazy capture
# stops at " to", " for", a comma, a period, or end of string.
_METHOD_PATTERN = re.compile(
    r"\b(?:using|via|with)\s+([A-Z][A-Za-z0-9\s]+?)(?:\s+to|\s+for|,|\.|$)",
    re.IGNORECASE,
)

# Relationship patterns: two 1-2-word entities joined by a typed verb phrase
# (entity co-occurrence in the same stretch of text).
_REL_USES = re.compile(r"(\w+(?:\s+\w+)?)\s+(?:uses?|leverages?)\s+(\w+(?:\s+\w+)?)", re.IGNORECASE)
_REL_EXTENDS = re.compile(r"(\w+(?:\s+\w+)?)\s+(?:is\s+based\s+on|extends?)\s+(\w+(?:\s+\w+)?)", re.IGNORECASE)
_REL_OUTPERFORMS = re.compile(r"(\w+(?:\s+\w+)?)\s+(?:outperforms?|is\s+better\s+than)\s+(\w+(?:\s+\w+)?)", re.IGNORECASE)
_REL_CITES = re.compile(r"(\w+(?:\s+\w+)?)\s+(?:cites?|references?)\s+(\w+(?:\s+\w+)?)", re.IGNORECASE)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _normalize_id(kind: str, label: str) -> str:
|
|
71
|
+
label_clean = re.sub(r"\s+", " ", label.strip())
|
|
72
|
+
return f"{kind}:{label_clean}"
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class KnowledgeExtractor:
    """
    Post-run background pass: extract entities, facts, and relationships
    from task results and add to knowledge graph.

    Extraction is purely regex/heuristic (no LLM calls). Confidences are
    fixed per pattern class; nodes below ``min_confidence`` are not written
    to the graph.
    """

    def __init__(self, min_confidence: float = 0.60):
        # Entities scoring below this threshold are skipped when writing nodes.
        self.min_confidence = min_confidence

    async def extract_from_run(
        self,
        run_id: str,
        tasks: list[Task],
        kg: KnowledgeGraph,
        event_log=None,  # optional sink exposing append_event(Event) — TODO confirm type
    ) -> None:
        """Called after SWARM_FINISHED. Run in background, non-blocking.

        Scans results of COMPLETED tasks, writes extracted nodes/edges into
        *kg*, saves the graph once at the end, and emits a
        KNOWLEDGE_EXTRACTED summary event when *event_log* is provided.
        """
        start = time.perf_counter()
        nodes_added = 0
        edges_added = 0
        for task in tasks:
            # Only completed tasks with a non-empty result have extractable text.
            if task.status != TaskStatus.COMPLETED or not task.result:
                continue
            entities = self._extract_entities(task.result)
            for e in entities:
                if e.confidence >= self.min_confidence:
                    kg.add_or_update_node(e.id, e.kind, e.label, confidence=e.confidence)
                    # NOTE(review): counts every add call — the same node id seen in
                    # several tasks is counted more than once.
                    nodes_added += 1
            # Relationships are matched against ALL extracted entities, including
            # those below the node-confidence threshold.
            relationships = self._extract_relationships(task.result, entities)
            for rel in relationships:
                kg.add_edge(rel.from_id, rel.to_id, rel.edge_type)
                edges_added += 1
        # Single save after processing every task, to batch disk writes.
        kg.save()
        duration = time.perf_counter() - start
        if event_log is not None:
            event_log.append_event(
                Event(
                    timestamp=datetime.now(timezone.utc),
                    type=events.KNOWLEDGE_EXTRACTED,
                    payload={
                        "run_id": run_id,
                        "nodes_added": nodes_added,
                        "edges_added": edges_added,
                        "duration_seconds": round(duration, 2),
                    },
                )
            )
        return None

    def _extract_entities(self, text: str) -> list[KGNode]:
        """Heuristic extraction. Confidence: URL/explicit 0.95, repeated 3+ 0.80, single 0.60."""
        if not text or not text.strip():
            return []
        nodes: list[KGNode] = []
        # Dedup map for CONCEPT nodes only: node id -> best confidence so far.
        # NOTE(review): document/dataset/method nodes are NOT deduped here;
        # repeated matches produce repeated KGNode entries.
        seen: dict[str, float] = {}

        # Documents: URLs, "According to", "paper:", "article:"
        for m in _DOC_PATTERN.finditer(text):
            g1, g2 = m.group(1), m.group(2)
            raw = (g1 or g2 or "").strip()
            if not raw:
                continue
            if raw.startswith("http"):
                # URLs: id truncated at 80 chars, label at 200.
                node_id = _normalize_id(NODE_DOCUMENT, raw[:80])
                nodes.append(KGNode(id=node_id, kind=NODE_DOCUMENT, label=raw[:200], confidence=0.95))
            else:
                # Textual citations: id truncated at 100 chars, label at 200.
                node_id = _normalize_id(NODE_DOCUMENT, raw[:100])
                nodes.append(KGNode(id=node_id, kind=NODE_DOCUMENT, label=raw[:200], confidence=0.95))

        # Concepts: capitalized phrases, technical terms — counted per mention
        # across sentence-split text so repetition can raise confidence.
        sentences = re.split(r"[.!?]\s+", text)
        concept_mentions: dict[str, int] = {}
        for sent in sentences:
            for m in _CONCEPT_PATTERN.finditer(sent):
                c = m.group(1).strip()
                if len(c) < 3 or c.lower() in ("the", "this", "that"):
                    continue
                concept_mentions[c] = concept_mentions.get(c, 0) + 1
            for m in _TECH_PATTERN.finditer(sent):
                c = m.group(1).strip()
                if len(c) < 2:
                    continue
                concept_mentions[c] = concept_mentions.get(c, 0) + 1
        for label, count in concept_mentions.items():
            # 3+ mentions -> 0.80, otherwise 0.60 (matches docstring contract).
            conf = 0.80 if count >= 3 else 0.60
            node_id = _normalize_id(NODE_CONCEPT, label)
            if node_id not in seen or seen[node_id] < conf:
                seen[node_id] = conf
                nodes.append(KGNode(id=node_id, kind=NODE_CONCEPT, label=label, confidence=conf))

        # Datasets: name is in group 2 ("dataset X") or group 3 ("X dataset").
        for m in _DATASET_PATTERN.finditer(text):
            name = (m.group(2) or m.group(3) or "").strip()
            if name and len(name) >= 2:
                node_id = _normalize_id(NODE_DATASET, name)
                nodes.append(KGNode(id=node_id, kind=NODE_DATASET, label=name, confidence=0.75))

        # Methods: "using X", "via X"
        for m in _METHOD_PATTERN.finditer(text):
            name = m.group(1).strip()
            if len(name) >= 2:
                node_id = _normalize_id(NODE_METHOD, name)
                nodes.append(KGNode(id=node_id, kind=NODE_METHOD, label=name, confidence=0.70))

        return nodes

    def _extract_relationships(
        self,
        text: str,
        entities: list[KGNode],
    ) -> list[KGEdge]:
        """Typed edges between detected entities; pattern match on co-occurrence."""
        edges: list[KGEdge] = []
        # Lookup keyed on lowercase label AND lowercase id-suffix (text after
        # the first ":"), so either surface form resolves to its node.
        entity_labels = {e.label.lower(): e for e in entities}
        entity_labels.update({e.id.split(":", 1)[-1].lower(): e for e in entities})

        def try_edge(pat: re.Pattern, edge_type: str) -> None:
            # Record an edge only when BOTH sides resolve to known, distinct entities.
            for m in pat.finditer(text):
                a, b = m.group(1).strip(), m.group(2).strip()
                a_lower, b_lower = a.lower(), b.lower()
                node_a = entity_labels.get(a_lower) or entity_labels.get(a)
                node_b = entity_labels.get(b_lower) or entity_labels.get(b)
                if node_a and node_b and node_a.id != node_b.id:
                    edges.append(KGEdge(from_id=node_a.id, to_id=node_b.id, edge_type=edge_type))

        try_edge(_REL_USES, EDGE_USES)
        try_edge(_REL_EXTENDS, EDGE_EXTENDS)
        try_edge(_REL_OUTPERFORMS, EDGE_OUTPERFORMS)
        try_edge(_REL_CITES, EDGE_CITES)
        return edges
|