devsper-2.1.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devsper/__init__.py +14 -0
- devsper/agents/a2a/__init__.py +27 -0
- devsper/agents/a2a/client.py +126 -0
- devsper/agents/a2a/discovery.py +24 -0
- devsper/agents/a2a/server.py +128 -0
- devsper/agents/a2a/tool_adapter.py +68 -0
- devsper/agents/a2a/types.py +49 -0
- devsper/agents/agent.py +602 -0
- devsper/agents/critic.py +80 -0
- devsper/agents/message_bus.py +124 -0
- devsper/agents/roles.py +181 -0
- devsper/agents/run_agent.py +78 -0
- devsper/analytics/__init__.py +5 -0
- devsper/analytics/tool_analytics.py +78 -0
- devsper/audit/__init__.py +5 -0
- devsper/audit/logger.py +214 -0
- devsper/bus/__init__.py +29 -0
- devsper/bus/backends/__init__.py +5 -0
- devsper/bus/backends/base.py +38 -0
- devsper/bus/backends/memory.py +55 -0
- devsper/bus/backends/redis.py +146 -0
- devsper/bus/message.py +56 -0
- devsper/bus/schema_version.py +3 -0
- devsper/bus/topics.py +19 -0
- devsper/cache/__init__.py +6 -0
- devsper/cache/embedding_index.py +98 -0
- devsper/cache/hashing.py +24 -0
- devsper/cache/store.py +153 -0
- devsper/cache/task_cache.py +191 -0
- devsper/cli/__init__.py +6 -0
- devsper/cli/commands/reg.py +733 -0
- devsper/cli/github_oauth.py +157 -0
- devsper/cli/init.py +637 -0
- devsper/cli/main.py +2956 -0
- devsper/cli/run_progress.py +103 -0
- devsper/cli/ui/__init__.py +65 -0
- devsper/cli/ui/components.py +94 -0
- devsper/cli/ui/errors.py +104 -0
- devsper/cli/ui/logging.py +120 -0
- devsper/cli/ui/onboarding.py +102 -0
- devsper/cli/ui/progress.py +43 -0
- devsper/cli/ui/run_view.py +308 -0
- devsper/cli/ui/theme.py +40 -0
- devsper/cluster/__init__.py +29 -0
- devsper/cluster/election.py +84 -0
- devsper/cluster/local.py +97 -0
- devsper/cluster/node_info.py +77 -0
- devsper/cluster/registry.py +71 -0
- devsper/cluster/router.py +117 -0
- devsper/cluster/state_backend.py +105 -0
- devsper/compliance/__init__.py +5 -0
- devsper/compliance/pii.py +147 -0
- devsper/config/__init__.py +52 -0
- devsper/config/config_loader.py +121 -0
- devsper/config/defaults.py +77 -0
- devsper/config/resolver.py +342 -0
- devsper/config/schema.py +237 -0
- devsper/credentials/__init__.py +19 -0
- devsper/credentials/cli.py +197 -0
- devsper/credentials/migration.py +124 -0
- devsper/credentials/store.py +142 -0
- devsper/dashboard/__init__.py +9 -0
- devsper/dashboard/dashboard.py +87 -0
- devsper/dev/__init__.py +25 -0
- devsper/dev/builder.py +195 -0
- devsper/dev/debugger.py +95 -0
- devsper/dev/repo_index.py +138 -0
- devsper/dev/sandbox.py +203 -0
- devsper/dev/scaffold.py +122 -0
- devsper/embeddings/__init__.py +5 -0
- devsper/embeddings/service.py +36 -0
- devsper/explainability/__init__.py +14 -0
- devsper/explainability/decision_tree.py +104 -0
- devsper/explainability/rationale.py +38 -0
- devsper/explainability/simulation.py +56 -0
- devsper/hitl/__init__.py +13 -0
- devsper/hitl/approval.py +160 -0
- devsper/hitl/escalation.py +95 -0
- devsper/intelligence/__init__.py +9 -0
- devsper/intelligence/adaptation.py +88 -0
- devsper/intelligence/analysis/__init__.py +19 -0
- devsper/intelligence/analysis/analyzer.py +71 -0
- devsper/intelligence/analysis/cost_estimator.py +66 -0
- devsper/intelligence/analysis/formatter.py +103 -0
- devsper/intelligence/analysis/run_report.py +402 -0
- devsper/intelligence/learning_engine.py +92 -0
- devsper/intelligence/strategies/__init__.py +23 -0
- devsper/intelligence/strategies/base.py +14 -0
- devsper/intelligence/strategies/code_analysis_strategy.py +33 -0
- devsper/intelligence/strategies/data_science_strategy.py +33 -0
- devsper/intelligence/strategies/document_pipeline_strategy.py +33 -0
- devsper/intelligence/strategies/experiment_strategy.py +33 -0
- devsper/intelligence/strategies/research_strategy.py +34 -0
- devsper/intelligence/strategy_selector.py +84 -0
- devsper/intelligence/synthesis.py +132 -0
- devsper/intelligence/task_optimizer.py +92 -0
- devsper/knowledge/__init__.py +5 -0
- devsper/knowledge/extractor.py +204 -0
- devsper/knowledge/knowledge_graph.py +184 -0
- devsper/knowledge/query.py +285 -0
- devsper/memory/__init__.py +35 -0
- devsper/memory/consolidation.py +138 -0
- devsper/memory/embeddings.py +60 -0
- devsper/memory/memory_index.py +97 -0
- devsper/memory/memory_router.py +62 -0
- devsper/memory/memory_store.py +221 -0
- devsper/memory/memory_types.py +54 -0
- devsper/memory/namespaces.py +45 -0
- devsper/memory/scoring.py +77 -0
- devsper/memory/summarizer.py +52 -0
- devsper/nodes/__init__.py +5 -0
- devsper/nodes/controller.py +449 -0
- devsper/nodes/rpc.py +127 -0
- devsper/nodes/single.py +161 -0
- devsper/nodes/worker.py +506 -0
- devsper/orchestration/__init__.py +19 -0
- devsper/orchestration/meta_planner.py +239 -0
- devsper/orchestration/priority_queue.py +61 -0
- devsper/plugins/__init__.py +19 -0
- devsper/plugins/marketplace/__init__.py +0 -0
- devsper/plugins/plugin_loader.py +70 -0
- devsper/plugins/plugin_registry.py +34 -0
- devsper/plugins/registry.py +83 -0
- devsper/protocols/__init__.py +6 -0
- devsper/providers/__init__.py +17 -0
- devsper/providers/anthropic.py +84 -0
- devsper/providers/base.py +75 -0
- devsper/providers/complexity_router.py +94 -0
- devsper/providers/gemini.py +36 -0
- devsper/providers/github.py +180 -0
- devsper/providers/model_router.py +40 -0
- devsper/providers/openai.py +105 -0
- devsper/providers/router/__init__.py +21 -0
- devsper/providers/router/backends/__init__.py +19 -0
- devsper/providers/router/backends/anthropic_backend.py +111 -0
- devsper/providers/router/backends/custom_backend.py +138 -0
- devsper/providers/router/backends/gemini_backend.py +89 -0
- devsper/providers/router/backends/github_backend.py +165 -0
- devsper/providers/router/backends/ollama_backend.py +104 -0
- devsper/providers/router/backends/openai_backend.py +142 -0
- devsper/providers/router/backends/vllm_backend.py +35 -0
- devsper/providers/router/base.py +60 -0
- devsper/providers/router/factory.py +92 -0
- devsper/providers/router/legacy.py +101 -0
- devsper/providers/router/router.py +135 -0
- devsper/reasoning/__init__.py +12 -0
- devsper/reasoning/graph.py +59 -0
- devsper/reasoning/nodes.py +20 -0
- devsper/reasoning/store.py +67 -0
- devsper/runtime/__init__.py +12 -0
- devsper/runtime/health.py +88 -0
- devsper/runtime/replay.py +53 -0
- devsper/runtime/replay_engine.py +142 -0
- devsper/runtime/run_history.py +204 -0
- devsper/runtime/telemetry.py +116 -0
- devsper/runtime/visualize.py +58 -0
- devsper/sandbox/__init__.py +13 -0
- devsper/sandbox/sandbox.py +161 -0
- devsper/swarm/checkpointer.py +65 -0
- devsper/swarm/executor.py +558 -0
- devsper/swarm/map_reduce.py +44 -0
- devsper/swarm/planner.py +197 -0
- devsper/swarm/prefetcher.py +91 -0
- devsper/swarm/scheduler.py +153 -0
- devsper/swarm/speculation.py +47 -0
- devsper/swarm/swarm.py +562 -0
- devsper/tools/__init__.py +33 -0
- devsper/tools/base.py +29 -0
- devsper/tools/code_intelligence/__init__.py +13 -0
- devsper/tools/code_intelligence/api_surface_extractor.py +73 -0
- devsper/tools/code_intelligence/architecture_analyzer.py +65 -0
- devsper/tools/code_intelligence/codebase_indexer.py +71 -0
- devsper/tools/code_intelligence/dependency_graph_builder.py +67 -0
- devsper/tools/code_intelligence/design_pattern_detector.py +62 -0
- devsper/tools/code_intelligence/large_function_detector.py +68 -0
- devsper/tools/code_intelligence/module_responsibility_mapper.py +56 -0
- devsper/tools/code_intelligence/parallel_codebase_analysis.py +44 -0
- devsper/tools/code_intelligence/refactor_candidate_detector.py +81 -0
- devsper/tools/code_intelligence/repository_semantic_index.py +61 -0
- devsper/tools/code_intelligence/test_coverage_estimator.py +62 -0
- devsper/tools/coding/__init__.py +12 -0
- devsper/tools/coding/analyze_code_complexity.py +48 -0
- devsper/tools/coding/dependency_analyzer.py +42 -0
- devsper/tools/coding/extract_functions.py +38 -0
- devsper/tools/coding/format_python.py +50 -0
- devsper/tools/coding/generate_docstrings.py +40 -0
- devsper/tools/coding/generate_unit_tests.py +42 -0
- devsper/tools/coding/lint_python.py +51 -0
- devsper/tools/coding/refactor_function.py +41 -0
- devsper/tools/coding/repo_structure_map.py +54 -0
- devsper/tools/coding/run_python.py +53 -0
- devsper/tools/data/__init__.py +12 -0
- devsper/tools/data/column_type_detection.py +64 -0
- devsper/tools/data/csv_summary.py +52 -0
- devsper/tools/data/dataframe_filter.py +51 -0
- devsper/tools/data/dataframe_groupby.py +47 -0
- devsper/tools/data/dataframe_stats.py +38 -0
- devsper/tools/data/dataset_sampling.py +55 -0
- devsper/tools/data/dataset_schema.py +45 -0
- devsper/tools/data/json_pretty_print.py +37 -0
- devsper/tools/data/json_query.py +46 -0
- devsper/tools/data/missing_value_report.py +47 -0
- devsper/tools/data_science/__init__.py +13 -0
- devsper/tools/data_science/correlation_heatmap.py +72 -0
- devsper/tools/data_science/dataset_bias_detector.py +49 -0
- devsper/tools/data_science/dataset_distribution_report.py +64 -0
- devsper/tools/data_science/dataset_drift_detector.py +64 -0
- devsper/tools/data_science/dataset_outlier_detector.py +65 -0
- devsper/tools/data_science/dataset_profile.py +76 -0
- devsper/tools/data_science/distributed_dataset_processor.py +54 -0
- devsper/tools/data_science/feature_engineering_suggestions.py +69 -0
- devsper/tools/data_science/feature_importance_estimator.py +82 -0
- devsper/tools/data_science/model_input_validator.py +59 -0
- devsper/tools/data_science/time_series_analyzer.py +57 -0
- devsper/tools/documents/__init__.py +11 -0
- devsper/tools/documents/_docproc.py +56 -0
- devsper/tools/documents/document_to_markdown.py +29 -0
- devsper/tools/documents/extract_document_images.py +39 -0
- devsper/tools/documents/extract_document_text.py +29 -0
- devsper/tools/documents/extract_equations.py +36 -0
- devsper/tools/documents/extract_tables.py +47 -0
- devsper/tools/documents/summarize_document.py +42 -0
- devsper/tools/documents/write_latex_document.py +133 -0
- devsper/tools/documents/write_markdown_document.py +89 -0
- devsper/tools/documents/write_word_document.py +149 -0
- devsper/tools/experiments/__init__.py +13 -0
- devsper/tools/experiments/bootstrap_estimator.py +54 -0
- devsper/tools/experiments/experiment_report_generator.py +50 -0
- devsper/tools/experiments/experiment_tracker.py +36 -0
- devsper/tools/experiments/grid_search_runner.py +50 -0
- devsper/tools/experiments/model_benchmark_runner.py +45 -0
- devsper/tools/experiments/monte_carlo_experiment.py +38 -0
- devsper/tools/experiments/parameter_sweep_runner.py +51 -0
- devsper/tools/experiments/result_comparator.py +58 -0
- devsper/tools/experiments/simulation_runner.py +43 -0
- devsper/tools/experiments/statistical_significance_test.py +56 -0
- devsper/tools/experiments/swarm_map_reduce.py +42 -0
- devsper/tools/filesystem/__init__.py +12 -0
- devsper/tools/filesystem/append_file.py +42 -0
- devsper/tools/filesystem/file_hash.py +40 -0
- devsper/tools/filesystem/file_line_count.py +36 -0
- devsper/tools/filesystem/file_metadata.py +38 -0
- devsper/tools/filesystem/file_preview.py +55 -0
- devsper/tools/filesystem/find_large_files.py +50 -0
- devsper/tools/filesystem/list_directory.py +39 -0
- devsper/tools/filesystem/read_file.py +35 -0
- devsper/tools/filesystem/search_files.py +60 -0
- devsper/tools/filesystem/write_file.py +41 -0
- devsper/tools/flagship/__init__.py +15 -0
- devsper/tools/flagship/distributed_document_analysis.py +77 -0
- devsper/tools/flagship/docproc_corpus_pipeline.py +91 -0
- devsper/tools/flagship/repository_semantic_map.py +99 -0
- devsper/tools/flagship/research_graph_builder.py +111 -0
- devsper/tools/flagship/swarm_experiment_runner.py +86 -0
- devsper/tools/knowledge/__init__.py +10 -0
- devsper/tools/knowledge/citation_graph_builder.py +69 -0
- devsper/tools/knowledge/concept_frequency_analyzer.py +74 -0
- devsper/tools/knowledge/corpus_builder.py +66 -0
- devsper/tools/knowledge/cross_document_entity_linker.py +71 -0
- devsper/tools/knowledge/document_corpus_summary.py +68 -0
- devsper/tools/knowledge/document_topic_extractor.py +58 -0
- devsper/tools/knowledge/knowledge_graph_extractor.py +58 -0
- devsper/tools/knowledge/timeline_extractor.py +59 -0
- devsper/tools/math/__init__.py +12 -0
- devsper/tools/math/calculate_expression.py +52 -0
- devsper/tools/math/correlation.py +44 -0
- devsper/tools/math/distribution_summary.py +39 -0
- devsper/tools/math/histogram.py +53 -0
- devsper/tools/math/linear_regression.py +47 -0
- devsper/tools/math/matrix_multiply.py +38 -0
- devsper/tools/math/mean_std.py +35 -0
- devsper/tools/math/monte_carlo_simulation.py +43 -0
- devsper/tools/math/polynomial_fit.py +40 -0
- devsper/tools/math/random_sample.py +36 -0
- devsper/tools/mcp/__init__.py +23 -0
- devsper/tools/mcp/adapter.py +53 -0
- devsper/tools/mcp/client.py +235 -0
- devsper/tools/mcp/discovery.py +53 -0
- devsper/tools/memory/__init__.py +16 -0
- devsper/tools/memory/delete_memory.py +25 -0
- devsper/tools/memory/list_memory.py +34 -0
- devsper/tools/memory/search_memory.py +36 -0
- devsper/tools/memory/store_memory.py +47 -0
- devsper/tools/memory/summarize_memory.py +41 -0
- devsper/tools/memory/tag_memory.py +47 -0
- devsper/tools/pipelines.py +92 -0
- devsper/tools/registry.py +39 -0
- devsper/tools/research/__init__.py +12 -0
- devsper/tools/research/arxiv_download.py +55 -0
- devsper/tools/research/arxiv_search.py +58 -0
- devsper/tools/research/citation_extractor.py +35 -0
- devsper/tools/research/duckduckgo_search.py +42 -0
- devsper/tools/research/paper_metadata_extractor.py +45 -0
- devsper/tools/research/paper_summarizer.py +41 -0
- devsper/tools/research/research_question_generator.py +39 -0
- devsper/tools/research/topic_cluster.py +46 -0
- devsper/tools/research/web_search.py +47 -0
- devsper/tools/research/wikipedia_lookup.py +50 -0
- devsper/tools/research_advanced/__init__.py +14 -0
- devsper/tools/research_advanced/citation_context_extractor.py +60 -0
- devsper/tools/research_advanced/literature_review_generator.py +79 -0
- devsper/tools/research_advanced/methodology_extractor.py +58 -0
- devsper/tools/research_advanced/paper_contribution_extractor.py +50 -0
- devsper/tools/research_advanced/paper_dataset_identifier.py +49 -0
- devsper/tools/research_advanced/paper_method_comparator.py +62 -0
- devsper/tools/research_advanced/paper_similarity_search.py +69 -0
- devsper/tools/research_advanced/paper_trend_analyzer.py +69 -0
- devsper/tools/research_advanced/parallel_document_analyzer.py +56 -0
- devsper/tools/research_advanced/research_gap_finder.py +71 -0
- devsper/tools/research_advanced/research_topic_mapper.py +69 -0
- devsper/tools/research_advanced/swarm_literature_review.py +58 -0
- devsper/tools/scoring/__init__.py +52 -0
- devsper/tools/scoring/report.py +44 -0
- devsper/tools/scoring/scorer.py +39 -0
- devsper/tools/scoring/selector.py +61 -0
- devsper/tools/scoring/store.py +267 -0
- devsper/tools/selector.py +130 -0
- devsper/tools/system/__init__.py +12 -0
- devsper/tools/system/cpu_usage.py +22 -0
- devsper/tools/system/disk_usage.py +35 -0
- devsper/tools/system/environment_variables.py +29 -0
- devsper/tools/system/memory_usage.py +23 -0
- devsper/tools/system/pip_install.py +44 -0
- devsper/tools/system/pip_search.py +29 -0
- devsper/tools/system/process_list.py +34 -0
- devsper/tools/system/python_package_list.py +40 -0
- devsper/tools/system/run_shell_command.py +51 -0
- devsper/tools/system/system_info.py +26 -0
- devsper/tools/tool_runner.py +122 -0
- devsper/tui/__init__.py +5 -0
- devsper/tui/activity_feed_view.py +73 -0
- devsper/tui/adaptive_tasks_view.py +75 -0
- devsper/tui/agent_role_view.py +35 -0
- devsper/tui/app.py +395 -0
- devsper/tui/dashboard_screen.py +290 -0
- devsper/tui/dev_view.py +99 -0
- devsper/tui/inject_screen.py +73 -0
- devsper/tui/knowledge_graph_view.py +46 -0
- devsper/tui/layout.py +43 -0
- devsper/tui/logs_view.py +83 -0
- devsper/tui/memory_view.py +58 -0
- devsper/tui/performance_view.py +33 -0
- devsper/tui/reasoning_graph_view.py +39 -0
- devsper/tui/results_view.py +139 -0
- devsper/tui/swarm_view.py +37 -0
- devsper/tui/task_detail_screen.py +55 -0
- devsper/tui/task_view.py +103 -0
- devsper/types/event.py +97 -0
- devsper/types/exceptions.py +21 -0
- devsper/types/swarm.py +41 -0
- devsper/types/task.py +80 -0
- devsper/upgrade/__init__.py +21 -0
- devsper/upgrade/changelog.py +124 -0
- devsper/upgrade/cli.py +145 -0
- devsper/upgrade/installer.py +103 -0
- devsper/upgrade/notifier.py +52 -0
- devsper/upgrade/version_check.py +121 -0
- devsper/utils/event_logger.py +88 -0
- devsper/utils/http.py +43 -0
- devsper/utils/models.py +54 -0
- devsper/visualization/__init__.py +5 -0
- devsper/visualization/dag_export.py +67 -0
- devsper/workflow/__init__.py +18 -0
- devsper/workflow/conditions.py +157 -0
- devsper/workflow/context.py +108 -0
- devsper/workflow/loader.py +156 -0
- devsper/workflow/resolver.py +109 -0
- devsper/workflow/runner.py +562 -0
- devsper/workflow/schema.py +63 -0
- devsper/workflow/validator.py +128 -0
- devsper-2.1.6.dist-info/METADATA +346 -0
- devsper-2.1.6.dist-info/RECORD +375 -0
- devsper-2.1.6.dist-info/WHEEL +4 -0
- devsper-2.1.6.dist-info/entry_points.txt +3 -0
- devsper-2.1.6.dist-info/licenses/LICENSE +639 -0

devsper/knowledge/knowledge_graph.py

@@ -0,0 +1,184 @@
"""
Knowledge graph: build relationships between stored memory.

Nodes: documents, concepts, datasets, methods.
Edges: mentions, cites, related_to, uses, extends, outperforms, constrains, blocks.
Uses networkx. v1.8: add_or_update_node, add_edge, save/load for extraction.
"""

import json
import os
import re
from typing import Any

import networkx as nx

from devsper.memory.memory_store import MemoryStore, get_default_store
from devsper.memory.memory_types import MemoryRecord, MemoryType


NODE_DOCUMENT = "document"
NODE_CONCEPT = "concept"
NODE_DATASET = "dataset"
NODE_METHOD = "method"

EDGE_MENTIONS = "mentions"
EDGE_CITES = "cites"
EDGE_RELATED_TO = "related_to"
EDGE_USES = "uses"
EDGE_EXTENDS = "extends"
EDGE_OUTPERFORMS = "outperforms"
EDGE_CONSTRAINS = "constrains"
EDGE_BLOCKS = "blocks"


def _extract_concepts(text: str, limit: int = 15) -> list[str]:
    """Heuristic: extract likely concepts (title-case phrases, known tokens)."""
    concepts = set()
    for m in re.finditer(r"\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\b", text):
        concepts.add(m.group(1).strip())
    tokens = re.findall(r"\b(diffusion|transformer|dataset|model|training|evaluation|baseline|embedding|neural)\b", text.lower())
    concepts.update(tokens)
    return list(concepts)[:limit]


def _extract_datasets(text: str, limit: int = 5) -> list[str]:
    """Heuristic: extract dataset-like names (e.g. MNIST, ImageNet)."""
    datasets = set()
    for m in re.finditer(r"\b([A-Z][A-Za-z0-9\-]+(?:-\d+)?)\b", text):
        w = m.group(1)
        if len(w) >= 3 and w not in ("The", "This", "These", "When", "What"):
            datasets.add(w)
    return list(datasets)[:limit]


def _extract_methods(text: str, limit: int = 5) -> list[str]:
    """Heuristic: method-like phrases (e.g. 'X method', 'Y approach')."""
    methods = set()
    for m in re.finditer(r"(\w+(?:\s+\w+)?)\s+(?:method|approach|algorithm|framework)\b", text, re.IGNORECASE):
        methods.add(m.group(1).strip())
    return list(methods)[:limit]


class KnowledgeGraph:
    """
    Build and query a graph over memory: nodes are documents/concepts/datasets/methods,
    edges are mentions, cites, related_to.
    """

    def __init__(self, store: MemoryStore | None = None) -> None:
        self.store = store or get_default_store()
        self._graph: nx.MultiDiGraph = nx.MultiDiGraph()

    def build_from_memory(self, merge: bool = False) -> nx.MultiDiGraph:
        """
        Build graph from all stored memory. Returns the graph.
        Nodes: document:<id>, concept:<name>, dataset:<name>, method:<name>.
        Edges: document --mentions--> concept/dataset/method; concept --related_to--> concept.
        If merge=True, add to existing graph instead of clearing (e.g. after load()).
        """
        if not merge:
            self._graph = nx.MultiDiGraph()
        records = self.store.list_memory(limit=2000)
        for r in records:
            doc_id = f"document:{r.id}"
            self._graph.add_node(doc_id, kind=NODE_DOCUMENT, memory_id=r.id, label=r.content[:200])
            for c in _extract_concepts(r.content):
                node = f"concept:{c}"
                self._graph.add_node(node, kind=NODE_CONCEPT, label=c)
                self._graph.add_edge(doc_id, node, type=EDGE_MENTIONS)
            for d in _extract_datasets(r.content):
                node = f"dataset:{d}"
                self._graph.add_node(node, kind=NODE_DATASET, label=d)
                self._graph.add_edge(doc_id, node, type=EDGE_MENTIONS)
            for m in _extract_methods(r.content):
                node = f"method:{m}"
                self._graph.add_node(node, kind=NODE_METHOD, label=m)
                self._graph.add_edge(doc_id, node, type=EDGE_MENTIONS)
        doc_nodes = [n for n, attrs in self._graph.nodes(data=True) if attrs.get("kind") == NODE_DOCUMENT]
        for doc in doc_nodes:
            succs = list(self._graph.successors(doc))
            concepts = [s for s in succs if s.startswith("concept:")]
            for i, a in enumerate(concepts):
                for b in concepts[i + 1 :]:
                    self._graph.add_edge(a, b, type=EDGE_RELATED_TO)
                    self._graph.add_edge(b, a, type=EDGE_RELATED_TO)
        return self._graph

    @property
    def graph(self) -> nx.MultiDiGraph:
        """Return the current graph (build first with build_from_memory if needed)."""
        return self._graph

    def get_neighbors(self, node_id: str, edge_type: str | None = None) -> list[tuple[str, str]]:
        """Return list of (neighbor_id, edge_type) for outgoing edges."""
        if node_id not in self._graph:
            return []
        out = []
        for _, v, data in self._graph.out_edges(node_id, data=True):
            et = data.get("type", "")
            if edge_type is None or et == edge_type:
                out.append((v, et))
        return out

    def get_documents_mentioning(self, concept_or_dataset: str) -> list[str]:
        """Return memory ids of documents that mention the given concept or dataset."""
        node = f"concept:{concept_or_dataset}"
        if node not in self._graph:
            node = f"dataset:{concept_or_dataset}"
        if node not in self._graph:
            return []
        doc_ids = []
        for pred in self._graph.predecessors(node):
            if pred.startswith("document:"):
                doc_ids.append(self._graph.nodes[pred].get("memory_id", pred.replace("document:", "")))
        return doc_ids

    def add_or_update_node(self, node_id: str, kind: str, label: str, **attrs: Any) -> None:
        """v1.8: Add or update a node (e.g. from KnowledgeExtractor)."""
        self._graph.add_node(node_id, kind=kind, label=label, **attrs)

    def add_edge(self, from_id: str, to_id: str, edge_type: str) -> None:
        """v1.8: Add a directed edge (e.g. from KnowledgeExtractor)."""
        self._graph.add_node(from_id, **self._graph.nodes.get(from_id, {}))
        self._graph.add_node(to_id, **self._graph.nodes.get(to_id, {}))
        self._graph.add_edge(from_id, to_id, type=edge_type)

    def _persist_path(self) -> str:
        """Path to persisted graph JSON (data_dir/knowledge_graph.json)."""
        try:
            from devsper.config import get_config
            base = get_config().data_dir
        except Exception:
            base = os.environ.get("DEVSPER_DATA_DIR", ".devsper")
        os.makedirs(base, exist_ok=True)
        return os.path.join(base, "knowledge_graph.json")

    def save(self) -> None:
        """v1.8: Persist graph to JSON (nodes and edges only; no embeddings)."""
        nodes = []
        for nid, data in self._graph.nodes(data=True):
            nodes.append({"id": nid, **{k: v for k, v in data.items() if isinstance(v, (str, int, float, bool))}})
        edges = []
        for u, v, data in self._graph.edges(data=True):
            edges.append({"from": u, "to": v, "type": data.get("type", "related_to")})
        payload = {"nodes": nodes, "edges": edges}
        with open(self._persist_path(), "w", encoding="utf-8") as f:
            json.dump(payload, f, indent=0)

    def load(self) -> bool:
        """v1.8: Load graph from JSON if file exists; merge into _graph. Returns True if loaded."""
        path = self._persist_path()
        if not os.path.isfile(path):
            return False
        with open(path, "r", encoding="utf-8") as f:
            payload = json.load(f)
        for n in payload.get("nodes", []):
            nid = n.pop("id", None)
            if nid:
                self._graph.add_node(nid, **n)
        for e in payload.get("edges", []):
            u, v = e.get("from"), e.get("to")
            if u and v:
                self._graph.add_edge(u, v, type=e.get("type", "related_to"))
        return True
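
For orientation, a minimal usage sketch of the KnowledgeGraph API added above. The node id "document:mem-123" and the label "ImageNet" are made-up illustrations, and get_default_store() is assumed to work without extra configuration (its behavior is not shown in this diff):

```python
from devsper.knowledge.knowledge_graph import KnowledgeGraph

# Build the graph from whatever the default MemoryStore currently holds
# (nodes: document:<id>, concept:<name>, dataset:<name>, method:<name>).
kg = KnowledgeGraph()  # uses get_default_store() when no store is passed
kg.build_from_memory()

# Outgoing "mentions" edges from one document node (hypothetical id).
print(kg.get_neighbors("document:mem-123", edge_type="mentions"))

# Memory ids of documents that mention a concept or dataset label (hypothetical label).
print(kg.get_documents_mentioning("ImageNet"))

# Persist to <data_dir>/knowledge_graph.json, then reload and merge in fresh memory.
kg.save()
kg2 = KnowledgeGraph()
if kg2.load():
    kg2.build_from_memory(merge=True)
```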
devsper/knowledge/query.py

@@ -0,0 +1,285 @@
"""
Knowledge graph query: entity search and relationship traversal.
v1.8: query_for_planning for knowledge-guided planning.
"""

from dataclasses import dataclass
from difflib import SequenceMatcher

from devsper.knowledge.knowledge_graph import (
    KnowledgeGraph,
    NODE_DOCUMENT,
    NODE_CONCEPT,
    NODE_DATASET,
    NODE_METHOD,
)


@dataclass
class QueryResult:
    """Structured result: entities matching the query and edges (optionally traversed)."""

    entities: list[tuple[str, str]]  # (node_id, label)
    edges: list[tuple[str, str, str]]  # (from_id, to_id, edge_type)
    documents: list[str]  # memory ids of documents mentioning matched entities


@dataclass
class PlanningContext:
    """v1.8: Context from the KG for planner injection."""

    relevant_concepts: list[str]
    prior_findings: list[str]
    known_constraints: list[str]
    related_methods: list[str]
    confidence: float


def _node_matches_label(node_id: str, label: str, query_lower: str) -> bool:
    """True if the node label or id contains the query terms."""
    if not label:
        return False
    return query_lower in label.lower() or query_lower in node_id.lower()


def entity_search(kg: KnowledgeGraph, query_text: str) -> list[tuple[str, str]]:
    """
    Find nodes (concept, dataset, method) whose label matches the query.
    Returns list of (node_id, label).
    """
    query_lower = (query_text or "").strip().lower()
    if not query_lower:
        return []
    g = kg.graph
    matches: list[tuple[str, str]] = []
    for node_id, data in g.nodes(data=True):
        kind = data.get("kind")
        if kind in (NODE_DOCUMENT,):
            continue
        label = data.get("label", "") or (node_id.split(":", 1)[-1] if ":" in node_id else node_id)
        if _node_matches_label(node_id, label, query_lower):
            matches.append((node_id, label))
    return matches


def traverse(
    kg: KnowledgeGraph,
    node_ids: list[str],
    hops: int = 1,
    edge_type: str | None = None,
) -> list[tuple[str, str, str]]:
    """
    Traverse from the given nodes up to `hops` steps. Returns list of (from_id, to_id, edge_type).
    """
    if hops < 1:
        return []
    g = kg.graph
    edges: list[tuple[str, str, str]] = []
    frontier = set(node_ids)
    seen_edges: set[tuple[str, str]] = set()
    for _ in range(hops):
        next_frontier = set()
        for n in frontier:
            if n not in g:
                continue
            for _, v, data in g.out_edges(n, data=True):
                et = data.get("type", "")
                if edge_type is not None and et != edge_type:
                    continue
                key = (n, v)
                if key not in seen_edges:
                    seen_edges.add(key)
                    edges.append((n, v, et))
                next_frontier.add(v)
        frontier = next_frontier
    return edges


def query(
    kg: KnowledgeGraph,
    query_text: str,
    traverse_hops: int = 1,
) -> QueryResult:
    """
    Run entity search for query_text, optionally traverse relationships (1-2 hops).
    Returns QueryResult with entities, edges, and document ids.
    """
    entities = entity_search(kg, query_text)
    node_ids = [e[0] for e in entities]
    edges = traverse(kg, node_ids, hops=traverse_hops) if node_ids else []
    documents: list[str] = []
    for nid, _ in entities:
        if nid.startswith("document:"):
            continue
        concept_or_dataset = nid.split(":", 1)[-1] if ":" in nid else nid
        docs = kg.get_documents_mentioning(concept_or_dataset)
        documents.extend(docs)
    documents = list(dict.fromkeys(documents))
    return QueryResult(entities=entities, edges=edges, documents=documents)


# Stopwords for task term extraction (simple heuristic)
_PLANNING_STOPWORDS = frozenset(
    {
        "a", "an", "the", "and", "or", "but", "in", "on", "at", "to", "for",
        "of", "with", "by", "from", "as", "is", "was", "are", "were", "be",
        "been", "being", "have", "has", "had", "do", "does", "did", "will",
        "would", "could", "should", "may", "might", "must", "can", "this",
        "that", "these", "those", "it", "its", "into", "through", "during",
    }
)


def _extract_candidate_terms(task_description: str) -> list[str]:
    """Extract candidate terms: split on whitespace, drop stopwords, keep the rest (deduplicated)."""
    if not task_description or not task_description.strip():
        return []
    words = task_description.strip().split()
    candidates = []
    for w in words:
        w_clean = w.strip(".,;:!?").lower()
        if not w_clean or w_clean in _PLANNING_STOPWORDS:
            continue
        # No capitalization or domain filter beyond stopword removal: every remaining term is kept.
        candidates.append(w_clean)
    return list(dict.fromkeys(candidates))


def _fuzzy_match_label(term: str, label: str, threshold: float = 0.8) -> float:
    """Return similarity ratio in [0, 1]; 0 if below threshold."""
    if not label:
        return 0.0
    label_lower = label.lower()
    term_lower = term.lower()
    if term_lower in label_lower:
        return min(1.0, 0.8 + 0.2 * (len(term_lower) / max(1, len(label_lower))))
    r = SequenceMatcher(None, term_lower, label_lower).ratio()
    return r if r >= threshold else 0.0


def query_for_planning(kg: KnowledgeGraph, task_description: str) -> PlanningContext:
    """
    Build planning context from the KG: concepts, findings, constraints, methods.
    Uses term extraction, fuzzy node matching, a 2-hop neighborhood, and centrality + match scoring.
    """
    concepts: list[str] = []
    findings: list[str] = []
    constraints: list[str] = []
    methods: list[str] = []
    g = kg.graph
    if g.number_of_nodes() == 0:
        return PlanningContext(
            relevant_concepts=[],
            prior_findings=[],
            known_constraints=[],
            related_methods=[],
            confidence=0.0,
        )

    terms = _extract_candidate_terms(task_description)
    matched_nodes: list[tuple[str, str, float]] = []  # (node_id, label, match_score)
    for node_id, data in g.nodes(data=True):
        kind = data.get("kind")
        if kind == NODE_DOCUMENT:
            continue
        label = data.get("label", "") or (node_id.split(":", 1)[-1] if ":" in node_id else node_id)
        for t in terms:
            score = _fuzzy_match_label(t, label, 0.8)
            if score > 0:
                matched_nodes.append((node_id, label, score))
                break

    if not matched_nodes:
        return PlanningContext(
            relevant_concepts=[],
            prior_findings=[],
            known_constraints=[],
            related_methods=[],
            confidence=0.0,
        )

    neighborhood = set(n[0] for n in matched_nodes)
    for _ in range(2):
        next_n = set()
        for nid in neighborhood:
            if nid not in g:
                continue
            for _, v, _ in g.out_edges(nid, data=True):
                next_n.add(v)
            for u, _, _ in g.in_edges(nid, data=True):
                next_n.add(u)
        neighborhood |= next_n

    match_scores: dict[str, float] = {}
    for nid, label, s in matched_nodes:
        match_scores[nid] = max(match_scores.get(nid, 0), s)
    try:
        degree = dict(g.degree(neighborhood))
    except Exception:
        degree = {n: 0 for n in neighborhood}
    scores: list[tuple[str, str, str, float]] = []
    for nid in neighborhood:
        data = g.nodes.get(nid, {})
        kind = data.get("kind", "")
        label = data.get("label", "") or (nid.split(":", 1)[-1] if ":" in nid else nid)
        deg = degree.get(nid, 0)
        recency = 1.0
        ms = match_scores.get(nid, 0.5)
        total = (deg * 0.3) + (recency * 0.2) + (ms * 0.5)
        scores.append((nid, kind, label, total))

    scores.sort(key=lambda x: -x[3])
    top = scores[:30]
    for nid, kind, label, _ in top:
        if kind == NODE_CONCEPT and label not in concepts:
            concepts.append(label)
        elif kind == NODE_METHOD and label not in methods:
            methods.append(label)

    for u, v, data in g.out_edges(neighborhood, data=True):
        if data.get("type") in ("constrains", "blocks"):
            edge_desc = f"{u.split(':', 1)[-1] if ':' in u else u} -> {v.split(':', 1)[-1] if ':' in v else v}"
            if edge_desc not in constraints:
                constraints.append(edge_desc)

    doc_nodes = [n for n in neighborhood if g.nodes.get(n, {}).get("kind") == NODE_DOCUMENT]
    for d in doc_nodes[:5]:
        summary = (g.nodes[d].get("label", "") or d)[:200]
        if summary and summary not in findings:
            findings.append(summary)

    total_nodes = g.number_of_nodes()
    found = len(concepts) + len(methods) + len(findings) + len(constraints)
    confidence = min(1.0, (found / max(1, total_nodes)) * 2.0) if total_nodes else 0.0
    confidence = max(0.0, min(1.0, confidence))

    return PlanningContext(
        relevant_concepts=concepts[:15],
        prior_findings=findings[:5],
        known_constraints=constraints[:10],
        related_methods=methods[:10],
        confidence=confidence,
    )


def format_planning_context(ctx: PlanningContext, max_tokens: int = 300) -> str:
    """Render PlanningContext as a concise bullet list; only non-empty sections; prepend confidence."""
    parts = []
    if ctx.confidence >= 0.7:
        parts.append("High confidence")
    elif ctx.confidence >= 0.3:
        parts.append("Partial context")
    if ctx.relevant_concepts:
        parts.append("Concepts: " + ", ".join(ctx.relevant_concepts[:10]))
    if ctx.prior_findings:
        parts.append("Prior findings: " + " | ".join(s[:80] for s in ctx.prior_findings[:5]))
    if ctx.known_constraints:
        parts.append("Constraints: " + "; ".join(ctx.known_constraints[:5]))
    if ctx.related_methods:
        parts.append("Methods: " + ", ".join(ctx.related_methods[:8]))
    text = "\n".join(parts)
    if len(text) > max_tokens * 4:
        text = text[: max_tokens * 4] + "..."
    return text
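
A short sketch of how these query helpers compose, assuming the graph has already been built (or loaded) as in knowledge_graph.py above; the query strings are made-up examples:

```python
from devsper.knowledge.knowledge_graph import KnowledgeGraph
from devsper.knowledge.query import query, query_for_planning, format_planning_context

kg = KnowledgeGraph()
kg.build_from_memory()

# Entity search plus 1-hop traversal; returns matched entities, edges, and document ids.
result = query(kg, "diffusion transformer", traverse_hops=1)
print(result.entities)
print(result.documents)

# Knowledge-guided planning context: concepts, prior findings, constraints, methods.
ctx = query_for_planning(kg, "Evaluate a diffusion baseline on ImageNet")
print(ctx.confidence)
print(format_planning_context(ctx, max_tokens=300))
```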
devsper/memory/__init__.py

@@ -0,0 +1,35 @@
"""
Swarm memory: persistent store, semantic index, and router for agent recall.

- memory_types: EpisodicMemory, SemanticMemory, ArtifactMemory, ResearchMemory
- memory_store: SQLite-backed store (store, retrieve, delete, list)
- memory_index: vector/semantic search (query_memory, top_k)
- memory_router: select relevant memories for a task
"""

from devsper.memory.memory_types import (
    EpisodicMemory,
    SemanticMemory,
    ArtifactMemory,
    ResearchMemory,
    MemoryRecord,
    MemoryType,
)
from devsper.memory.memory_store import MemoryStore
from devsper.memory.memory_index import MemoryIndex
from devsper.memory.memory_router import MemoryRouter

__all__ = [
    "EpisodicMemory",
    "SemanticMemory",
    "ArtifactMemory",
    "ResearchMemory",
    "MemoryRecord",
    "MemoryType",
    "MemoryStore",
    "MemoryIndex",
    "MemoryRouter",
]

# Optional: import submodules for summarizer, namespaces, scoring
# from devsper.memory import summarizer, namespaces, scoring
devsper/memory/consolidation.py

@@ -0,0 +1,138 @@
"""
v1.8: Memory consolidation: cluster similar records, summarize clusters, archive originals.
"""

from dataclasses import dataclass

from devsper.memory.memory_store import MemoryStore, generate_memory_id
from devsper.memory.memory_index import MemoryIndex
from devsper.memory.memory_types import MemoryRecord, MemoryType
from devsper.utils.models import generate


@dataclass
class ConsolidationReport:
    clusters_found: int
    clusters_consolidated: int
    records_archived: int
    records_created: int
    tokens_saved_estimate: int


def _cosine_sim(a: list[float], b: list[float]) -> float:
    if not a or not b or len(a) != len(b):
        return 0.0
    dot = sum(x * y for x, y in zip(a, b))
    na = sum(x * x for x in a) ** 0.5
    nb = sum(x * x for x in b) ** 0.5
    if na == 0 or nb == 0:
        return 0.0
    return dot / (na * nb)


class MemoryConsolidator:
    """
    Clusters similar memory records, summarizes each cluster into one
    high-quality record, and archives the originals. Keeps agent context
    tight for long-running projects.
    """

    def __init__(self, min_cluster_size: int = 3):
        self.min_cluster_size = min_cluster_size

    async def _summarize_cluster(self, records: list[MemoryRecord], model: str) -> str:
        """Synthesize N related memory records into one comprehensive record. Max 300 words."""
        blocks = "\n\n".join((r.content or "")[:500] for r in records[:20])
        prompt = f"""Synthesize these {len(records)} related memory records into one comprehensive, information-dense record. Preserve all unique facts. Max 300 words.

Records:
{blocks}"""
        out = generate(model, prompt)
        return (out or "").strip()[:2000]

    async def consolidate(
        self,
        memory_store: MemoryStore,
        memory_index: MemoryIndex,
        worker_model: str,
        dry_run: bool = False,
    ) -> ConsolidationReport:
        """
        1. Load all non-archived memory records
        2. Cluster by embedding similarity (AgglomerativeClustering, distance_threshold=0.25)
        3. For clusters with >= min_cluster_size records: generate a summary
        4. Store the summary as a new MemoryRecord (type=semantic, tagged "consolidated")
        5. Archive the originals (archived=True)
        6. Return a report
        """
        try:
            from sklearn.cluster import AgglomerativeClustering
            import numpy as np
        except ImportError:
            raise ImportError(
                "Memory consolidation requires scikit-learn. Install with: pip install devsper[data]"
            ) from None

        records = memory_store.list_memory(limit=5000, include_archived=False)
        with_emb = [r for r in records if r.embedding is not None]
        if len(with_emb) < self.min_cluster_size:
            return ConsolidationReport(
                clusters_found=0,
                clusters_consolidated=0,
                records_archived=0,
                records_created=0,
                tokens_saved_estimate=0,
            )

        X = np.array(with_emb[0].embedding)
        for r in with_emb[1:]:
            X = np.vstack([X, r.embedding])
        clustering = AgglomerativeClustering(
            n_clusters=None,
            distance_threshold=0.25,
            metric="cosine",
            linkage="average",
        )
        labels = clustering.fit_predict(X)
        unique_labels = set(labels)
        clusters_found = len(unique_labels)
        clusters_consolidated = 0
        records_archived = 0
        records_created = 0
        avg_tokens = 100

        for lab in unique_labels:
            indices = [i for i, l in enumerate(labels) if l == lab]
            cluster_records = [with_emb[i] for i in indices]
            if len(cluster_records) < self.min_cluster_size:
                continue
            clusters_consolidated += 1
            if dry_run:
                records_archived += len(cluster_records)
                records_created += 1
                continue
            summary_text = await self._summarize_cluster(cluster_records, worker_model)
            summary_record = MemoryRecord(
                id=generate_memory_id(),
                memory_type=MemoryType.SEMANTIC,
                content=summary_text,
                tags=["consolidated"],
                run_id="",
                archived=False,
            )
            summary_record = memory_index.ensure_embedding(summary_record)
            memory_store.store(summary_record)
            records_created += 1
            for r in cluster_records:
                memory_store.set_archived(r.id, True)
                records_archived += 1

        tokens_saved_estimate = records_archived * avg_tokens
        return ConsolidationReport(
            clusters_found=clusters_found,
            clusters_consolidated=clusters_consolidated,
            records_archived=records_archived,
            records_created=records_created,
            tokens_saved_estimate=tokens_saved_estimate,
        )
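
Since consolidate is a coroutine and requires scikit-learn (per the ImportError above), a caller would await it; a minimal sketch, assuming MemoryStore and MemoryIndex can be constructed with their default arguments and using a placeholder worker model name (neither detail is shown in this diff):

```python
import asyncio

from devsper.memory import MemoryStore, MemoryIndex
from devsper.memory.consolidation import MemoryConsolidator


async def main() -> None:
    store = MemoryStore()  # assumed: default constructor
    index = MemoryIndex()  # assumed: default constructor
    consolidator = MemoryConsolidator(min_cluster_size=3)
    # dry_run=True counts what would be consolidated without writing or archiving anything.
    report = await consolidator.consolidate(store, index, worker_model="worker-model", dry_run=True)
    print(report)


asyncio.run(main())
```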