devsper 2.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (375) hide show
  1. devsper/__init__.py +14 -0
  2. devsper/agents/a2a/__init__.py +27 -0
  3. devsper/agents/a2a/client.py +126 -0
  4. devsper/agents/a2a/discovery.py +24 -0
  5. devsper/agents/a2a/server.py +128 -0
  6. devsper/agents/a2a/tool_adapter.py +68 -0
  7. devsper/agents/a2a/types.py +49 -0
  8. devsper/agents/agent.py +602 -0
  9. devsper/agents/critic.py +80 -0
  10. devsper/agents/message_bus.py +124 -0
  11. devsper/agents/roles.py +181 -0
  12. devsper/agents/run_agent.py +78 -0
  13. devsper/analytics/__init__.py +5 -0
  14. devsper/analytics/tool_analytics.py +78 -0
  15. devsper/audit/__init__.py +5 -0
  16. devsper/audit/logger.py +214 -0
  17. devsper/bus/__init__.py +29 -0
  18. devsper/bus/backends/__init__.py +5 -0
  19. devsper/bus/backends/base.py +38 -0
  20. devsper/bus/backends/memory.py +55 -0
  21. devsper/bus/backends/redis.py +146 -0
  22. devsper/bus/message.py +56 -0
  23. devsper/bus/schema_version.py +3 -0
  24. devsper/bus/topics.py +19 -0
  25. devsper/cache/__init__.py +6 -0
  26. devsper/cache/embedding_index.py +98 -0
  27. devsper/cache/hashing.py +24 -0
  28. devsper/cache/store.py +153 -0
  29. devsper/cache/task_cache.py +191 -0
  30. devsper/cli/__init__.py +6 -0
  31. devsper/cli/commands/reg.py +733 -0
  32. devsper/cli/github_oauth.py +157 -0
  33. devsper/cli/init.py +637 -0
  34. devsper/cli/main.py +2956 -0
  35. devsper/cli/run_progress.py +103 -0
  36. devsper/cli/ui/__init__.py +65 -0
  37. devsper/cli/ui/components.py +94 -0
  38. devsper/cli/ui/errors.py +104 -0
  39. devsper/cli/ui/logging.py +120 -0
  40. devsper/cli/ui/onboarding.py +102 -0
  41. devsper/cli/ui/progress.py +43 -0
  42. devsper/cli/ui/run_view.py +308 -0
  43. devsper/cli/ui/theme.py +40 -0
  44. devsper/cluster/__init__.py +29 -0
  45. devsper/cluster/election.py +84 -0
  46. devsper/cluster/local.py +97 -0
  47. devsper/cluster/node_info.py +77 -0
  48. devsper/cluster/registry.py +71 -0
  49. devsper/cluster/router.py +117 -0
  50. devsper/cluster/state_backend.py +105 -0
  51. devsper/compliance/__init__.py +5 -0
  52. devsper/compliance/pii.py +147 -0
  53. devsper/config/__init__.py +52 -0
  54. devsper/config/config_loader.py +121 -0
  55. devsper/config/defaults.py +77 -0
  56. devsper/config/resolver.py +342 -0
  57. devsper/config/schema.py +237 -0
  58. devsper/credentials/__init__.py +19 -0
  59. devsper/credentials/cli.py +197 -0
  60. devsper/credentials/migration.py +124 -0
  61. devsper/credentials/store.py +142 -0
  62. devsper/dashboard/__init__.py +9 -0
  63. devsper/dashboard/dashboard.py +87 -0
  64. devsper/dev/__init__.py +25 -0
  65. devsper/dev/builder.py +195 -0
  66. devsper/dev/debugger.py +95 -0
  67. devsper/dev/repo_index.py +138 -0
  68. devsper/dev/sandbox.py +203 -0
  69. devsper/dev/scaffold.py +122 -0
  70. devsper/embeddings/__init__.py +5 -0
  71. devsper/embeddings/service.py +36 -0
  72. devsper/explainability/__init__.py +14 -0
  73. devsper/explainability/decision_tree.py +104 -0
  74. devsper/explainability/rationale.py +38 -0
  75. devsper/explainability/simulation.py +56 -0
  76. devsper/hitl/__init__.py +13 -0
  77. devsper/hitl/approval.py +160 -0
  78. devsper/hitl/escalation.py +95 -0
  79. devsper/intelligence/__init__.py +9 -0
  80. devsper/intelligence/adaptation.py +88 -0
  81. devsper/intelligence/analysis/__init__.py +19 -0
  82. devsper/intelligence/analysis/analyzer.py +71 -0
  83. devsper/intelligence/analysis/cost_estimator.py +66 -0
  84. devsper/intelligence/analysis/formatter.py +103 -0
  85. devsper/intelligence/analysis/run_report.py +402 -0
  86. devsper/intelligence/learning_engine.py +92 -0
  87. devsper/intelligence/strategies/__init__.py +23 -0
  88. devsper/intelligence/strategies/base.py +14 -0
  89. devsper/intelligence/strategies/code_analysis_strategy.py +33 -0
  90. devsper/intelligence/strategies/data_science_strategy.py +33 -0
  91. devsper/intelligence/strategies/document_pipeline_strategy.py +33 -0
  92. devsper/intelligence/strategies/experiment_strategy.py +33 -0
  93. devsper/intelligence/strategies/research_strategy.py +34 -0
  94. devsper/intelligence/strategy_selector.py +84 -0
  95. devsper/intelligence/synthesis.py +132 -0
  96. devsper/intelligence/task_optimizer.py +92 -0
  97. devsper/knowledge/__init__.py +5 -0
  98. devsper/knowledge/extractor.py +204 -0
  99. devsper/knowledge/knowledge_graph.py +184 -0
  100. devsper/knowledge/query.py +285 -0
  101. devsper/memory/__init__.py +35 -0
  102. devsper/memory/consolidation.py +138 -0
  103. devsper/memory/embeddings.py +60 -0
  104. devsper/memory/memory_index.py +97 -0
  105. devsper/memory/memory_router.py +62 -0
  106. devsper/memory/memory_store.py +221 -0
  107. devsper/memory/memory_types.py +54 -0
  108. devsper/memory/namespaces.py +45 -0
  109. devsper/memory/scoring.py +77 -0
  110. devsper/memory/summarizer.py +52 -0
  111. devsper/nodes/__init__.py +5 -0
  112. devsper/nodes/controller.py +449 -0
  113. devsper/nodes/rpc.py +127 -0
  114. devsper/nodes/single.py +161 -0
  115. devsper/nodes/worker.py +506 -0
  116. devsper/orchestration/__init__.py +19 -0
  117. devsper/orchestration/meta_planner.py +239 -0
  118. devsper/orchestration/priority_queue.py +61 -0
  119. devsper/plugins/__init__.py +19 -0
  120. devsper/plugins/marketplace/__init__.py +0 -0
  121. devsper/plugins/plugin_loader.py +70 -0
  122. devsper/plugins/plugin_registry.py +34 -0
  123. devsper/plugins/registry.py +83 -0
  124. devsper/protocols/__init__.py +6 -0
  125. devsper/providers/__init__.py +17 -0
  126. devsper/providers/anthropic.py +84 -0
  127. devsper/providers/base.py +75 -0
  128. devsper/providers/complexity_router.py +94 -0
  129. devsper/providers/gemini.py +36 -0
  130. devsper/providers/github.py +180 -0
  131. devsper/providers/model_router.py +40 -0
  132. devsper/providers/openai.py +105 -0
  133. devsper/providers/router/__init__.py +21 -0
  134. devsper/providers/router/backends/__init__.py +19 -0
  135. devsper/providers/router/backends/anthropic_backend.py +111 -0
  136. devsper/providers/router/backends/custom_backend.py +138 -0
  137. devsper/providers/router/backends/gemini_backend.py +89 -0
  138. devsper/providers/router/backends/github_backend.py +165 -0
  139. devsper/providers/router/backends/ollama_backend.py +104 -0
  140. devsper/providers/router/backends/openai_backend.py +142 -0
  141. devsper/providers/router/backends/vllm_backend.py +35 -0
  142. devsper/providers/router/base.py +60 -0
  143. devsper/providers/router/factory.py +92 -0
  144. devsper/providers/router/legacy.py +101 -0
  145. devsper/providers/router/router.py +135 -0
  146. devsper/reasoning/__init__.py +12 -0
  147. devsper/reasoning/graph.py +59 -0
  148. devsper/reasoning/nodes.py +20 -0
  149. devsper/reasoning/store.py +67 -0
  150. devsper/runtime/__init__.py +12 -0
  151. devsper/runtime/health.py +88 -0
  152. devsper/runtime/replay.py +53 -0
  153. devsper/runtime/replay_engine.py +142 -0
  154. devsper/runtime/run_history.py +204 -0
  155. devsper/runtime/telemetry.py +116 -0
  156. devsper/runtime/visualize.py +58 -0
  157. devsper/sandbox/__init__.py +13 -0
  158. devsper/sandbox/sandbox.py +161 -0
  159. devsper/swarm/checkpointer.py +65 -0
  160. devsper/swarm/executor.py +558 -0
  161. devsper/swarm/map_reduce.py +44 -0
  162. devsper/swarm/planner.py +197 -0
  163. devsper/swarm/prefetcher.py +91 -0
  164. devsper/swarm/scheduler.py +153 -0
  165. devsper/swarm/speculation.py +47 -0
  166. devsper/swarm/swarm.py +562 -0
  167. devsper/tools/__init__.py +33 -0
  168. devsper/tools/base.py +29 -0
  169. devsper/tools/code_intelligence/__init__.py +13 -0
  170. devsper/tools/code_intelligence/api_surface_extractor.py +73 -0
  171. devsper/tools/code_intelligence/architecture_analyzer.py +65 -0
  172. devsper/tools/code_intelligence/codebase_indexer.py +71 -0
  173. devsper/tools/code_intelligence/dependency_graph_builder.py +67 -0
  174. devsper/tools/code_intelligence/design_pattern_detector.py +62 -0
  175. devsper/tools/code_intelligence/large_function_detector.py +68 -0
  176. devsper/tools/code_intelligence/module_responsibility_mapper.py +56 -0
  177. devsper/tools/code_intelligence/parallel_codebase_analysis.py +44 -0
  178. devsper/tools/code_intelligence/refactor_candidate_detector.py +81 -0
  179. devsper/tools/code_intelligence/repository_semantic_index.py +61 -0
  180. devsper/tools/code_intelligence/test_coverage_estimator.py +62 -0
  181. devsper/tools/coding/__init__.py +12 -0
  182. devsper/tools/coding/analyze_code_complexity.py +48 -0
  183. devsper/tools/coding/dependency_analyzer.py +42 -0
  184. devsper/tools/coding/extract_functions.py +38 -0
  185. devsper/tools/coding/format_python.py +50 -0
  186. devsper/tools/coding/generate_docstrings.py +40 -0
  187. devsper/tools/coding/generate_unit_tests.py +42 -0
  188. devsper/tools/coding/lint_python.py +51 -0
  189. devsper/tools/coding/refactor_function.py +41 -0
  190. devsper/tools/coding/repo_structure_map.py +54 -0
  191. devsper/tools/coding/run_python.py +53 -0
  192. devsper/tools/data/__init__.py +12 -0
  193. devsper/tools/data/column_type_detection.py +64 -0
  194. devsper/tools/data/csv_summary.py +52 -0
  195. devsper/tools/data/dataframe_filter.py +51 -0
  196. devsper/tools/data/dataframe_groupby.py +47 -0
  197. devsper/tools/data/dataframe_stats.py +38 -0
  198. devsper/tools/data/dataset_sampling.py +55 -0
  199. devsper/tools/data/dataset_schema.py +45 -0
  200. devsper/tools/data/json_pretty_print.py +37 -0
  201. devsper/tools/data/json_query.py +46 -0
  202. devsper/tools/data/missing_value_report.py +47 -0
  203. devsper/tools/data_science/__init__.py +13 -0
  204. devsper/tools/data_science/correlation_heatmap.py +72 -0
  205. devsper/tools/data_science/dataset_bias_detector.py +49 -0
  206. devsper/tools/data_science/dataset_distribution_report.py +64 -0
  207. devsper/tools/data_science/dataset_drift_detector.py +64 -0
  208. devsper/tools/data_science/dataset_outlier_detector.py +65 -0
  209. devsper/tools/data_science/dataset_profile.py +76 -0
  210. devsper/tools/data_science/distributed_dataset_processor.py +54 -0
  211. devsper/tools/data_science/feature_engineering_suggestions.py +69 -0
  212. devsper/tools/data_science/feature_importance_estimator.py +82 -0
  213. devsper/tools/data_science/model_input_validator.py +59 -0
  214. devsper/tools/data_science/time_series_analyzer.py +57 -0
  215. devsper/tools/documents/__init__.py +11 -0
  216. devsper/tools/documents/_docproc.py +56 -0
  217. devsper/tools/documents/document_to_markdown.py +29 -0
  218. devsper/tools/documents/extract_document_images.py +39 -0
  219. devsper/tools/documents/extract_document_text.py +29 -0
  220. devsper/tools/documents/extract_equations.py +36 -0
  221. devsper/tools/documents/extract_tables.py +47 -0
  222. devsper/tools/documents/summarize_document.py +42 -0
  223. devsper/tools/documents/write_latex_document.py +133 -0
  224. devsper/tools/documents/write_markdown_document.py +89 -0
  225. devsper/tools/documents/write_word_document.py +149 -0
  226. devsper/tools/experiments/__init__.py +13 -0
  227. devsper/tools/experiments/bootstrap_estimator.py +54 -0
  228. devsper/tools/experiments/experiment_report_generator.py +50 -0
  229. devsper/tools/experiments/experiment_tracker.py +36 -0
  230. devsper/tools/experiments/grid_search_runner.py +50 -0
  231. devsper/tools/experiments/model_benchmark_runner.py +45 -0
  232. devsper/tools/experiments/monte_carlo_experiment.py +38 -0
  233. devsper/tools/experiments/parameter_sweep_runner.py +51 -0
  234. devsper/tools/experiments/result_comparator.py +58 -0
  235. devsper/tools/experiments/simulation_runner.py +43 -0
  236. devsper/tools/experiments/statistical_significance_test.py +56 -0
  237. devsper/tools/experiments/swarm_map_reduce.py +42 -0
  238. devsper/tools/filesystem/__init__.py +12 -0
  239. devsper/tools/filesystem/append_file.py +42 -0
  240. devsper/tools/filesystem/file_hash.py +40 -0
  241. devsper/tools/filesystem/file_line_count.py +36 -0
  242. devsper/tools/filesystem/file_metadata.py +38 -0
  243. devsper/tools/filesystem/file_preview.py +55 -0
  244. devsper/tools/filesystem/find_large_files.py +50 -0
  245. devsper/tools/filesystem/list_directory.py +39 -0
  246. devsper/tools/filesystem/read_file.py +35 -0
  247. devsper/tools/filesystem/search_files.py +60 -0
  248. devsper/tools/filesystem/write_file.py +41 -0
  249. devsper/tools/flagship/__init__.py +15 -0
  250. devsper/tools/flagship/distributed_document_analysis.py +77 -0
  251. devsper/tools/flagship/docproc_corpus_pipeline.py +91 -0
  252. devsper/tools/flagship/repository_semantic_map.py +99 -0
  253. devsper/tools/flagship/research_graph_builder.py +111 -0
  254. devsper/tools/flagship/swarm_experiment_runner.py +86 -0
  255. devsper/tools/knowledge/__init__.py +10 -0
  256. devsper/tools/knowledge/citation_graph_builder.py +69 -0
  257. devsper/tools/knowledge/concept_frequency_analyzer.py +74 -0
  258. devsper/tools/knowledge/corpus_builder.py +66 -0
  259. devsper/tools/knowledge/cross_document_entity_linker.py +71 -0
  260. devsper/tools/knowledge/document_corpus_summary.py +68 -0
  261. devsper/tools/knowledge/document_topic_extractor.py +58 -0
  262. devsper/tools/knowledge/knowledge_graph_extractor.py +58 -0
  263. devsper/tools/knowledge/timeline_extractor.py +59 -0
  264. devsper/tools/math/__init__.py +12 -0
  265. devsper/tools/math/calculate_expression.py +52 -0
  266. devsper/tools/math/correlation.py +44 -0
  267. devsper/tools/math/distribution_summary.py +39 -0
  268. devsper/tools/math/histogram.py +53 -0
  269. devsper/tools/math/linear_regression.py +47 -0
  270. devsper/tools/math/matrix_multiply.py +38 -0
  271. devsper/tools/math/mean_std.py +35 -0
  272. devsper/tools/math/monte_carlo_simulation.py +43 -0
  273. devsper/tools/math/polynomial_fit.py +40 -0
  274. devsper/tools/math/random_sample.py +36 -0
  275. devsper/tools/mcp/__init__.py +23 -0
  276. devsper/tools/mcp/adapter.py +53 -0
  277. devsper/tools/mcp/client.py +235 -0
  278. devsper/tools/mcp/discovery.py +53 -0
  279. devsper/tools/memory/__init__.py +16 -0
  280. devsper/tools/memory/delete_memory.py +25 -0
  281. devsper/tools/memory/list_memory.py +34 -0
  282. devsper/tools/memory/search_memory.py +36 -0
  283. devsper/tools/memory/store_memory.py +47 -0
  284. devsper/tools/memory/summarize_memory.py +41 -0
  285. devsper/tools/memory/tag_memory.py +47 -0
  286. devsper/tools/pipelines.py +92 -0
  287. devsper/tools/registry.py +39 -0
  288. devsper/tools/research/__init__.py +12 -0
  289. devsper/tools/research/arxiv_download.py +55 -0
  290. devsper/tools/research/arxiv_search.py +58 -0
  291. devsper/tools/research/citation_extractor.py +35 -0
  292. devsper/tools/research/duckduckgo_search.py +42 -0
  293. devsper/tools/research/paper_metadata_extractor.py +45 -0
  294. devsper/tools/research/paper_summarizer.py +41 -0
  295. devsper/tools/research/research_question_generator.py +39 -0
  296. devsper/tools/research/topic_cluster.py +46 -0
  297. devsper/tools/research/web_search.py +47 -0
  298. devsper/tools/research/wikipedia_lookup.py +50 -0
  299. devsper/tools/research_advanced/__init__.py +14 -0
  300. devsper/tools/research_advanced/citation_context_extractor.py +60 -0
  301. devsper/tools/research_advanced/literature_review_generator.py +79 -0
  302. devsper/tools/research_advanced/methodology_extractor.py +58 -0
  303. devsper/tools/research_advanced/paper_contribution_extractor.py +50 -0
  304. devsper/tools/research_advanced/paper_dataset_identifier.py +49 -0
  305. devsper/tools/research_advanced/paper_method_comparator.py +62 -0
  306. devsper/tools/research_advanced/paper_similarity_search.py +69 -0
  307. devsper/tools/research_advanced/paper_trend_analyzer.py +69 -0
  308. devsper/tools/research_advanced/parallel_document_analyzer.py +56 -0
  309. devsper/tools/research_advanced/research_gap_finder.py +71 -0
  310. devsper/tools/research_advanced/research_topic_mapper.py +69 -0
  311. devsper/tools/research_advanced/swarm_literature_review.py +58 -0
  312. devsper/tools/scoring/__init__.py +52 -0
  313. devsper/tools/scoring/report.py +44 -0
  314. devsper/tools/scoring/scorer.py +39 -0
  315. devsper/tools/scoring/selector.py +61 -0
  316. devsper/tools/scoring/store.py +267 -0
  317. devsper/tools/selector.py +130 -0
  318. devsper/tools/system/__init__.py +12 -0
  319. devsper/tools/system/cpu_usage.py +22 -0
  320. devsper/tools/system/disk_usage.py +35 -0
  321. devsper/tools/system/environment_variables.py +29 -0
  322. devsper/tools/system/memory_usage.py +23 -0
  323. devsper/tools/system/pip_install.py +44 -0
  324. devsper/tools/system/pip_search.py +29 -0
  325. devsper/tools/system/process_list.py +34 -0
  326. devsper/tools/system/python_package_list.py +40 -0
  327. devsper/tools/system/run_shell_command.py +51 -0
  328. devsper/tools/system/system_info.py +26 -0
  329. devsper/tools/tool_runner.py +122 -0
  330. devsper/tui/__init__.py +5 -0
  331. devsper/tui/activity_feed_view.py +73 -0
  332. devsper/tui/adaptive_tasks_view.py +75 -0
  333. devsper/tui/agent_role_view.py +35 -0
  334. devsper/tui/app.py +395 -0
  335. devsper/tui/dashboard_screen.py +290 -0
  336. devsper/tui/dev_view.py +99 -0
  337. devsper/tui/inject_screen.py +73 -0
  338. devsper/tui/knowledge_graph_view.py +46 -0
  339. devsper/tui/layout.py +43 -0
  340. devsper/tui/logs_view.py +83 -0
  341. devsper/tui/memory_view.py +58 -0
  342. devsper/tui/performance_view.py +33 -0
  343. devsper/tui/reasoning_graph_view.py +39 -0
  344. devsper/tui/results_view.py +139 -0
  345. devsper/tui/swarm_view.py +37 -0
  346. devsper/tui/task_detail_screen.py +55 -0
  347. devsper/tui/task_view.py +103 -0
  348. devsper/types/event.py +97 -0
  349. devsper/types/exceptions.py +21 -0
  350. devsper/types/swarm.py +41 -0
  351. devsper/types/task.py +80 -0
  352. devsper/upgrade/__init__.py +21 -0
  353. devsper/upgrade/changelog.py +124 -0
  354. devsper/upgrade/cli.py +145 -0
  355. devsper/upgrade/installer.py +103 -0
  356. devsper/upgrade/notifier.py +52 -0
  357. devsper/upgrade/version_check.py +121 -0
  358. devsper/utils/event_logger.py +88 -0
  359. devsper/utils/http.py +43 -0
  360. devsper/utils/models.py +54 -0
  361. devsper/visualization/__init__.py +5 -0
  362. devsper/visualization/dag_export.py +67 -0
  363. devsper/workflow/__init__.py +18 -0
  364. devsper/workflow/conditions.py +157 -0
  365. devsper/workflow/context.py +108 -0
  366. devsper/workflow/loader.py +156 -0
  367. devsper/workflow/resolver.py +109 -0
  368. devsper/workflow/runner.py +562 -0
  369. devsper/workflow/schema.py +63 -0
  370. devsper/workflow/validator.py +128 -0
  371. devsper-2.1.6.dist-info/METADATA +346 -0
  372. devsper-2.1.6.dist-info/RECORD +375 -0
  373. devsper-2.1.6.dist-info/WHEEL +4 -0
  374. devsper-2.1.6.dist-info/entry_points.txt +3 -0
  375. devsper-2.1.6.dist-info/licenses/LICENSE +639 -0
@@ -0,0 +1,184 @@
1
+ """
2
+ Knowledge graph: build relationships between stored memory.
3
+
4
+ Nodes: documents, concepts, datasets, methods.
5
+ Edges: mentions, cites, related_to, uses, extends, outperforms, constrains, blocks.
6
+ Uses networkx. v1.8: add_or_update_node, add_edge, save/load for extraction.
7
+ """
8
+
9
+ import json
10
+ import os
11
+ import re
12
+ from typing import Any
13
+
14
+ import networkx as nx
15
+
16
+ from devsper.memory.memory_store import MemoryStore, get_default_store
17
+ from devsper.memory.memory_types import MemoryRecord, MemoryType
18
+
19
+
20
+ NODE_DOCUMENT = "document"
21
+ NODE_CONCEPT = "concept"
22
+ NODE_DATASET = "dataset"
23
+ NODE_METHOD = "method"
24
+
25
+ EDGE_MENTIONS = "mentions"
26
+ EDGE_CITES = "cites"
27
+ EDGE_RELATED_TO = "related_to"
28
+ EDGE_USES = "uses"
29
+ EDGE_EXTENDS = "extends"
30
+ EDGE_OUTPERFORMS = "outperforms"
31
+ EDGE_CONSTRAINS = "constrains"
32
+ EDGE_BLOCKS = "blocks"
33
+
34
+
35
+ def _extract_concepts(text: str, limit: int = 15) -> list[str]:
36
+ """Heuristic: extract likely concepts (title-case phrases, known tokens)."""
37
+ concepts = set()
38
+ for m in re.finditer(r"\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\b", text):
39
+ concepts.add(m.group(1).strip())
40
+ tokens = re.findall(r"\b(diffusion|transformer|dataset|model|training|evaluation|baseline|embedding|neural)\b", text.lower())
41
+ concepts.update(tokens)
42
+ return list(concepts)[:limit]
43
+
44
+
45
+ def _extract_datasets(text: str, limit: int = 5) -> list[str]:
46
+ """Heuristic: extract dataset-like names (e.g. MNIST, ImageNet)."""
47
+ datasets = set()
48
+ for m in re.finditer(r"\b([A-Z][A-Za-z0-9\-]+(?:-\d+)?)\b", text):
49
+ w = m.group(1)
50
+ if len(w) >= 3 and w not in ("The", "This", "These", "When", "What"):
51
+ datasets.add(w)
52
+ return list(datasets)[:limit]
53
+
54
+
55
+ def _extract_methods(text: str, limit: int = 5) -> list[str]:
56
+ """Heuristic: method-like phrases (e.g. 'X method', 'Y approach')."""
57
+ methods = set()
58
+ for m in re.finditer(r"(\w+(?:\s+\w+)?)\s+(?:method|approach|algorithm|framework)\b", text, re.IGNORECASE):
59
+ methods.add(m.group(1).strip())
60
+ return list(methods)[:limit]
61
+
62
+
63
+ class KnowledgeGraph:
64
+ """
65
+ Build and query a graph over memory: nodes are documents/concepts/datasets/methods,
66
+ edges are mentions, cites, related_to.
67
+ """
68
+
69
+ def __init__(self, store: MemoryStore | None = None) -> None:
70
+ self.store = store or get_default_store()
71
+ self._graph: nx.MultiDiGraph = nx.MultiDiGraph()
72
+
73
+ def build_from_memory(self, merge: bool = False) -> nx.MultiDiGraph:
74
+ """
75
+ Build graph from all stored memory. Returns the graph.
76
+ Nodes: document:<id>, concept:<name>, dataset:<name>, method:<name>.
77
+ Edges: document --mentions--> concept/dataset/method; concept --related_to--> concept.
78
+ If merge=True, add to existing graph instead of clearing (e.g. after load()).
79
+ """
80
+ if not merge:
81
+ self._graph = nx.MultiDiGraph()
82
+ records = self.store.list_memory(limit=2000)
83
+ for r in records:
84
+ doc_id = f"document:{r.id}"
85
+ self._graph.add_node(doc_id, kind=NODE_DOCUMENT, memory_id=r.id, label=r.content[:200])
86
+ for c in _extract_concepts(r.content):
87
+ node = f"concept:{c}"
88
+ self._graph.add_node(node, kind=NODE_CONCEPT, label=c)
89
+ self._graph.add_edge(doc_id, node, type=EDGE_MENTIONS)
90
+ for d in _extract_datasets(r.content):
91
+ node = f"dataset:{d}"
92
+ self._graph.add_node(node, kind=NODE_DATASET, label=d)
93
+ self._graph.add_edge(doc_id, node, type=EDGE_MENTIONS)
94
+ for m in _extract_methods(r.content):
95
+ node = f"method:{m}"
96
+ self._graph.add_node(node, kind=NODE_METHOD, label=m)
97
+ self._graph.add_edge(doc_id, node, type=EDGE_MENTIONS)
98
+ doc_nodes = [n for n, attrs in self._graph.nodes(data=True) if attrs.get("kind") == NODE_DOCUMENT]
99
+ for doc in doc_nodes:
100
+ succs = list(self._graph.successors(doc))
101
+ concepts = [s for s in succs if s.startswith("concept:")]
102
+ for i, a in enumerate(concepts):
103
+ for b in concepts[i + 1 :]:
104
+ self._graph.add_edge(a, b, type=EDGE_RELATED_TO)
105
+ self._graph.add_edge(b, a, type=EDGE_RELATED_TO)
106
+ return self._graph
107
+
108
+ @property
109
+ def graph(self) -> nx.MultiDiGraph:
110
+ """Return the current graph (build first with build_from_memory if needed)."""
111
+ return self._graph
112
+
113
+ def get_neighbors(self, node_id: str, edge_type: str | None = None) -> list[tuple[str, str]]:
114
+ """Return list of (neighbor_id, edge_type) for outgoing edges."""
115
+ if node_id not in self._graph:
116
+ return []
117
+ out = []
118
+ for _, v, data in self._graph.out_edges(node_id, data=True):
119
+ et = data.get("type", "")
120
+ if edge_type is None or et == edge_type:
121
+ out.append((v, et))
122
+ return out
123
+
124
+ def get_documents_mentioning(self, concept_or_dataset: str) -> list[str]:
125
+ """Return memory ids of documents that mention the given concept or dataset."""
126
+ node = f"concept:{concept_or_dataset}"
127
+ if node not in self._graph:
128
+ node = f"dataset:{concept_or_dataset}"
129
+ if node not in self._graph:
130
+ return []
131
+ doc_ids = []
132
+ for pred in self._graph.predecessors(node):
133
+ if pred.startswith("document:"):
134
+ doc_ids.append(self._graph.nodes[pred].get("memory_id", pred.replace("document:", "")))
135
+ return doc_ids
136
+
137
+ def add_or_update_node(self, node_id: str, kind: str, label: str, **attrs: Any) -> None:
138
+ """v1.8: Add or update a node (e.g. from KnowledgeExtractor)."""
139
+ self._graph.add_node(node_id, kind=kind, label=label, **attrs)
140
+
141
+ def add_edge(self, from_id: str, to_id: str, edge_type: str) -> None:
142
+ """v1.8: Add a directed edge (e.g. from KnowledgeExtractor)."""
143
+ self._graph.add_node(from_id, **self._graph.nodes.get(from_id, {}))
144
+ self._graph.add_node(to_id, **self._graph.nodes.get(to_id, {}))
145
+ self._graph.add_edge(from_id, to_id, type=edge_type)
146
+
147
+ def _persist_path(self) -> str:
148
+ """Path to persisted graph JSON (data_dir/knowledge_graph.json)."""
149
+ try:
150
+ from devsper.config import get_config
151
+ base = get_config().data_dir
152
+ except Exception:
153
+ base = os.environ.get("DEVSPER_DATA_DIR", ".devsper")
154
+ os.makedirs(base, exist_ok=True)
155
+ return os.path.join(base, "knowledge_graph.json")
156
+
157
+ def save(self) -> None:
158
+ """v1.8: Persist graph to JSON (nodes and edges only; no embeddings)."""
159
+ nodes = []
160
+ for nid, data in self._graph.nodes(data=True):
161
+ nodes.append({"id": nid, **{k: v for k, v in data.items() if isinstance(v, (str, int, float, bool))}})
162
+ edges = []
163
+ for u, v, data in self._graph.edges(data=True):
164
+ edges.append({"from": u, "to": v, "type": data.get("type", "related_to")})
165
+ payload = {"nodes": nodes, "edges": edges}
166
+ with open(self._persist_path(), "w", encoding="utf-8") as f:
167
+ json.dump(payload, f, indent=0)
168
+
169
+ def load(self) -> bool:
170
+ """v1.8: Load graph from JSON if file exists; merge into _graph. Returns True if loaded."""
171
+ path = self._persist_path()
172
+ if not os.path.isfile(path):
173
+ return False
174
+ with open(path, "r", encoding="utf-8") as f:
175
+ payload = json.load(f)
176
+ for n in payload.get("nodes", []):
177
+ nid = n.pop("id", None)
178
+ if nid:
179
+ self._graph.add_node(nid, **n)
180
+ for e in payload.get("edges", []):
181
+ u, v = e.get("from"), e.get("to")
182
+ if u and v:
183
+ self._graph.add_edge(u, v, type=e.get("type", "related_to"))
184
+ return True
@@ -0,0 +1,285 @@
1
+ """
2
+ Knowledge graph query: entity search and relationship traversal.
3
+ v1.8: query_for_planning for knowledge-guided planning.
4
+ """
5
+
6
+ from dataclasses import dataclass
7
+ from difflib import SequenceMatcher
8
+
9
+ from devsper.knowledge.knowledge_graph import (
10
+ KnowledgeGraph,
11
+ NODE_DOCUMENT,
12
+ NODE_CONCEPT,
13
+ NODE_DATASET,
14
+ NODE_METHOD,
15
+ )
16
+
17
+
18
@dataclass
class QueryResult:
    """Structured result: entities matching query and edges (optionally traversed)."""

    entities: list[tuple[str, str]]  # (node_id, label) of matched non-document nodes
    edges: list[tuple[str, str, str]]  # (from_id, to_id, edge_type) reached by traversal
    documents: list[str]  # memory ids of documents mentioning matched entities
25
+
26
+
27
@dataclass
class PlanningContext:
    """v1.8: Context from KG for planner injection."""

    relevant_concepts: list[str]  # concept labels matched to the task's terms
    prior_findings: list[str]  # NOTE(review): filled by query_for_planning — confirm exact source
    known_constraints: list[str]  # constraint-style findings, if any were extracted
    related_methods: list[str]  # method labels connected to the matched nodes
    confidence: float  # 0.0 when the graph is empty or no node matched
36
+
37
+
38
+ def _node_matches_label(node_id: str, label: str, query_lower: str) -> bool:
39
+ """True if node label or id contains query terms."""
40
+ if not label:
41
+ return False
42
+ return query_lower in label.lower() or query_lower in node_id.lower()
43
+
44
+
45
def entity_search(kg: KnowledgeGraph, query_text: str) -> list[tuple[str, str]]:
    """
    Find non-document nodes (concept, dataset, method) whose label or id
    matches the query. Returns list of (node_id, label); empty query -> [].
    """
    query_lower = (query_text or "").strip().lower()
    if not query_lower:
        return []
    g = kg.graph
    matches: list[tuple[str, str]] = []
    for node_id, data in g.nodes(data=True):
        if data.get("kind") in (NODE_DOCUMENT,):
            continue
        # Bug fix: the original expression
        #     data.get("label", "") or node_id.split(":", 1)[-1] if ":" in node_id else node_id
        # parsed as (label or suffix) if ":" in node_id else node_id, because a
        # conditional expression binds looser than `or` — so a stored label was
        # ignored for any node id without a ":" prefix. Parenthesize the
        # fallback, matching the correctly written form in query_for_planning().
        label = data.get("label", "") or (node_id.split(":", 1)[-1] if ":" in node_id else node_id)
        if _node_matches_label(node_id, label, query_lower):
            matches.append((node_id, label))
    return matches
63
+
64
+
65
def traverse(
    kg: KnowledgeGraph,
    node_ids: list[str],
    hops: int = 1,
    edge_type: str | None = None,
) -> list[tuple[str, str, str]]:
    """
    Walk outgoing edges from `node_ids` for up to `hops` steps.

    Each (from_id, to_id) pair is reported at most once, tagged with the edge
    type first seen for it; pass `edge_type` to restrict the walk to one
    relationship kind. Returns a list of (from_id, to_id, edge_type).
    """
    if hops < 1:
        return []
    graph = kg.graph
    collected: list[tuple[str, str, str]] = []
    visited_pairs: set[tuple[str, str]] = set()
    current = set(node_ids)
    for _ in range(hops):
        upcoming: set[str] = set()
        for source in current:
            # Unknown node ids are silently skipped.
            if source not in graph:
                continue
            for _, target, attrs in graph.out_edges(source, data=True):
                kind = attrs.get("type", "")
                if edge_type is not None and kind != edge_type:
                    continue
                pair = (source, target)
                if pair in visited_pairs:
                    continue
                visited_pairs.add(pair)
                collected.append((source, target, kind))
                # Only newly reported targets feed the next hop.
                upcoming.add(target)
        current = upcoming
    return collected
96
+
97
+
98
def query(
    kg: KnowledgeGraph,
    query_text: str,
    traverse_hops: int = 1,
) -> QueryResult:
    """
    Search the graph for `query_text`, walk relationships `traverse_hops`
    deep, and collect the documents mentioning any matched entity.
    Returns a QueryResult with entities, edges, and document memory ids.
    """
    entities = entity_search(kg, query_text)
    matched_ids = [node_id for node_id, _ in entities]
    edges = traverse(kg, matched_ids, hops=traverse_hops) if matched_ids else []
    doc_ids: list[str] = []
    for node_id, _ in entities:
        # Document nodes cannot themselves "mention" anything.
        if node_id.startswith("document:"):
            continue
        bare_name = node_id.split(":", 1)[-1] if ":" in node_id else node_id
        doc_ids.extend(kg.get_documents_mentioning(bare_name))
    # Dedupe document ids while keeping first-seen order.
    return QueryResult(entities=entities, edges=edges, documents=list(dict.fromkeys(doc_ids)))
119
+
120
+
121
# Stopwords for task term extraction (simple heuristic): common English
# articles, prepositions, auxiliaries and demonstratives dropped before
# matching task-description words against KG node labels.
# frozenset gives O(1) membership and signals immutability.
_PLANNING_STOPWORDS = frozenset(
    {
        "a", "an", "the", "and", "or", "but", "in", "on", "at", "to", "for",
        "of", "with", "by", "from", "as", "is", "was", "are", "were", "be",
        "been", "being", "have", "has", "had", "do", "does", "did", "will",
        "would", "could", "should", "may", "might", "must", "can", "this",
        "that", "these", "those", "it", "its", "into", "through", "during",
    }
)
131
+
132
+
133
def _extract_candidate_terms(task_description: str) -> list[str]:
    """Extract candidate terms from a task description.

    Splits on whitespace, strips trailing punctuation, lowercases, drops
    stopwords, and dedupes preserving first-occurrence order.

    Bug fix: the original ended in an if/else whose two branches were
    identical (both appended w_clean), so the capitalized/digit/underscore
    test was dead code. The dead branch is removed; observable behavior
    is unchanged.
    """
    if not task_description or not task_description.strip():
        return []
    candidates: list[str] = []
    for word in task_description.strip().split():
        cleaned = word.strip(".,;:!?").lower()
        if cleaned and cleaned not in _PLANNING_STOPWORDS:
            candidates.append(cleaned)
    return list(dict.fromkeys(candidates))
148
+
149
+
150
+ def _fuzzy_match_label(term: str, label: str, threshold: float = 0.8) -> float:
151
+ """Return similarity ratio in [0, 1]; 0 if below threshold."""
152
+ if not label:
153
+ return 0.0
154
+ label_lower = label.lower()
155
+ term_lower = term.lower()
156
+ if term_lower in label_lower:
157
+ return min(1.0, 0.8 + 0.2 * (len(term_lower) / max(1, len(label_lower))))
158
+ r = SequenceMatcher(None, term_lower, label_lower).ratio()
159
+ return r if r >= threshold else 0.0
160
+
161
+
162
def query_for_planning(kg: KnowledgeGraph, task_description: str) -> PlanningContext:
    """
    Build planning context from KG: concepts, findings, constraints, methods.

    Pipeline: extract candidate terms from the task description, fuzzy-match
    them against non-document node labels, expand the matched set by a 2-hop
    neighborhood (both edge directions), score nodes by degree + match
    strength, then bucket the top nodes into PlanningContext fields.
    """
    concepts: list[str] = []
    findings: list[str] = []
    constraints: list[str] = []
    methods: list[str] = []
    g = kg.graph
    if g.number_of_nodes() == 0:
        # Empty graph: nothing to plan from.
        return PlanningContext(
            relevant_concepts=[],
            prior_findings=[],
            known_constraints=[],
            related_methods=[],
            confidence=0.0,
        )

    terms = _extract_candidate_terms(task_description)
    matched_nodes: list[tuple[str, str, float]] = []  # (node_id, label, match_score)
    for node_id, data in g.nodes(data=True):
        kind = data.get("kind")
        if kind == NODE_DOCUMENT:
            # Documents are surfaced later via the neighborhood, not matched directly.
            continue
        # Fall back to the id suffix (after the "kind:" prefix) when no label is set.
        label = data.get("label", "") or (node_id.split(":", 1)[-1] if ":" in node_id else node_id)
        for t in terms:
            score = _fuzzy_match_label(t, label, 0.8)
            if score > 0:
                # First matching term wins: at most one entry per node.
                matched_nodes.append((node_id, label, score))
                break

    if not matched_nodes:
        # No term matched any node: return an empty, zero-confidence context.
        return PlanningContext(
            relevant_concepts=[],
            prior_findings=[],
            known_constraints=[],
            related_methods=[],
            confidence=0.0,
        )

    # Expand matched nodes by up to 2 hops, following edges in both directions.
    neighborhood = set(n[0] for n in matched_nodes)
    for _ in range(2):
        next_n = set()
        for nid in neighborhood:
            if nid not in g:
                continue
            for _, v, _ in g.out_edges(nid, data=True):
                next_n.add(v)
            for u, _, _ in g.in_edges(nid, data=True):
                next_n.add(u)
        neighborhood |= next_n

    # Keep the best match score per node (a node may match several terms).
    match_scores: dict[str, float] = {}
    for nid, label, s in matched_nodes:
        match_scores[nid] = max(match_scores.get(nid, 0), s)
    try:
        degree = dict(g.degree(neighborhood))
    except Exception:
        # Defensive fallback: a degree-lookup failure should not sink the query.
        degree = {n: 0 for n in neighborhood}
    scores: list[tuple[str, str, str, float]] = []
    for nid in neighborhood:
        data = g.nodes.get(nid, {})
        kind = data.get("kind", "")
        label = data.get("label", "") or (nid.split(":", 1)[-1] if ":" in nid else nid)
        deg = degree.get(nid, 0)
        recency = 1.0  # placeholder: all nodes are currently treated as equally recent
        ms = match_scores.get(nid, 0.5)  # unmatched neighborhood nodes get a neutral 0.5
        # Weighted score: centrality 30%, recency 20%, term-match strength 50%.
        total = (deg * 0.3) + (recency * 0.2) + (ms * 0.5)
        scores.append((nid, kind, label, total))

    # Highest score first; only the top 30 nodes feed the concept/method lists.
    scores.sort(key=lambda x: -x[3])
    top = scores[:30]
    for nid, kind, label, _ in top:
        if kind == NODE_CONCEPT and label not in concepts:
            concepts.append(label)
        elif kind == NODE_METHOD and label not in methods:
            methods.append(label)

    # Constraint-type edges anywhere in the neighborhood become readable notes.
    for u, v, data in g.out_edges(neighborhood, data=True):
        if data.get("type") in ("constrains", "blocks"):
            edge_desc = f"{u.split(':', 1)[-1] if ':' in u else u} -> {v.split(':', 1)[-1] if ':' in v else v}"
            if edge_desc not in constraints:
                constraints.append(edge_desc)

    # Document nodes in the neighborhood contribute their labels as prior findings.
    doc_nodes = [n for n in neighborhood if g.nodes.get(n, {}).get("kind") == NODE_DOCUMENT]
    for d in doc_nodes[:5]:
        summary = (g.nodes[d].get("label", "") or d)[:200]
        if summary and summary not in findings:
            findings.append(summary)

    # Confidence: fraction of the graph represented by the results, doubled,
    # then clamped to [0, 1].
    total_nodes = g.number_of_nodes()
    found = len(concepts) + len(methods) + len(findings) + len(constraints)
    confidence = min(1.0, (found / max(1, total_nodes)) * 2.0) if total_nodes else 0.0
    confidence = max(0.0, min(1.0, confidence))

    return PlanningContext(
        relevant_concepts=concepts[:15],
        prior_findings=findings[:5],
        known_constraints=constraints[:10],
        related_methods=methods[:10],
        confidence=confidence,
    )
265
+
266
+
267
def format_planning_context(ctx: PlanningContext, max_tokens: int = 300) -> str:
    """Render a PlanningContext as a short newline-joined summary.

    Emits a confidence line ("High confidence" at >= 0.7, "Partial context"
    at >= 0.3, nothing below), then only the non-empty sections, each capped
    in length. The whole output is truncated to roughly *max_tokens* tokens
    (estimated at 4 chars/token) with a trailing "...".
    """
    lines: list[str] = []
    conf = ctx.confidence
    if conf >= 0.7:
        lines.append("High confidence")
    elif conf >= 0.3:
        lines.append("Partial context")
    if ctx.relevant_concepts:
        lines.append("Concepts: " + ", ".join(ctx.relevant_concepts[:10]))
    if ctx.prior_findings:
        snippets = [finding[:80] for finding in ctx.prior_findings[:5]]
        lines.append("Prior findings: " + " | ".join(snippets))
    if ctx.known_constraints:
        lines.append("Constraints: " + "; ".join(ctx.known_constraints[:5]))
    if ctx.related_methods:
        lines.append("Methods: " + ", ".join(ctx.related_methods[:8]))
    rendered = "\n".join(lines)
    char_budget = max_tokens * 4
    if len(rendered) > char_budget:
        rendered = rendered[:char_budget] + "..."
    return rendered
@@ -0,0 +1,35 @@
1
+ """
2
+ Swarm memory: persistent store, semantic index, and router for agent recall.
3
+
4
+ - memory_types: EpisodicMemory, SemanticMemory, ArtifactMemory, ResearchMemory
5
+ - memory_store: SQLite-backed store (store, retrieve, delete, list)
6
+ - memory_index: vector/semantic search (query_memory, top_k)
7
+ - memory_router: select relevant memories for a task
8
+ """
9
+
10
+ from devsper.memory.memory_types import (
11
+ EpisodicMemory,
12
+ SemanticMemory,
13
+ ArtifactMemory,
14
+ ResearchMemory,
15
+ MemoryRecord,
16
+ MemoryType,
17
+ )
18
+ from devsper.memory.memory_store import MemoryStore
19
+ from devsper.memory.memory_index import MemoryIndex
20
+ from devsper.memory.memory_router import MemoryRouter
21
+
22
+ __all__ = [
23
+ "EpisodicMemory",
24
+ "SemanticMemory",
25
+ "ArtifactMemory",
26
+ "ResearchMemory",
27
+ "MemoryRecord",
28
+ "MemoryType",
29
+ "MemoryStore",
30
+ "MemoryIndex",
31
+ "MemoryRouter",
32
+ ]
33
+
34
+ # Optional: import submodules for summarizer, namespaces, scoring
35
+ # from devsper.memory import summarizer, namespaces, scoring
@@ -0,0 +1,138 @@
1
+ """
2
+ v1.8: Memory consolidation — cluster similar records, summarize clusters, archive originals.
3
+ """
4
+
5
+ from dataclasses import dataclass
6
+
7
+ from devsper.memory.memory_store import MemoryStore
8
+ from devsper.memory.memory_index import MemoryIndex
9
+ from devsper.memory.memory_types import MemoryRecord, MemoryType
10
+ from devsper.memory.memory_store import generate_memory_id
11
+ from devsper.utils.models import generate
12
+
13
+
14
@dataclass
class ConsolidationReport:
    """Summary statistics for one memory-consolidation pass."""

    # Total clusters produced by the clustering step (including singletons).
    clusters_found: int
    # Clusters large enough (>= min_cluster_size) to be summarized.
    clusters_consolidated: int
    # Original records marked archived after summarization.
    records_archived: int
    # New summary records written to the store.
    records_created: int
    # Rough token savings: archived records times an assumed per-record average.
    tokens_saved_estimate: int
21
+
22
+
23
+ def _cosine_sim(a: list[float], b: list[float]) -> float:
24
+ if not a or not b or len(a) != len(b):
25
+ return 0.0
26
+ dot = sum(x * y for x, y in zip(a, b))
27
+ na = sum(x * x for x in a) ** 0.5
28
+ nb = sum(x * x for x in b) ** 0.5
29
+ if na == 0 or nb == 0:
30
+ return 0.0
31
+ return dot / (na * nb)
32
+
33
+
34
class MemoryConsolidator:
    """
    Clusters similar memory records by embedding, summarizes each
    sufficiently large cluster into one high-quality semantic record,
    and archives the originals. Keeps agent context tight for
    long-running projects.
    """

    def __init__(self, min_cluster_size: int = 3):
        # Clusters smaller than this are left untouched.
        self.min_cluster_size = min_cluster_size

    async def _summarize_cluster(self, records: list[MemoryRecord], model: str) -> str:
        """Synthesize up to 20 related records into one dense summary (<= 2000 chars)."""
        # Cap both the record count and per-record length to bound prompt size.
        blocks = "\n\n".join((r.content or "")[:500] for r in records[:20])
        prompt = f"""Synthesize these {len(records)} related memory records into one comprehensive, information-dense record. Preserve all unique facts. Max 300 words.

Records:
{blocks}"""
        # NOTE(review): `generate` is called synchronously inside an async
        # method; if it is a coroutine function this needs an await — confirm
        # against devsper.utils.models.
        out = generate(model, prompt)
        return (out or "").strip()[:2000]

    async def consolidate(
        self,
        memory_store: MemoryStore,
        memory_index: MemoryIndex,
        worker_model: str,
        dry_run: bool = False,
    ) -> ConsolidationReport:
        """
        Consolidate similar non-archived memory records.

        Steps:
        1. Load non-archived records and keep those with embeddings.
        2. Cluster embeddings (AgglomerativeClustering, cosine distance,
           distance_threshold=0.25, average linkage).
        3. For clusters with >= min_cluster_size records, generate a summary
           (skipped — only counted — when dry_run is True).
        4. Store the summary as a new semantic record tagged "consolidated".
        5. Archive the originals.

        Returns a ConsolidationReport with counts and a rough token-savings
        estimate.

        Raises:
            ImportError: if scikit-learn is not installed.
        """
        try:
            from sklearn.cluster import AgglomerativeClustering
            import numpy as np
        except ImportError:
            raise ImportError(
                "Memory consolidation requires scikit-learn. Install with: pip install devsper[data]"
            ) from None

        records = memory_store.list_memory(limit=5000, include_archived=False)
        with_emb = [r for r in records if r.embedding is not None]
        if len(with_emb) < self.min_cluster_size:
            # Not enough material to form even one cluster.
            return ConsolidationReport(
                clusters_found=0,
                clusters_consolidated=0,
                records_archived=0,
                records_created=0,
                tokens_saved_estimate=0,
            )

        # Build the embedding matrix in one allocation. The previous
        # row-by-row np.vstack loop reallocated and copied the whole array on
        # every iteration — accidental O(n^2) for n records.
        X = np.array([r.embedding for r in with_emb])
        clustering = AgglomerativeClustering(
            n_clusters=None,
            distance_threshold=0.25,
            metric="cosine",
            linkage="average",
        )
        labels = clustering.fit_predict(X)
        unique_labels = set(labels)
        clusters_found = len(unique_labels)
        clusters_consolidated = 0
        records_archived = 0
        records_created = 0
        avg_tokens = 100  # assumed per-record tokens for the savings estimate

        for lab in unique_labels:
            cluster_records = [r for r, lbl in zip(with_emb, labels) if lbl == lab]
            if len(cluster_records) < self.min_cluster_size:
                continue
            clusters_consolidated += 1
            if dry_run:
                # Report what would happen without touching the store.
                records_archived += len(cluster_records)
                records_created += 1
                continue
            summary_text = await self._summarize_cluster(cluster_records, worker_model)
            summary_record = MemoryRecord(
                id=generate_memory_id(),
                memory_type=MemoryType.SEMANTIC,
                content=summary_text,
                tags=["consolidated"],
                run_id="",
                archived=False,
            )
            # Give the summary its own embedding before persisting it.
            summary_record = memory_index.ensure_embedding(summary_record)
            memory_store.store(summary_record)
            records_created += 1
            for r in cluster_records:
                memory_store.set_archived(r.id, True)
                records_archived += 1

        tokens_saved_estimate = records_archived * avg_tokens
        return ConsolidationReport(
            clusters_found=clusters_found,
            clusters_consolidated=clusters_consolidated,
            records_archived=records_archived,
            records_created=records_created,
            tokens_saved_estimate=tokens_saved_estimate,
        )