superlocalmemory 3.4.18 → 3.4.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172)
  1. package/CHANGELOG.md +35 -0
  2. package/README.md +42 -34
  3. package/bin/slm +11 -0
  4. package/bin/slm.bat +12 -0
  5. package/package.json +4 -3
  6. package/pyproject.toml +3 -2
  7. package/scripts/build-slm-hook.ps1 +40 -0
  8. package/scripts/build-slm-hook.sh +45 -0
  9. package/scripts/build_entry.py +452 -0
  10. package/scripts/ci/stage5b_gate.sh +50 -0
  11. package/scripts/postinstall/validation.js +187 -0
  12. package/scripts/postinstall-interactive.js +756 -0
  13. package/scripts/postinstall_binary.js +287 -0
  14. package/scripts/release_manifest.py +273 -0
  15. package/scripts/slm-hook.spec +56 -0
  16. package/skills/slm-build-graph/SKILL.md +423 -0
  17. package/skills/slm-list-recent/SKILL.md +348 -0
  18. package/skills/slm-recall/SKILL.md +343 -0
  19. package/skills/slm-remember/SKILL.md +194 -0
  20. package/skills/slm-show-patterns/SKILL.md +224 -0
  21. package/skills/slm-status/SKILL.md +363 -0
  22. package/skills/slm-switch-profile/SKILL.md +442 -0
  23. package/src/superlocalmemory/cli/commands.py +219 -79
  24. package/src/superlocalmemory/cli/context_commands.py +192 -0
  25. package/src/superlocalmemory/cli/daemon.py +15 -1
  26. package/src/superlocalmemory/cli/db_migrate.py +80 -0
  27. package/src/superlocalmemory/cli/escape_hatch.py +220 -0
  28. package/src/superlocalmemory/cli/main.py +72 -1
  29. package/src/superlocalmemory/core/context_cache.py +397 -0
  30. package/src/superlocalmemory/core/embeddings.py +8 -2
  31. package/src/superlocalmemory/core/engine.py +38 -2
  32. package/src/superlocalmemory/core/engine_wiring.py +1 -1
  33. package/src/superlocalmemory/core/ram_lock.py +111 -0
  34. package/src/superlocalmemory/core/recall_pipeline.py +433 -3
  35. package/src/superlocalmemory/core/recall_worker.py +8 -3
  36. package/src/superlocalmemory/core/security_primitives.py +635 -0
  37. package/src/superlocalmemory/core/shadow_router.py +319 -0
  38. package/src/superlocalmemory/core/slm_disabled.py +87 -0
  39. package/src/superlocalmemory/core/slmignore.py +125 -0
  40. package/src/superlocalmemory/core/topic_signature.py +143 -0
  41. package/src/superlocalmemory/core/worker_pool.py +14 -3
  42. package/src/superlocalmemory/encoding/cognitive_consolidator.py +2 -2
  43. package/src/superlocalmemory/evolution/budget.py +321 -0
  44. package/src/superlocalmemory/evolution/llm_dispatch.py +508 -0
  45. package/src/superlocalmemory/evolution/skill_evolver.py +144 -94
  46. package/src/superlocalmemory/hooks/_outcome_common.py +506 -0
  47. package/src/superlocalmemory/hooks/adapter_base.py +317 -0
  48. package/src/superlocalmemory/hooks/antigravity_adapter.py +192 -0
  49. package/src/superlocalmemory/hooks/claude_code_hooks.py +33 -1
  50. package/src/superlocalmemory/hooks/context_payload.py +312 -0
  51. package/src/superlocalmemory/hooks/copilot_adapter.py +154 -0
  52. package/src/superlocalmemory/hooks/cross_platform_connector.py +90 -0
  53. package/src/superlocalmemory/hooks/cursor_adapter.py +195 -0
  54. package/src/superlocalmemory/hooks/hook_handlers.py +109 -8
  55. package/src/superlocalmemory/hooks/ide_connector.py +25 -2
  56. package/src/superlocalmemory/hooks/post_tool_async_hook.py +165 -0
  57. package/src/superlocalmemory/hooks/post_tool_outcome_hook.py +223 -0
  58. package/src/superlocalmemory/hooks/prewarm_auth.py +170 -0
  59. package/src/superlocalmemory/hooks/session_registry.py +186 -0
  60. package/src/superlocalmemory/hooks/stop_outcome_hook.py +134 -0
  61. package/src/superlocalmemory/hooks/sync_loop.py +114 -0
  62. package/src/superlocalmemory/hooks/user_prompt_hook.py +128 -0
  63. package/src/superlocalmemory/hooks/user_prompt_rehash_hook.py +202 -0
  64. package/src/superlocalmemory/infra/backup.py +3 -3
  65. package/src/superlocalmemory/infra/cloud_backup.py +2 -2
  66. package/src/superlocalmemory/infra/event_bus.py +2 -2
  67. package/src/superlocalmemory/infra/webhook_dispatcher.py +3 -3
  68. package/src/superlocalmemory/learning/arm_catalog.py +99 -0
  69. package/src/superlocalmemory/learning/bandit.py +526 -0
  70. package/src/superlocalmemory/learning/bandit_cache.py +133 -0
  71. package/src/superlocalmemory/learning/behavioral.py +53 -1
  72. package/src/superlocalmemory/learning/consolidation_cycle.py +381 -0
  73. package/src/superlocalmemory/learning/consolidation_worker.py +188 -520
  74. package/src/superlocalmemory/learning/database.py +256 -0
  75. package/src/superlocalmemory/learning/dedup_hnsw.py +413 -0
  76. package/src/superlocalmemory/learning/ensemble.py +300 -0
  77. package/src/superlocalmemory/learning/fact_outcome_joins.py +207 -0
  78. package/src/superlocalmemory/learning/forgetting_scheduler.py +55 -0
  79. package/src/superlocalmemory/learning/hnsw_dedup.py +69 -0
  80. package/src/superlocalmemory/learning/labeler.py +87 -0
  81. package/src/superlocalmemory/learning/legacy_migration.py +277 -0
  82. package/src/superlocalmemory/learning/memory_merge.py +160 -0
  83. package/src/superlocalmemory/learning/model_cache.py +269 -0
  84. package/src/superlocalmemory/learning/model_rollback.py +278 -0
  85. package/src/superlocalmemory/learning/outcome_queue.py +284 -0
  86. package/src/superlocalmemory/learning/pattern_miner.py +415 -0
  87. package/src/superlocalmemory/learning/pattern_miner_constants.py +47 -0
  88. package/src/superlocalmemory/learning/ranker.py +225 -81
  89. package/src/superlocalmemory/learning/ranker_common.py +163 -0
  90. package/src/superlocalmemory/learning/ranker_retrain_legacy.py +202 -0
  91. package/src/superlocalmemory/learning/ranker_retrain_online.py +411 -0
  92. package/src/superlocalmemory/learning/reward.py +777 -0
  93. package/src/superlocalmemory/learning/reward_archive.py +210 -0
  94. package/src/superlocalmemory/learning/reward_boost.py +201 -0
  95. package/src/superlocalmemory/learning/reward_proxy.py +326 -0
  96. package/src/superlocalmemory/learning/shadow_test.py +524 -0
  97. package/src/superlocalmemory/learning/signal_worker.py +270 -0
  98. package/src/superlocalmemory/learning/signals.py +314 -0
  99. package/src/superlocalmemory/learning/trigram_index.py +547 -0
  100. package/src/superlocalmemory/mcp/server.py +5 -5
  101. package/src/superlocalmemory/mcp/tools_context.py +183 -0
  102. package/src/superlocalmemory/mcp/tools_core.py +92 -27
  103. package/src/superlocalmemory/parameterization/soft_prompt_generator.py +13 -0
  104. package/src/superlocalmemory/retrieval/engine.py +52 -0
  105. package/src/superlocalmemory/retrieval/reranker.py +4 -2
  106. package/src/superlocalmemory/server/api.py +2 -2
  107. package/src/superlocalmemory/server/bandit_loops.py +140 -0
  108. package/src/superlocalmemory/server/middleware/__init__.py +11 -0
  109. package/src/superlocalmemory/server/middleware/security_headers.py +144 -0
  110. package/src/superlocalmemory/server/routes/backup.py +36 -13
  111. package/src/superlocalmemory/server/routes/behavioral.py +50 -19
  112. package/src/superlocalmemory/server/routes/brain.py +1234 -0
  113. package/src/superlocalmemory/server/routes/data_io.py +4 -4
  114. package/src/superlocalmemory/server/routes/events.py +2 -2
  115. package/src/superlocalmemory/server/routes/helpers.py +1 -1
  116. package/src/superlocalmemory/server/routes/learning.py +192 -7
  117. package/src/superlocalmemory/server/routes/memories.py +189 -1
  118. package/src/superlocalmemory/server/routes/prewarm.py +171 -0
  119. package/src/superlocalmemory/server/routes/profiles.py +3 -3
  120. package/src/superlocalmemory/server/routes/token.py +88 -0
  121. package/src/superlocalmemory/server/routes/ws.py +5 -5
  122. package/src/superlocalmemory/server/security_middleware.py +13 -7
  123. package/src/superlocalmemory/server/ui.py +2 -2
  124. package/src/superlocalmemory/server/unified_daemon.py +335 -3
  125. package/src/superlocalmemory/storage/migration_runner.py +545 -0
  126. package/src/superlocalmemory/storage/migrations/M001_add_signal_features_columns.py +67 -0
  127. package/src/superlocalmemory/storage/migrations/M002_model_state_history.py +132 -0
  128. package/src/superlocalmemory/storage/migrations/M003_migration_log.py +38 -0
  129. package/src/superlocalmemory/storage/migrations/M004_cross_platform_sync_log.py +46 -0
  130. package/src/superlocalmemory/storage/migrations/M005_bandit_tables.py +75 -0
  131. package/src/superlocalmemory/storage/migrations/M006_action_outcomes_reward.py +75 -0
  132. package/src/superlocalmemory/storage/migrations/M007_pending_outcomes.py +63 -0
  133. package/src/superlocalmemory/storage/migrations/M009_model_lineage.py +54 -0
  134. package/src/superlocalmemory/storage/migrations/M010_evolution_config.py +75 -0
  135. package/src/superlocalmemory/storage/migrations/M011_archive_and_merge.py +87 -0
  136. package/src/superlocalmemory/storage/migrations/M012_shadow_observations.py +72 -0
  137. package/src/superlocalmemory/storage/migrations/M013_bi_temporal_columns.py +55 -0
  138. package/src/superlocalmemory/storage/migrations/__init__.py +81 -0
  139. package/src/superlocalmemory/storage/models.py +4 -0
  140. package/src/superlocalmemory/ui/css/brain.css +409 -0
  141. package/src/superlocalmemory/ui/css/legacy-dashboard.css +645 -0
  142. package/src/superlocalmemory/ui/index.html +459 -1345
  143. package/src/superlocalmemory/ui/js/brain.js +1321 -0
  144. package/src/superlocalmemory/ui/js/clusters.js +123 -4
  145. package/src/superlocalmemory/ui/js/init.js +48 -39
  146. package/src/superlocalmemory/ui/js/memories.js +88 -2
  147. package/src/superlocalmemory/ui/js/modal.js +71 -1
  148. package/src/superlocalmemory/ui/js/ng-shell.js +101 -88
  149. package/src/superlocalmemory/ui/js/trust-dashboard.js +168 -25
  150. package/src/superlocalmemory/ui/vendor/bootstrap-icons/bootstrap-icons.css +2018 -0
  151. package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff +0 -0
  152. package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff2 +0 -0
  153. package/src/superlocalmemory/ui/vendor/bootstrap.bundle.min.js +7 -0
  154. package/src/superlocalmemory/ui/vendor/bootstrap.min.css +6 -0
  155. package/src/superlocalmemory/ui/vendor/d3.v7.min.js +2 -0
  156. package/src/superlocalmemory/ui/vendor/graphology-library.min.js +2 -0
  157. package/src/superlocalmemory/ui/vendor/graphology.umd.min.js +2 -0
  158. package/src/superlocalmemory/ui/vendor/inter-ui/inter-variable.min.css +8 -0
  159. package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable-Italic.woff2 +0 -0
  160. package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable.woff2 +0 -0
  161. package/src/superlocalmemory/ui/vendor/sigma.min.js +1 -0
  162. package/src/superlocalmemory/ui/js/behavioral.js +0 -447
  163. package/src/superlocalmemory/ui/js/graph-core.js +0 -447
  164. package/src/superlocalmemory/ui/js/graph-interactions.js +0 -351
  165. package/src/superlocalmemory/ui/js/learning.js +0 -435
  166. package/src/superlocalmemory/ui/js/patterns.js +0 -93
  167. package/src/superlocalmemory.egg-info/PKG-INFO +0 -647
  168. package/src/superlocalmemory.egg-info/SOURCES.txt +0 -335
  169. package/src/superlocalmemory.egg-info/dependency_links.txt +0 -1
  170. package/src/superlocalmemory.egg-info/entry_points.txt +0 -2
  171. package/src/superlocalmemory.egg-info/requires.txt +0 -58
  172. package/src/superlocalmemory.egg-info/top_level.txt +0 -1
@@ -2,15 +2,24 @@
2
2
  # Licensed under AGPL-3.0-or-later - see LICENSE file
3
3
  # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
4
 
5
- """Sleep-Time Consolidation Worker — background memory maintenance.
5
+ """Sleep-Time Consolidation Worker — **back-compat shim**.
6
6
 
7
- Runs periodically (every 6 hours or on-demand) to:
8
- 1. Decay confidence on unused facts (floor 0.1)
9
- 2. Deduplicate near-identical facts
10
- 3. Auto-retrain the adaptive ranker when signal threshold is met
11
- 4. Report consolidation stats
7
+ As of v3.4.21 (F4.A Stage-8 H-01 fix), the 1344-LOC god-module was
8
+ split into five cohesive files:
12
9
 
13
- Inspired by: Letta's sleep-time compute, neuroscience memory consolidation.
10
+ - ``consolidation_cycle.py`` — :class:`ConsolidationWorker`.
11
+ - ``pattern_miner.py`` — :func:`generate_patterns`.
12
+ - ``ranker_retrain_legacy.py`` — deprecated cold-start path.
13
+ - ``ranker_retrain_online.py`` — LLD-10 candidate seams.
14
+ - ``ranker_common.py`` — training-matrix + NDCG helpers.
15
+
16
+ This shim exists so that the 3830 live tests + dashboard + MCP tools +
17
+ managed server routes keep importing from
18
+ ``superlocalmemory.learning.consolidation_worker`` with zero churn.
19
+
20
+ Tests that ``monkeypatch.setattr(cw_mod, "_train_booster", fake)`` work
21
+ unchanged because :func:`_run_shadow_cycle` is defined here and resolves
22
+ its helper names through *this* module's globals.
14
23
 
15
24
  Part of Qualixar | Author: Varun Pratap Bhardwaj
16
25
  """
@@ -19,521 +28,180 @@ from __future__ import annotations
19
28
 
20
29
  import logging
21
30
  import sqlite3
22
- from datetime import datetime, timezone
23
- from pathlib import Path
24
31
 
25
32
  logger = logging.getLogger(__name__)
26
33
 
27
34
 
28
- class ConsolidationWorker:
29
- """Background memory maintenance worker.
30
-
31
- Call `run()` periodically or via dashboard button.
32
- All operations are safe — they improve quality without losing data.
35
+ # ---------------------------------------------------------------------------
36
+ # Re-exports — anything tests or production code reached for on the old
37
+ # module stays reachable here under the same name.
38
+ # ---------------------------------------------------------------------------
39
+
40
+ from superlocalmemory.learning.consolidation_cycle import ( # noqa: E402
41
+ ConsolidationWorker,
42
+ )
43
+ from superlocalmemory.learning.ranker_common import ( # noqa: E402
44
+ _build_training_matrix,
45
+ _compute_eval_metrics,
46
+ _shadow_test_improved,
47
+ )
48
+ from superlocalmemory.learning.ranker_retrain_legacy import ( # noqa: E402
49
+ _retrain_ranker_impl,
50
+ _LEGACY_RETRAIN_DEPRECATED,
51
+ )
52
+ from superlocalmemory.learning.ranker_retrain_online import ( # noqa: E402
53
+ RETRAIN_HOURS_THRESHOLD,
54
+ RETRAIN_HYPERPARAM_CAPS,
55
+ RETRAIN_MODEL_SIZE_BYTES_CAP,
56
+ RETRAIN_NEW_OUTCOMES_THRESHOLD,
57
+ RETRAIN_WALL_TIME_BUDGET_SEC,
58
+ RetrainWallTimeExceeded,
59
+ _check_rollback,
60
+ _feature_names,
61
+ _fetch_training_rows,
62
+ _measure_serialized_size,
63
+ _persist_candidate,
64
+ _promote_candidate,
65
+ _train_booster,
66
+ )
67
+
68
+
69
+ __all__ = (
70
+ "ConsolidationWorker",
71
+ "RetrainWallTimeExceeded",
72
+ "RETRAIN_HYPERPARAM_CAPS",
73
+ "RETRAIN_WALL_TIME_BUDGET_SEC",
74
+ "RETRAIN_MODEL_SIZE_BYTES_CAP",
75
+ "RETRAIN_NEW_OUTCOMES_THRESHOLD",
76
+ "RETRAIN_HOURS_THRESHOLD",
77
+ "_retrain_ranker_impl",
78
+ "_LEGACY_RETRAIN_DEPRECATED",
79
+ "_build_training_matrix",
80
+ "_compute_eval_metrics",
81
+ "_shadow_test_improved",
82
+ "_feature_names",
83
+ "_fetch_training_rows",
84
+ "_measure_serialized_size",
85
+ "_train_booster",
86
+ "_persist_candidate",
87
+ "_promote_candidate",
88
+ "_check_rollback",
89
+ "_run_shadow_cycle",
90
+ )
91
+
92
+
93
+ # ---------------------------------------------------------------------------
94
+ # Orchestrator — defined HERE so tests patching ``cw_mod._train_booster``
95
+ # actually intercept the helper call.
96
+ # ---------------------------------------------------------------------------
97
+
98
+
99
+ def _run_shadow_cycle(
100
+ *,
101
+ memory_db_path: str,
102
+ learning_db_path: str,
103
+ profile_id: str,
104
+ ) -> dict:
105
+ """Top-level online retrain cycle — runs inside the consolidation
106
+ worker.
107
+
108
+ Orchestrates: fetch rows → train → size-check → persist candidate
109
+ (NOT auto-promote). Promotion happens separately once the live
110
+ shadow-router accumulates enough observations (see
111
+ :mod:`superlocalmemory.core.shadow_router`).
112
+
113
+ Helper functions are looked up via this module's namespace so test
114
+ monkey-patches on ``consolidation_worker`` take effect without any
115
+ test churn.
116
+
117
+ Returns a dict with keys:
118
+ * ``aborted``: reason string if aborted (``'insufficient_data'``,
119
+ ``'model_too_large'``, ``'wall_time_exceeded'``, ``'train_error'``).
120
+ * ``candidate_persisted``: True if a candidate row was written.
121
+ * ``promoted``: False (always — promotion is a separate step).
122
+ * ``metrics``: training metrics dict on success.
33
123
  """
34
-
35
- def __init__(self, memory_db: str | Path, learning_db: str | Path) -> None:
36
- self._memory_db = str(memory_db)
37
- self._learning_db = str(learning_db)
38
-
39
- def run(self, profile_id: str, dry_run: bool = False) -> dict:
40
- """Run full consolidation cycle. Returns stats."""
41
- stats = {
42
- "decayed": 0,
43
- "deduped": 0,
44
- "retrained": False,
45
- "signal_count": 0,
46
- "ranker_phase": 1,
47
- "timestamp": datetime.now(timezone.utc).isoformat(),
48
- }
49
-
50
- # 1. Confidence decay on unused facts
51
- try:
52
- from superlocalmemory.learning.signals import LearningSignals
53
- decayed = LearningSignals.decay_confidence(
54
- self._memory_db, profile_id, rate=0.001,
55
- )
56
- stats["decayed"] = decayed
57
- if not dry_run:
58
- logger.info("Confidence decay: %d facts affected", decayed)
59
- except Exception as exc:
60
- logger.debug("Decay failed: %s", exc)
61
-
62
- # 2. Deduplication (mark near-identical facts)
63
- try:
64
- deduped = self._deduplicate(profile_id, dry_run)
65
- stats["deduped"] = deduped
66
- except Exception as exc:
67
- logger.debug("Dedup failed: %s", exc)
68
-
69
- # 3. Generate behavioral patterns from memories
70
- try:
71
- patterns = self._generate_patterns(profile_id, dry_run)
72
- stats["patterns_generated"] = patterns
73
- except Exception as exc:
74
- logger.debug("Pattern generation failed: %s", exc)
75
-
76
- # 4. Recompute graph intelligence (v3.4.2: wired into learning pipeline)
77
- try:
78
- from superlocalmemory.core.graph_analyzer import GraphAnalyzer
79
- conn_ga = sqlite3.connect(self._memory_db, timeout=10)
80
- conn_ga.execute("PRAGMA busy_timeout=5000")
81
- conn_ga.row_factory = sqlite3.Row
82
-
83
- class _DBProxy:
84
- """Minimal DB proxy for GraphAnalyzer compatibility."""
85
- def __init__(self, connection: sqlite3.Connection) -> None:
86
- self._conn = connection
87
- def execute(self, sql: str, params: tuple = ()) -> list:
88
- cursor = self._conn.execute(sql, params)
89
- if sql.strip().upper().startswith(("INSERT", "UPDATE", "DELETE", "ALTER", "CREATE")):
90
- self._conn.commit()
91
- return []
92
- return cursor.fetchall()
93
-
94
- ga = GraphAnalyzer(_DBProxy(conn_ga))
95
- if not dry_run:
96
- ga_result = ga.compute_and_store(profile_id)
97
- stats["graph_nodes"] = ga_result.get("node_count", 0)
98
- stats["graph_communities"] = ga_result.get("community_count", 0)
99
- logger.info(
100
- "Graph analysis: %d nodes, %d communities",
101
- stats["graph_nodes"], stats["graph_communities"],
102
- )
103
- conn_ga.close()
104
- except Exception as exc:
105
- logger.debug("Graph analysis failed: %s", exc)
106
-
107
- # 5. Check if ranker should retrain
108
- try:
109
- from superlocalmemory.learning.feedback import FeedbackCollector
110
- collector = FeedbackCollector(Path(self._learning_db))
111
- signal_count = collector.get_feedback_count(profile_id)
112
- stats["signal_count"] = signal_count
113
- stats["ranker_phase"] = 1 if signal_count < 50 else (2 if signal_count < 200 else 3)
114
-
115
- # Auto-retrain at threshold crossings
116
- if signal_count >= 200 and not dry_run:
117
- retrained = self._retrain_ranker(profile_id, signal_count)
118
- stats["retrained"] = retrained
119
- except Exception as exc:
120
- logger.debug("Retrain check failed: %s", exc)
121
-
122
- # 6. Entity compilation (v3.4.3: compiled truth per entity)
123
- if not dry_run:
124
- try:
125
- from superlocalmemory.learning.entity_compiler import EntityCompiler
126
- from superlocalmemory.core.config import SLMConfig
127
- config = SLMConfig.load()
128
- compiler = EntityCompiler(self._memory_db, config)
129
- ec_result = compiler.compile_all(profile_id)
130
- stats["entities_compiled"] = ec_result.get("compiled", 0)
131
- if ec_result["compiled"] > 0:
132
- logger.info("Entity compilation: %d entities compiled",
133
- ec_result["compiled"])
134
- except Exception as exc:
135
- logger.debug("Entity compilation failed: %s", exc)
136
-
137
- return stats
138
-
139
- def _deduplicate(self, profile_id: str, dry_run: bool) -> int:
140
- """Find and mark near-duplicate facts.
141
-
142
- Uses content similarity (exact prefix match for now).
143
- Does NOT delete — marks with lower confidence.
144
- """
145
- try:
146
- conn = sqlite3.connect(self._memory_db, timeout=10)
147
- conn.execute("PRAGMA busy_timeout=5000")
148
- conn.row_factory = sqlite3.Row
149
-
150
- rows = conn.execute(
151
- "SELECT fact_id, content FROM atomic_facts "
152
- "WHERE profile_id = ? ORDER BY created_at",
153
- (profile_id,),
154
- ).fetchall()
155
-
156
- seen_prefixes: dict[str, str] = {}
157
- duplicates = []
158
-
159
- for r in rows:
160
- d = dict(r)
161
- prefix = d["content"][:100].strip().lower()
162
- if prefix in seen_prefixes:
163
- duplicates.append(d["fact_id"])
164
- else:
165
- seen_prefixes[prefix] = d["fact_id"]
166
-
167
- if duplicates and not dry_run:
168
- for fid in duplicates:
169
- conn.execute(
170
- "UPDATE atomic_facts SET confidence = MAX(0.1, confidence * 0.5) "
171
- "WHERE fact_id = ?",
172
- (fid,),
173
- )
174
- conn.commit()
175
-
176
- conn.close()
177
- return len(duplicates)
178
- except Exception:
179
- return 0
180
-
181
- def _generate_patterns(self, profile_id: str, dry_run: bool) -> int:
182
- """Mine behavioral patterns from ALL memory sources.
183
-
184
- v3.4.1: Expanded from 3 to 7 pattern types. No 500-fact cap.
185
- Analyzes: facts, signals, co-retrieval edges, channel credits,
186
- entities, sessions, graph communities.
187
- """
188
- try:
189
- from superlocalmemory.learning.behavioral import BehavioralPatternStore
190
- import re
191
- from collections import Counter, defaultdict
192
-
193
- conn = sqlite3.connect(self._memory_db, timeout=10)
194
- conn.execute("PRAGMA busy_timeout=5000")
195
- conn.row_factory = sqlite3.Row
196
-
197
- # v3.4.1: No cap — analyze ALL facts
198
- facts = conn.execute(
199
- "SELECT fact_id, content, fact_type, created_at, session_id, "
200
- "confidence, canonical_entities_json "
201
- "FROM atomic_facts "
202
- "WHERE profile_id = ? AND lifecycle = 'active' "
203
- "ORDER BY created_at DESC",
204
- (profile_id,),
205
- ).fetchall()
206
-
207
- if len(facts) < 5:
208
- conn.close()
209
- return 0
210
-
211
- store = BehavioralPatternStore(self._learning_db)
212
- generated = 0
213
-
214
- # ── 1. Tech Preferences (expanded keyword list) ───────────
215
- tech_keywords = {
216
- "python": "Python", "javascript": "JavaScript",
217
- "typescript": "TypeScript", "react": "React",
218
- "vue": "Vue", "angular": "Angular",
219
- "postgresql": "PostgreSQL", "mysql": "MySQL",
220
- "sqlite": "SQLite", "docker": "Docker",
221
- "kubernetes": "Kubernetes", "aws": "AWS",
222
- "azure": "Azure", "gcp": "GCP",
223
- "node": "Node.js", "fastapi": "FastAPI",
224
- "django": "Django", "flask": "Flask",
225
- "rust": "Rust", "go": "Go", "java": "Java",
226
- "git": "Git", "npm": "npm", "pip": "pip",
227
- "langchain": "LangChain", "ollama": "Ollama",
228
- "pytorch": "PyTorch", "claude": "Claude",
229
- "openai": "OpenAI", "anthropic": "Anthropic",
230
- "redis": "Redis", "mongodb": "MongoDB",
231
- "graphql": "GraphQL", "nextjs": "Next.js",
232
- "terraform": "Terraform", "nginx": "Nginx",
233
- "linux": "Linux", "macos": "macOS",
234
- "vscode": "VS Code", "neovim": "Neovim",
235
- }
236
-
237
- tech_counts: Counter = Counter()
238
- for f in facts:
239
- content = dict(f)["content"].lower()
240
- for keyword, label in tech_keywords.items():
241
- if keyword in content:
242
- tech_counts[label] += 1
243
-
244
- for tech, count in tech_counts.most_common(20):
245
- if count >= 2 and not dry_run:
246
- confidence = min(1.0, count / max(len(facts) * 0.1, 10))
247
- store.record_pattern(
248
- profile_id=profile_id,
249
- pattern_type="tech_preference",
250
- data={"topic": tech, "pattern_key": tech,
251
- "value": tech, "key": "tech",
252
- "evidence": count},
253
- success_rate=confidence,
254
- confidence=confidence,
255
- )
256
- generated += 1
257
-
258
- # ── 2. Topic Interests (word frequency) ───────────────────
259
- stopwords = frozenset({
260
- "the", "is", "a", "an", "in", "on", "at", "to", "for",
261
- "of", "and", "or", "not", "with", "that", "this", "was",
262
- "are", "be", "has", "had", "have", "from", "by", "it",
263
- "its", "as", "but", "were", "been", "being", "would",
264
- "could", "should", "will", "may", "might", "can", "do",
265
- "does", "did", "about", "into", "over", "after", "before",
266
- "then", "than", "also", "just", "like", "more", "some",
267
- "only", "other", "such", "each", "every", "both", "most",
268
- })
269
- word_counts: Counter = Counter()
270
- for f in facts:
271
- words = re.findall(r'\b[a-zA-Z]{4,}\b', dict(f)["content"].lower())
272
- for w in words:
273
- if w not in stopwords:
274
- word_counts[w] += 1
275
-
276
- for topic, count in word_counts.most_common(15):
277
- if count >= 3 and not dry_run:
278
- confidence = min(1.0, count / max(len(facts) * 0.05, 15))
279
- store.record_pattern(
280
- profile_id=profile_id,
281
- pattern_type="interest",
282
- data={"topic": topic, "pattern_key": topic,
283
- "count": count, "evidence": count},
284
- success_rate=confidence,
285
- confidence=confidence,
286
- )
287
- generated += 1
288
-
289
- # ── 3. Temporal Activity Patterns ─────────────────────────
290
- hour_counts: Counter = Counter()
291
- for f in facts:
292
- created = dict(f).get("created_at", "")
293
- try:
294
- if "T" in created:
295
- hour = int(created.split("T")[1][:2])
296
- elif " " in created:
297
- hour = int(created.split(" ")[1][:2])
298
- else:
299
- continue
300
- period = ("morning" if 6 <= hour < 12 else
301
- "afternoon" if 12 <= hour < 18 else
302
- "evening" if 18 <= hour < 22 else "night")
303
- hour_counts[period] += 1
304
- except (ValueError, IndexError):
305
- pass
306
-
307
- total_hours = sum(hour_counts.values())
308
- for period, count in hour_counts.most_common():
309
- if count >= 2 and total_hours > 0 and not dry_run:
310
- pct = round(count / total_hours * 100)
311
- store.record_pattern(
312
- profile_id=profile_id,
313
- pattern_type="temporal",
314
- data={"topic": period, "pattern_key": period,
315
- "value": f"{period} ({pct}%)",
316
- "evidence": count, "key": period,
317
- "distribution": dict(hour_counts)},
318
- success_rate=pct / 100,
319
- confidence=min(1.0, count / max(total_hours * 0.1, 5)),
320
- )
321
- generated += 1
322
-
323
- # ── 4. Entity Preferences (v3.4.1 NEW) ───────────────────
324
- import json as _json
325
- entity_counts: Counter = Counter()
326
- for f in facts:
327
- raw = dict(f).get("canonical_entities_json", "")
328
- if raw:
329
- try:
330
- for ent in _json.loads(raw):
331
- entity_counts[ent] += 1
332
- except (ValueError, TypeError):
333
- pass
334
-
335
- # v3.4.7: Resolve entity IDs to readable canonical names
336
- entity_names: dict = {}
337
- try:
338
- eid_list = list(entity_counts.keys())
339
- if eid_list:
340
- placeholders = ",".join("?" * len(eid_list))
341
- name_rows = conn.execute(
342
- f"SELECT entity_id, canonical_name FROM canonical_entities "
343
- f"WHERE entity_id IN ({placeholders})",
344
- eid_list,
345
- ).fetchall()
346
- entity_names = {dict(r)["entity_id"]: dict(r)["canonical_name"] for r in name_rows}
347
- except Exception:
348
- pass
349
-
350
- for entity, count in entity_counts.most_common(15):
351
- if count >= 3 and not dry_run:
352
- readable = entity_names.get(entity, entity)
353
- confidence = min(1.0, count / max(len(facts) * 0.05, 10))
354
- store.record_pattern(
355
- profile_id=profile_id,
356
- pattern_type="entity_preferences",
357
- data={"topic": readable, "pattern_key": f"entity:{readable}",
358
- "value": readable, "evidence": count,
359
- "source": "entity_frequency"},
360
- success_rate=confidence,
361
- confidence=confidence,
362
- )
363
- generated += 1
364
-
365
- # ── 5. Session Activity Patterns (v3.4.1 NEW) ────────────
366
- session_counts: Counter = Counter()
367
- for f in facts:
368
- sid = dict(f).get("session_id", "")
369
- if sid:
370
- session_counts[sid] += 1
371
-
372
- if session_counts:
373
- avg_facts_per_session = sum(session_counts.values()) / len(session_counts)
374
- heavy_sessions = [s for s, c in session_counts.items() if c > avg_facts_per_session * 2]
375
- if heavy_sessions and not dry_run:
376
- store.record_pattern(
377
- profile_id=profile_id,
378
- pattern_type="session_activity",
379
- data={"pattern_key": "heavy_session_usage",
380
- "value": f"{len(heavy_sessions)} intensive sessions",
381
- "evidence": len(heavy_sessions),
382
- "avg_facts": round(avg_facts_per_session, 1),
383
- "total_sessions": len(session_counts)},
384
- success_rate=0.8,
385
- confidence=min(1.0, len(heavy_sessions) / 5),
386
- )
387
- generated += 1
388
-
389
- # ── 6. Fact Type Distribution (v3.4.1 NEW) ────────────────
390
- type_counts: Counter = Counter()
391
- for f in facts:
392
- ft = dict(f).get("fact_type", "semantic")
393
- type_counts[ft] += 1
394
-
395
- total_ft = sum(type_counts.values())
396
- if total_ft > 0 and not dry_run:
397
- dominant_type = type_counts.most_common(1)[0]
398
- pct = round(dominant_type[1] / total_ft * 100)
399
- store.record_pattern(
400
- profile_id=profile_id,
401
- pattern_type="fact_type_distribution",
402
- data={"pattern_key": "memory_style",
403
- "value": f"{dominant_type[0]} dominant ({pct}%)",
404
- "evidence": dominant_type[1],
405
- "distribution": dict(type_counts)},
406
- success_rate=pct / 100,
407
- confidence=min(1.0, dominant_type[1] / 20),
408
- )
409
- generated += 1
410
-
411
- # ── 7. Channel Performance (v3.4.1 NEW — from signals) ────
412
- try:
413
- learn_conn = sqlite3.connect(self._learning_db, timeout=10)
414
- learn_conn.row_factory = sqlite3.Row
415
-
416
- # Retrieval usage patterns from learning_feedback
417
- channel_rows = learn_conn.execute(
418
- "SELECT channel, COUNT(*) AS cnt, "
419
- "AVG(signal_value) AS avg_signal "
420
- "FROM learning_feedback "
421
- "WHERE profile_id = ? "
422
- "GROUP BY channel ORDER BY cnt DESC",
423
- (profile_id,),
424
- ).fetchall()
425
-
426
- for row in channel_rows:
427
- d = dict(row)
428
- ch = d.get("channel", "unknown")
429
- cnt = d.get("cnt", 0)
430
- avg_sig = round(float(d.get("avg_signal", 0) or 0), 3)
431
- if cnt >= 5 and not dry_run:
432
- store.record_pattern(
433
- profile_id=profile_id,
434
- pattern_type="channel_performance",
435
- data={"pattern_key": f"channel:{ch}",
436
- "value": f"{ch} ({cnt} hits, {avg_sig} avg)",
437
- "evidence": cnt,
438
- "avg_signal": avg_sig},
439
- success_rate=avg_sig,
440
- confidence=min(1.0, cnt / 50),
441
- )
442
- generated += 1
443
-
444
- # Co-retrieval cluster patterns
445
- try:
446
- coret_rows = learn_conn.execute(
447
- "SELECT fact_a, fact_b, co_access_count "
448
- "FROM co_retrieval_edges "
449
- "WHERE profile_id = ? AND co_access_count >= 3 "
450
- "ORDER BY co_access_count DESC LIMIT 20",
451
- (profile_id,),
452
- ).fetchall()
453
- if coret_rows and not dry_run:
454
- store.record_pattern(
455
- profile_id=profile_id,
456
- pattern_type="co_retrieval_clusters",
457
- data={"pattern_key": "co_retrieval_clusters",
458
- "value": f"{len(coret_rows)} strong fact pairs",
459
- "evidence": len(coret_rows),
460
- "top_pair_count": dict(coret_rows[0]).get("co_access_count", 0) if coret_rows else 0},
461
- success_rate=0.7,
462
- confidence=min(1.0, len(coret_rows) / 10),
463
- )
464
- generated += 1
465
- except Exception:
466
- pass
467
-
468
- learn_conn.close()
469
- except Exception as exc:
470
- logger.debug("Signal pattern mining failed: %s", exc)
471
-
472
- # ── 8. Community Membership (v3.4.1 NEW — from graph) ─────
473
- try:
474
- comm_rows = conn.execute(
475
- "SELECT community_id, COUNT(*) AS cnt "
476
- "FROM fact_importance "
477
- "WHERE profile_id = ? AND community_id IS NOT NULL "
478
- "GROUP BY community_id ORDER BY cnt DESC",
479
- (profile_id,),
480
- ).fetchall()
481
- if comm_rows and not dry_run:
482
- total_comm = sum(dict(r)["cnt"] for r in comm_rows)
483
- store.record_pattern(
484
- profile_id=profile_id,
485
- pattern_type="knowledge_structure",
486
- data={"pattern_key": "knowledge_structure",
487
- "value": f"{len(comm_rows)} topic communities, {total_comm} classified facts",
488
- "evidence": total_comm,
489
- "community_count": len(comm_rows)},
490
- success_rate=0.8,
491
- confidence=min(1.0, len(comm_rows) / 5),
492
- )
493
- generated += 1
494
- except Exception:
495
- pass
496
-
497
- conn.close()
498
-
499
- logger.info(
500
- "Pattern mining: %d patterns generated for profile %s "
501
- "from %d facts",
502
- generated, profile_id, len(facts),
503
- )
504
- return generated
505
- except Exception as exc:
506
- logger.warning("Pattern generation error: %s", exc)
507
- return 0
508
-
509
- def _retrain_ranker(self, profile_id: str, signal_count: int) -> bool:
510
- """Retrain the adaptive ranker from accumulated feedback."""
511
- try:
512
- from superlocalmemory.learning.feedback import FeedbackCollector
513
- from superlocalmemory.learning.ranker import AdaptiveRanker
514
-
515
- collector = FeedbackCollector(Path(self._learning_db))
516
- feedback = collector.get_feedback(profile_id, limit=500)
517
-
518
- if len(feedback) < 200:
519
- return False
520
-
521
- # Build training data from feedback
522
- training_data = []
523
- for f in feedback:
524
- label = f.get("signal_value", 0.5)
525
- training_data.append({
526
- "features": {"signal_value": label},
527
- "label": label,
528
- })
529
-
530
- ranker = AdaptiveRanker(signal_count=signal_count)
531
- trained = ranker.train(training_data)
532
-
533
- if trained:
534
- logger.info("Ranker retrained with %d examples (Phase 3)", len(training_data))
535
-
536
- return trained
537
- except Exception as exc:
538
- logger.debug("Retrain failed: %s", exc)
539
- return False
124
+ out: dict = {
125
+ "aborted": None, "candidate_persisted": False,
126
+ "promoted": False, "metrics": None,
127
+ }
128
+
129
+ try:
130
+ rows, _qids = _fetch_training_rows(learning_db_path, profile_id)
131
+ except Exception as exc:
132
+ logger.debug("fetch_training_rows failed: %s", exc)
133
+ out["aborted"] = "fetch_error"
134
+ return out
135
+
136
+ if len(rows) < 20:
137
+ out["aborted"] = "insufficient_data"
138
+ return out
139
+
140
+ # Load prior active for in-sample shadow.
141
+ try:
142
+ from superlocalmemory.learning.database import LearningDatabase
143
+ db = LearningDatabase(learning_db_path)
144
+ prior_row = db.load_active_model(profile_id)
145
+ except Exception:
146
+ prior_row = None
147
+
148
+ feature_names = _feature_names()
149
+
150
+ try:
151
+ booster, metrics = _train_booster(
152
+ learning_db_path, profile_id,
153
+ training_rows=rows, feature_names=feature_names,
154
+ prior_row=prior_row,
155
+ )
156
+ except RetrainWallTimeExceeded as exc:
157
+ out["aborted"] = "wall_time_exceeded"
158
+ out["metrics"] = {"wall_time_sec": exc.elapsed_sec}
159
+ return out
160
+ except Exception as exc:
161
+ logger.debug("train_booster failed: %s", exc)
162
+ out["aborted"] = "train_error"
163
+ return out
164
+
165
+ # Model-size guardrail (LLD-10 §3.2 post-train check).
166
+ size_bytes = _measure_serialized_size(booster)
167
+ if size_bytes > RETRAIN_MODEL_SIZE_BYTES_CAP:
168
+ logger.warning(
169
+ "retrain: candidate %.2f MB exceeds %.1f MB cap — rejecting",
170
+ size_bytes / 1e6, RETRAIN_MODEL_SIZE_BYTES_CAP / 1e6,
171
+ )
172
+ out["aborted"] = "model_too_large"
173
+ out["metrics"] = metrics
174
+ return out
175
+
176
+ # In-sample shadow gate — cheap filter before spending live recalls.
177
+ if prior_row is not None:
178
+ if not _shadow_test_improved(
179
+ prior_row, booster, rows, feature_names,
180
+ ):
181
+ out["aborted"] = "insample_shadow_fail"
182
+ out["metrics"] = metrics
183
+ return out
184
+
185
+ try:
186
+ state_bytes = booster.model_to_string().encode("utf-8")
187
+ except Exception as exc: # pragma: no cover — defensive
188
+ logger.debug("model serialise failed: %s", exc)
189
+ out["aborted"] = "serialise_error"
190
+ return out
191
+
192
+ try:
193
+ _persist_candidate(
194
+ learning_db_path, profile_id=profile_id,
195
+ state_bytes=state_bytes, feature_names=feature_names,
196
+ trained_on_count=len(rows), metrics=metrics,
197
+ shadow_results={"in_sample_pass": prior_row is not None},
198
+ )
199
+ except sqlite3.Error as exc:
200
+ logger.warning("persist_candidate failed: %s", exc)
201
+ out["aborted"] = "persist_error"
202
+ return out
203
+
204
+ out["candidate_persisted"] = True
205
+ out["promoted"] = False # Promotion is a separate live-shadow step.
206
+ out["metrics"] = metrics
207
+ return out