superlocalmemory 3.4.19 → 3.4.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -0
- package/README.md +42 -34
- package/bin/slm +11 -0
- package/bin/slm.bat +12 -0
- package/package.json +4 -3
- package/pyproject.toml +3 -2
- package/scripts/build-slm-hook.ps1 +40 -0
- package/scripts/build-slm-hook.sh +45 -0
- package/scripts/build_entry.py +452 -0
- package/scripts/ci/stage5b_gate.sh +50 -0
- package/scripts/postinstall/validation.js +187 -0
- package/scripts/postinstall-interactive.js +756 -0
- package/scripts/postinstall_binary.js +287 -0
- package/scripts/release_manifest.py +273 -0
- package/scripts/slm-hook.spec +56 -0
- package/skills/slm-build-graph/SKILL.md +423 -0
- package/skills/slm-list-recent/SKILL.md +348 -0
- package/skills/slm-recall/SKILL.md +343 -0
- package/skills/slm-remember/SKILL.md +194 -0
- package/skills/slm-show-patterns/SKILL.md +224 -0
- package/skills/slm-status/SKILL.md +363 -0
- package/skills/slm-switch-profile/SKILL.md +442 -0
- package/src/superlocalmemory/cli/commands.py +219 -79
- package/src/superlocalmemory/cli/context_commands.py +192 -0
- package/src/superlocalmemory/cli/daemon.py +15 -1
- package/src/superlocalmemory/cli/db_migrate.py +80 -0
- package/src/superlocalmemory/cli/escape_hatch.py +220 -0
- package/src/superlocalmemory/cli/main.py +72 -1
- package/src/superlocalmemory/core/context_cache.py +397 -0
- package/src/superlocalmemory/core/engine.py +38 -2
- package/src/superlocalmemory/core/engine_wiring.py +1 -1
- package/src/superlocalmemory/core/ram_lock.py +111 -0
- package/src/superlocalmemory/core/recall_pipeline.py +433 -3
- package/src/superlocalmemory/core/recall_worker.py +8 -3
- package/src/superlocalmemory/core/security_primitives.py +635 -0
- package/src/superlocalmemory/core/shadow_router.py +319 -0
- package/src/superlocalmemory/core/slm_disabled.py +87 -0
- package/src/superlocalmemory/core/slmignore.py +125 -0
- package/src/superlocalmemory/core/topic_signature.py +143 -0
- package/src/superlocalmemory/core/worker_pool.py +14 -3
- package/src/superlocalmemory/encoding/cognitive_consolidator.py +2 -2
- package/src/superlocalmemory/evolution/budget.py +321 -0
- package/src/superlocalmemory/evolution/llm_dispatch.py +508 -0
- package/src/superlocalmemory/evolution/skill_evolver.py +144 -94
- package/src/superlocalmemory/hooks/_outcome_common.py +506 -0
- package/src/superlocalmemory/hooks/adapter_base.py +317 -0
- package/src/superlocalmemory/hooks/antigravity_adapter.py +192 -0
- package/src/superlocalmemory/hooks/claude_code_hooks.py +33 -1
- package/src/superlocalmemory/hooks/context_payload.py +312 -0
- package/src/superlocalmemory/hooks/copilot_adapter.py +154 -0
- package/src/superlocalmemory/hooks/cross_platform_connector.py +90 -0
- package/src/superlocalmemory/hooks/cursor_adapter.py +195 -0
- package/src/superlocalmemory/hooks/hook_handlers.py +109 -8
- package/src/superlocalmemory/hooks/ide_connector.py +25 -2
- package/src/superlocalmemory/hooks/post_tool_async_hook.py +165 -0
- package/src/superlocalmemory/hooks/post_tool_outcome_hook.py +223 -0
- package/src/superlocalmemory/hooks/prewarm_auth.py +170 -0
- package/src/superlocalmemory/hooks/session_registry.py +186 -0
- package/src/superlocalmemory/hooks/stop_outcome_hook.py +134 -0
- package/src/superlocalmemory/hooks/sync_loop.py +114 -0
- package/src/superlocalmemory/hooks/user_prompt_hook.py +128 -0
- package/src/superlocalmemory/hooks/user_prompt_rehash_hook.py +202 -0
- package/src/superlocalmemory/infra/backup.py +3 -3
- package/src/superlocalmemory/infra/cloud_backup.py +2 -2
- package/src/superlocalmemory/infra/event_bus.py +2 -2
- package/src/superlocalmemory/infra/webhook_dispatcher.py +3 -3
- package/src/superlocalmemory/learning/arm_catalog.py +99 -0
- package/src/superlocalmemory/learning/bandit.py +526 -0
- package/src/superlocalmemory/learning/bandit_cache.py +133 -0
- package/src/superlocalmemory/learning/behavioral.py +53 -1
- package/src/superlocalmemory/learning/consolidation_cycle.py +381 -0
- package/src/superlocalmemory/learning/consolidation_worker.py +188 -520
- package/src/superlocalmemory/learning/database.py +256 -0
- package/src/superlocalmemory/learning/dedup_hnsw.py +413 -0
- package/src/superlocalmemory/learning/ensemble.py +300 -0
- package/src/superlocalmemory/learning/fact_outcome_joins.py +207 -0
- package/src/superlocalmemory/learning/forgetting_scheduler.py +55 -0
- package/src/superlocalmemory/learning/hnsw_dedup.py +69 -0
- package/src/superlocalmemory/learning/labeler.py +87 -0
- package/src/superlocalmemory/learning/legacy_migration.py +277 -0
- package/src/superlocalmemory/learning/memory_merge.py +160 -0
- package/src/superlocalmemory/learning/model_cache.py +269 -0
- package/src/superlocalmemory/learning/model_rollback.py +278 -0
- package/src/superlocalmemory/learning/outcome_queue.py +284 -0
- package/src/superlocalmemory/learning/pattern_miner.py +415 -0
- package/src/superlocalmemory/learning/pattern_miner_constants.py +47 -0
- package/src/superlocalmemory/learning/ranker.py +225 -81
- package/src/superlocalmemory/learning/ranker_common.py +163 -0
- package/src/superlocalmemory/learning/ranker_retrain_legacy.py +202 -0
- package/src/superlocalmemory/learning/ranker_retrain_online.py +411 -0
- package/src/superlocalmemory/learning/reward.py +777 -0
- package/src/superlocalmemory/learning/reward_archive.py +210 -0
- package/src/superlocalmemory/learning/reward_boost.py +201 -0
- package/src/superlocalmemory/learning/reward_proxy.py +326 -0
- package/src/superlocalmemory/learning/shadow_test.py +524 -0
- package/src/superlocalmemory/learning/signal_worker.py +270 -0
- package/src/superlocalmemory/learning/signals.py +314 -0
- package/src/superlocalmemory/learning/trigram_index.py +547 -0
- package/src/superlocalmemory/mcp/server.py +5 -5
- package/src/superlocalmemory/mcp/tools_context.py +183 -0
- package/src/superlocalmemory/mcp/tools_core.py +92 -27
- package/src/superlocalmemory/parameterization/soft_prompt_generator.py +13 -0
- package/src/superlocalmemory/retrieval/engine.py +52 -0
- package/src/superlocalmemory/server/api.py +2 -2
- package/src/superlocalmemory/server/bandit_loops.py +140 -0
- package/src/superlocalmemory/server/middleware/__init__.py +11 -0
- package/src/superlocalmemory/server/middleware/security_headers.py +144 -0
- package/src/superlocalmemory/server/routes/backup.py +36 -13
- package/src/superlocalmemory/server/routes/behavioral.py +50 -19
- package/src/superlocalmemory/server/routes/brain.py +1234 -0
- package/src/superlocalmemory/server/routes/data_io.py +4 -4
- package/src/superlocalmemory/server/routes/events.py +2 -2
- package/src/superlocalmemory/server/routes/helpers.py +1 -1
- package/src/superlocalmemory/server/routes/learning.py +192 -7
- package/src/superlocalmemory/server/routes/memories.py +189 -1
- package/src/superlocalmemory/server/routes/prewarm.py +171 -0
- package/src/superlocalmemory/server/routes/profiles.py +3 -3
- package/src/superlocalmemory/server/routes/token.py +88 -0
- package/src/superlocalmemory/server/routes/ws.py +5 -5
- package/src/superlocalmemory/server/security_middleware.py +13 -7
- package/src/superlocalmemory/server/ui.py +2 -2
- package/src/superlocalmemory/server/unified_daemon.py +335 -3
- package/src/superlocalmemory/storage/migration_runner.py +545 -0
- package/src/superlocalmemory/storage/migrations/M001_add_signal_features_columns.py +67 -0
- package/src/superlocalmemory/storage/migrations/M002_model_state_history.py +132 -0
- package/src/superlocalmemory/storage/migrations/M003_migration_log.py +38 -0
- package/src/superlocalmemory/storage/migrations/M004_cross_platform_sync_log.py +46 -0
- package/src/superlocalmemory/storage/migrations/M005_bandit_tables.py +75 -0
- package/src/superlocalmemory/storage/migrations/M006_action_outcomes_reward.py +75 -0
- package/src/superlocalmemory/storage/migrations/M007_pending_outcomes.py +63 -0
- package/src/superlocalmemory/storage/migrations/M009_model_lineage.py +54 -0
- package/src/superlocalmemory/storage/migrations/M010_evolution_config.py +75 -0
- package/src/superlocalmemory/storage/migrations/M011_archive_and_merge.py +87 -0
- package/src/superlocalmemory/storage/migrations/M012_shadow_observations.py +72 -0
- package/src/superlocalmemory/storage/migrations/M013_bi_temporal_columns.py +55 -0
- package/src/superlocalmemory/storage/migrations/__init__.py +81 -0
- package/src/superlocalmemory/storage/models.py +4 -0
- package/src/superlocalmemory/ui/css/brain.css +409 -0
- package/src/superlocalmemory/ui/css/legacy-dashboard.css +645 -0
- package/src/superlocalmemory/ui/index.html +459 -1345
- package/src/superlocalmemory/ui/js/brain.js +1321 -0
- package/src/superlocalmemory/ui/js/clusters.js +123 -4
- package/src/superlocalmemory/ui/js/init.js +48 -39
- package/src/superlocalmemory/ui/js/memories.js +88 -2
- package/src/superlocalmemory/ui/js/modal.js +71 -1
- package/src/superlocalmemory/ui/js/ng-shell.js +101 -88
- package/src/superlocalmemory/ui/js/trust-dashboard.js +168 -25
- package/src/superlocalmemory/ui/vendor/bootstrap-icons/bootstrap-icons.css +2018 -0
- package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff +0 -0
- package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff2 +0 -0
- package/src/superlocalmemory/ui/vendor/bootstrap.bundle.min.js +7 -0
- package/src/superlocalmemory/ui/vendor/bootstrap.min.css +6 -0
- package/src/superlocalmemory/ui/vendor/d3.v7.min.js +2 -0
- package/src/superlocalmemory/ui/vendor/graphology-library.min.js +2 -0
- package/src/superlocalmemory/ui/vendor/graphology.umd.min.js +2 -0
- package/src/superlocalmemory/ui/vendor/inter-ui/inter-variable.min.css +8 -0
- package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable-Italic.woff2 +0 -0
- package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable.woff2 +0 -0
- package/src/superlocalmemory/ui/vendor/sigma.min.js +1 -0
- package/src/superlocalmemory/ui/js/behavioral.js +0 -447
- package/src/superlocalmemory/ui/js/graph-core.js +0 -447
- package/src/superlocalmemory/ui/js/graph-interactions.js +0 -351
- package/src/superlocalmemory/ui/js/learning.js +0 -435
- package/src/superlocalmemory/ui/js/patterns.js +0 -93
- package/src/superlocalmemory.egg-info/PKG-INFO +0 -647
- package/src/superlocalmemory.egg-info/SOURCES.txt +0 -335
- package/src/superlocalmemory.egg-info/dependency_links.txt +0 -1
- package/src/superlocalmemory.egg-info/entry_points.txt +0 -2
- package/src/superlocalmemory.egg-info/requires.txt +0 -58
- package/src/superlocalmemory.egg-info/top_level.txt +0 -1
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under AGPL-3.0-or-later - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory v3.4.21 — LLD-02 §4.7
|
|
4
|
+
|
|
5
|
+
"""Integer-label mapping for LightGBM ``lambdarank`` training.
|
|
6
|
+
|
|
7
|
+
LLD reference: ``.backup/active-brain/lld/LLD-02-signal-pipeline-and-lightgbm.md``
|
|
8
|
+
Section 4.7 — single source of truth for outcome-reward / position → int label.
|
|
9
|
+
|
|
10
|
+
Rules:
|
|
11
|
+
- Labels are integers in ``[0, 4]`` (5 relevance tiers).
|
|
12
|
+
- ``label_gain`` length MUST be ``>= max(label) + 1``; we ship
|
|
13
|
+
``label_gain=[0, 1, 3, 7, 15]`` (length 5).
|
|
14
|
+
- Prefer ``outcome_reward`` (v3.4.21) if present; else position proxy.
|
|
15
|
+
- ``NaN`` reward is treated as missing and falls through to position.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import math
|
|
21
|
+
from typing import Any
|
|
22
|
+
|
|
23
|
+
# Exactly five tiers — do not widen without updating ``label_gain`` callers.
|
|
24
|
+
_LABEL_GAIN: tuple[int, ...] = (0, 1, 3, 7, 15)


def label_gain() -> list[int]:
    """Build the ``label_gain`` list handed to LightGBM.

    A new list is produced on every call, so callers may mutate the
    result without touching the module-level tuple. It holds one gain per
    relevance tier (five entries, for labels ``0`` through ``4``).
    """
    return [*_LABEL_GAIN]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _coerce_reward(raw: Any) -> float | None:
    """Coerce a reward-ish input to ``float``; return ``None`` when unusable.

    ``None``, anything ``float()`` rejects (including integers too large
    to represent) and ``NaN`` are all treated as "no reward recorded".
    """
    if raw is None:
        return None
    try:
        val = float(raw)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: float() of an int beyond the float range.
        return None
    if math.isnan(val):
        return None
    return val


def label_for_row(row: dict) -> int:
    """Map a training row to integer relevance in ``[0, 4]``.

    Args:
        row: Dict with optional ``outcome_reward`` (float in [0, 1]) and
            ``position`` (int, 0-based rank at recall time).

    Returns:
        An integer label in ``[0, 4]``. Higher = more relevant. A usable
        ``outcome_reward`` always wins; otherwise the label is derived
        from ``position``. A missing, non-numeric, non-finite or negative
        position yields ``0``.
    """
    reward = _coerce_reward(row.get("outcome_reward"))
    if reward is not None:
        if reward >= 0.90:
            return 4
        if reward >= 0.60:
            return 3
        if reward >= 0.30:
            return 2
        if reward > 0.00:
            return 1
        return 0

    # Position proxy (0 = best, higher = worse).
    try:
        pos = int(row.get("position", 99))
    except (TypeError, ValueError, OverflowError):
        # int(None) -> TypeError, int("x") / int(nan) -> ValueError,
        # int(inf) -> OverflowError. None of them is a usable rank.
        return 0
    if pos < 0:
        # A negative rank is not a valid 0-based position; without this
        # guard it would satisfy ``pos <= 2`` and be labelled 3.
        return 0
    if pos == 0:
        return 4
    if pos <= 2:
        return 3
    if pos <= 4:
        return 2
    if pos <= 9:
        return 1
    return 0
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
__all__ = ("label_for_row", "label_gain")
|
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under AGPL-3.0-or-later - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory v3.4.21
|
|
4
|
+
|
|
5
|
+
"""Legacy ``learning_feedback`` row migration — data movement only.
|
|
6
|
+
|
|
7
|
+
LLD reference: ``.backup/active-brain/lld/LLD-07-schema-migrations-and-security-primitives.md``
|
|
8
|
+
Section 5 (Legacy learning_feedback Migration).
|
|
9
|
+
|
|
10
|
+
Hard rule H15 (LLD-06 §10): this module MUST NOT contain any schema
|
|
11
|
+
DDL. All schema definitions live in
|
|
12
|
+
``src/superlocalmemory/storage/migrations/`` modules. This file is
|
|
13
|
+
data-copy only — the tables it reads from and writes into are created
|
|
14
|
+
by the LLD-07 runner before this runs.
|
|
15
|
+
|
|
16
|
+
S8-ARC-01 fix: the earlier stub always returned ``copied=0`` while the
|
|
17
|
+
``/api/v3/brain`` endpoint surfaced the pre-existing ``learning_feedback``
|
|
18
|
+
row count as ``legacy_migrated_count`` — a silent integrity lie. This
|
|
19
|
+
implementation does the real row copy and is idempotent (guarded by a
|
|
20
|
+
``migration_log`` sentinel row under the name ``LEG001_feedback_to_signals``).
|
|
21
|
+
D5 (keep ``learning_feedback`` for one release) still holds — this
|
|
22
|
+
migration does NOT delete from ``learning_feedback``; it only inserts
|
|
23
|
+
into ``learning_signals`` + ``learning_features``.
|
|
24
|
+
"""
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import json
|
|
28
|
+
import logging
|
|
29
|
+
import sqlite3
|
|
30
|
+
from datetime import datetime, timezone
|
|
31
|
+
from pathlib import Path
|
|
32
|
+
|
|
33
|
+
logger = logging.getLogger(__name__)
|
|
34
|
+
|
|
35
|
+
# Marker used in migration_log to signal the data-copy has run.
|
|
36
|
+
MIGRATION_NAME = "LEG001_feedback_to_signals"
|
|
37
|
+
|
|
38
|
+
# Batch size for the row copy. 500 keeps the single transaction short
|
|
39
|
+
# enough that any concurrent SQLite reader only sees brief locks, while
|
|
40
|
+
# still amortising the per-row overhead.
|
|
41
|
+
_COPY_BATCH_SIZE = 500
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def migrate_legacy_feedback(
    learning_db: Path,
    *,
    dry_run: bool = False,
) -> dict:
    """Copy ``learning_feedback`` rows forward into the signal/feature tables.

    Policy:
      * Idempotent: if ``migration_log`` already has ``MIGRATION_NAME``
        with status 'complete', returns a stats dict with
        ``already_done=True``.
      * Preserves the original ``learning_feedback`` table; this function
        only reads from it.
      * Skips (without failing) when the database file or any of the four
        required tables is missing.
      * ``dry_run=True`` reports the candidate count as ``skipped`` and
        writes nothing, including no ``migration_log`` row.
      * Does not raise on ``sqlite3.Error``: errors are logged at WARNING
        and reported through ``failed`` / ``details["reason"]``.

    NOTE(review): a run that ends with status 'partial' is not treated as
    complete, so the next call copies every source row again from the
    start. Nothing visible here de-duplicates against earlier inserts;
    confirm whether callers rely on that.

    Args:
        learning_db: Path to the learning SQLite database. A plain string
            is accepted as well.
        dry_run: When true, count the source rows but copy nothing.

    Returns:
        ``{"copied": int, "skipped": int, "failed": int,
           "already_done": bool, "details": {...}}``
    """
    stats = {
        "copied": 0, "skipped": 0, "failed": 0,
        "already_done": False,
        "details": {"name": MIGRATION_NAME, "dry_run": dry_run},
    }
    learning_db = Path(learning_db)
    if not learning_db.exists():
        stats["details"]["reason"] = "db_missing"
        return stats

    try:
        # isolation_level=None: the helpers issue BEGIN/COMMIT themselves.
        conn = sqlite3.connect(str(learning_db), isolation_level=None, timeout=10.0)
        conn.row_factory = sqlite3.Row
    except sqlite3.Error as exc:
        logger.warning("legacy migration: open failed: %s", exc)
        stats["failed"] = 1
        stats["details"]["reason"] = f"open_failed:{type(exc).__name__}"
        return stats

    try:
        # Every required table must exist. If any is missing we record a
        # skip and leave table creation to whoever owns the schema.
        if not _tables_exist(conn, ("migration_log", "learning_feedback",
                                    "learning_signals", "learning_features")):
            stats["details"]["reason"] = "required_tables_missing"
            return stats

        if _already_complete(conn, MIGRATION_NAME):
            stats["already_done"] = True
            stats["details"]["reason"] = "migration_log_complete"
            return stats

        # Row count of candidates up front so the stats dict can report
        # progress even under a dry_run.
        total = _count_feedback(conn)
        stats["details"]["source_rows"] = total
        if total == 0:
            _record_migration(conn, MIGRATION_NAME,
                              status="complete", rows_affected=0,
                              dry_run=dry_run)
            return stats

        if dry_run:
            stats["details"]["reason"] = "dry_run_noop"
            stats["skipped"] = total
            return stats

        copied, failed = _copy_rows(conn)
        stats["copied"] = copied
        stats["failed"] = failed
        _record_migration(conn, MIGRATION_NAME,
                          status="complete" if failed == 0 else "partial",
                          rows_affected=copied, dry_run=False)
        return stats
    except sqlite3.Error as exc:
        # sqlite3.connect() is lazy: a corrupt or locked file only fails on
        # the first query above. Fold that into the stats instead of
        # letting it escape to the caller.
        logger.warning("legacy migration: aborted: %s", exc)
        stats["failed"] = max(stats["failed"], 1)
        stats["details"]["reason"] = f"sqlite_error:{type(exc).__name__}"
        return stats
    finally:
        try:
            conn.close()
        except sqlite3.Error:  # pragma: no cover — defensive close
            pass
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
# ---------------------------------------------------------------------------
|
|
126
|
+
# Helpers — parameterised SQL only; no DDL here per H15.
|
|
127
|
+
# ---------------------------------------------------------------------------
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _tables_exist(conn: sqlite3.Connection, names: tuple[str, ...]) -> bool:
    """Report whether every name in ``names`` is a table in ``sqlite_master``.

    The names are bound as parameters; only the ``?`` placeholders are
    spliced into the SQL text. The check compares the number of matching
    rows with ``len(names)``.
    """
    placeholders = ",".join("?" for _ in names)
    sql = (
        "SELECT name FROM sqlite_master WHERE type='table' AND name IN "
        "(" + placeholders + ")"
    )
    found = conn.execute(sql, names).fetchall()
    return len(found) == len(names)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _already_complete(conn: sqlite3.Connection, name: str) -> bool:
    """Return True when ``migration_log`` holds ``name`` with status 'complete'.

    The status comparison is case-insensitive. A missing row yields False.
    """
    cursor = conn.execute(
        "SELECT status FROM migration_log WHERE name = ?",
        (name,),
    )
    record = cursor.fetchone()
    if record is None:
        return False
    return str(record[0]).lower() == "complete"
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _count_feedback(conn: sqlite3.Connection) -> int:
    """Return the number of rows currently in ``learning_feedback``."""
    result = conn.execute("SELECT COUNT(*) FROM learning_feedback").fetchone()
    if not result:
        return 0
    return int(result[0])
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _copy_rows(conn: sqlite3.Connection) -> tuple[int, int]:
    """Copy all ``learning_feedback`` rows forward in batches.

    Rows are read ``_COPY_BATCH_SIZE`` at a time, ordered by ``id``, and
    each batch is written inside its own ``BEGIN IMMEDIATE`` … ``COMMIT``
    so earlier batches stay durable if a later one fails.

    Counting rules:
      * A row counts as copied only once its batch has committed.
      * A row whose copy raises is undone through a per-row SAVEPOINT, so
        a half-written row is not committed, and it counts as failed.
      * If the batch transaction itself fails, the whole batch is rolled
        back and every row in it counts as failed.
      * A failed batch read adds one to ``failed`` and stops the loop.

    Returns:
        ``(copied, failed)``. ``sqlite3.Error`` and row-level conversion
        errors (``TypeError`` / ``ValueError``) are absorbed here.
    """
    copied = 0
    failed = 0
    offset = 0
    while True:
        try:
            batch = conn.execute(
                "SELECT id, profile_id, query_hash, fact_id, "
                "       signal_type, signal_value, created_at "
                "FROM learning_feedback "
                "ORDER BY id LIMIT ? OFFSET ?",
                (_COPY_BATCH_SIZE, offset),
            ).fetchall()
        except sqlite3.Error as exc:
            logger.warning("legacy migration: batch read failed: %s", exc)
            failed += 1
            break
        if not batch:
            break

        # Tally per batch and fold into the totals only after COMMIT, so a
        # rolled-back batch is never reported as copied.
        batch_copied = 0
        batch_failed = 0
        try:
            conn.execute("BEGIN IMMEDIATE")
            for row in batch:
                conn.execute("SAVEPOINT legacy_row")
                try:
                    _copy_single_row(conn, row)
                except (sqlite3.Error, TypeError, ValueError) as exc:
                    logger.debug("legacy migration: row %s failed: %s",
                                 row[0], exc)
                    # Undo whatever this row already inserted.
                    conn.execute("ROLLBACK TO legacy_row")
                    batch_failed += 1
                else:
                    batch_copied += 1
                finally:
                    conn.execute("RELEASE legacy_row")
            conn.execute("COMMIT")
        except sqlite3.Error as exc:
            logger.warning("legacy migration: batch commit failed: %s", exc)
            try:
                conn.execute("ROLLBACK")
            except sqlite3.Error:  # pragma: no cover
                pass
            failed += len(batch)
        else:
            copied += batch_copied
            failed += batch_failed
        offset += len(batch)

    return copied, failed
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def _copy_single_row(conn: sqlite3.Connection, row: sqlite3.Row) -> None:
    """Insert one legacy row into ``learning_signals`` + ``learning_features``.

    The signal row is written with ``signal_type='legacy_feedback'``, an
    empty ``query``, ``position=0``, ``channel_scores='{}'`` and a NULL
    ``cross_encoder``. The matching feature row carries an empty
    ``features_json`` object, ``label=0.0`` and ``is_synthetic=1``.
    Per the module notes, the trainer is expected to exclude synthetic
    rows (``is_synthetic=0`` filter); that filter lives outside this file.

    Fallbacks for empty source columns:
      * ``profile_id``   -> ``"default"``
      * ``fact_id``      -> ``""``
      * ``created_at``   -> current UTC time, second precision
      * ``signal_value`` -> ``1.0`` only when NULL or an empty string; a
        stored ``0`` / ``0.0`` is preserved as ``0.0``
      * ``query_hash``   -> right-padded with ``"0"`` and cut to 32 chars

    Args:
        conn: Open connection; the caller owns the transaction.
        row: A ``learning_feedback`` row addressable by column name.

    Raises:
        sqlite3.Error: If either INSERT fails.
        ValueError: If ``signal_value`` holds non-numeric text.
    """
    stored_hash = str(row["query_hash"] or "")
    # Pad/truncate the stored digest to exactly 32 characters.
    query_hash = (stored_hash + ("0" * 32))[:32]
    created_at = str(row["created_at"] or
                     datetime.now(timezone.utc).isoformat(timespec="seconds"))
    profile_id = str(row["profile_id"] or "default")
    fact_id = str(row["fact_id"] or "")
    legacy_query_id = f"legacy:{row['id']}"

    # Default only when the value is absent. ``value or 1.0`` would also
    # rewrite a genuine 0.0 signal to 1.0.
    raw_value = row["signal_value"]
    if raw_value is None or raw_value == "":
        signal_value = 1.0
    else:
        signal_value = float(raw_value)

    # ``signal_type='legacy_feedback'`` marks the row's origin for readers.
    cur = conn.execute(
        "INSERT INTO learning_signals "
        "(profile_id, query, fact_id, signal_type, value, created_at, "
        " query_id, query_text_hash, position, channel_scores, cross_encoder) "
        "VALUES (?, '', ?, 'legacy_feedback', ?, ?, ?, ?, 0, '{}', NULL)",
        (profile_id, fact_id, signal_value,
         created_at, legacy_query_id, query_hash),
    )
    sid = cur.lastrowid

    # Placeholder feature row: empty JSON object, flagged synthetic, and
    # linked back to the signal row just inserted.
    conn.execute(
        "INSERT INTO learning_features "
        "(profile_id, query_id, fact_id, features_json, label, created_at, "
        " signal_id, is_synthetic) "
        "VALUES (?, ?, ?, '{}', 0.0, ?, ?, 1)",
        (profile_id, legacy_query_id, fact_id, created_at, sid),
    )
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def _record_migration(
    conn: sqlite3.Connection,
    name: str,
    *,
    status: str,
    rows_affected: int,
    dry_run: bool,
) -> None:
    """Upsert the sentinel row for ``name`` in ``migration_log``.

    Does nothing under ``dry_run``. Otherwise writes (or replaces) a row
    holding the current UTC timestamp, an empty ``ddl_sha256``, the given
    ``rows_affected`` and ``status``. A ``sqlite3.Error`` during the write
    is logged at WARNING level and swallowed.
    """
    if dry_run:
        return

    stamp = datetime.now(timezone.utc).isoformat(timespec="seconds")
    statement = (
        "INSERT OR REPLACE INTO migration_log "
        "(name, applied_at, ddl_sha256, rows_affected, status) "
        "VALUES (?, ?, '', ?, ?)"
    )
    try:
        conn.execute(statement, (name, stamp, rows_affected, status))
    except sqlite3.Error as exc:
        logger.warning("legacy migration: log record failed: %s", exc)
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
__all__ = ("migrate_legacy_feedback", "MIGRATION_NAME")
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under AGPL-3.0-or-later - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory v3.4.21 — LLD-12 §2.3 + §1.4
|
|
4
|
+
|
|
5
|
+
"""Reversible merge-log writer + unmerge for consolidation.
|
|
6
|
+
|
|
7
|
+
LLD-12 §1 hard invariant: consolidation NEVER issues
|
|
8
|
+
``DELETE FROM atomic_facts``. Duplicates merge by:
|
|
9
|
+
|
|
10
|
+
1. INSERT into ``memory_merge_log`` (canonical + merged fact_ids, scores,
|
|
11
|
+
timestamp, reversible flag).
|
|
12
|
+
2. UPDATE ``atomic_facts`` SET archive_status='merged',
|
|
13
|
+
merged_into=<canonical>. Row stays; only status flips.
|
|
14
|
+
|
|
15
|
+
``unmerge(merge_id)`` reverses the operation by flipping archive_status
|
|
16
|
+
back to 'live' and clearing merged_into. The log row is marked
|
|
17
|
+
``reversible=0`` once reversed so a second unmerge is a no-op.
|
|
18
|
+
|
|
19
|
+
All operations run inside a single SQLite transaction with
|
|
20
|
+
``busy_timeout=2000``. Partial failures roll back cleanly, leaving the
|
|
21
|
+
DB in its pre-merge state.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import logging
|
|
27
|
+
import sqlite3
|
|
28
|
+
import uuid
|
|
29
|
+
from datetime import datetime, timezone
|
|
30
|
+
from pathlib import Path
|
|
31
|
+
|
|
32
|
+
logger = logging.getLogger(__name__)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _iso_now() -> str:
    """Return the current UTC time as an ISO-8601 string."""
    return datetime.now(timezone.utc).isoformat()


def apply_merges(
    memory_db_path: str | Path,
    candidates: list[tuple[str, str, float, float]],
    *,
    profile_id: str,
) -> int:
    """Apply merge candidates in one transaction. Returns number applied.

    Each candidate is ``(canonical_fact_id, merged_fact_id, cosine, jaccard)``.
    For every applied candidate a row is inserted into
    ``memory_merge_log`` (``reversible=1``) and the merged fact is updated
    to ``archive_status='merged'``, ``archive_reason='cosine_dup'``,
    ``merged_into=<canonical>``. No row is deleted from ``atomic_facts``.

    A candidate is skipped (count not incremented) when:
      * canonical and merged ids are the same fact;
      * the merged fact does not exist or is already ``'merged'``;
      * the canonical fact does not exist or is itself ``'merged'``. This
        also stops a pair listed in both directions from archiving both
        facts.

    Args:
        memory_db_path: Path to the memory SQLite database.
        candidates: Merge pairs with their similarity scores.
        profile_id: Profile recorded on each merge-log row.

    Returns:
        The number of merges committed, or ``0`` if a ``sqlite3.Error``
        forced a rollback of the whole batch.
    """
    if not candidates:
        return 0

    conn = sqlite3.connect(str(memory_db_path), timeout=10.0)
    applied = 0
    # Pairs written so far in this transaction; logged on rollback so the
    # warning names exactly what was undone.
    in_flight: list[tuple[str, str]] = []
    try:
        # Inside the try so the connection is closed even if this fails.
        conn.execute("PRAGMA busy_timeout=2000")
        conn.execute("BEGIN IMMEDIATE")
        for canonical_id, merged_id, cos, jac in candidates:
            if canonical_id == merged_id:
                # Merging a fact into itself would archive it with no
                # live canonical left.
                continue

            merged_row = conn.execute(
                "SELECT archive_status FROM atomic_facts WHERE fact_id=?",
                (merged_id,),
            ).fetchone()
            if merged_row is None or merged_row[0] == "merged":
                continue

            # These reads see updates made earlier in this transaction,
            # so a canonical archived by a previous candidate is caught.
            canonical_row = conn.execute(
                "SELECT archive_status FROM atomic_facts WHERE fact_id=?",
                (canonical_id,),
            ).fetchone()
            if canonical_row is None or canonical_row[0] == "merged":
                continue

            conn.execute(
                "INSERT INTO memory_merge_log "
                "(merge_id, profile_id, canonical_fact_id, merged_fact_id, "
                " cosine_sim, entity_jaccard, merged_at, reversible) "
                "VALUES (?, ?, ?, ?, ?, ?, ?, 1)",
                (
                    str(uuid.uuid4()),
                    profile_id,
                    canonical_id,
                    merged_id,
                    float(cos),
                    float(jac),
                    _iso_now(),
                ),
            )
            conn.execute(
                "UPDATE atomic_facts "
                "SET archive_status='merged', "
                "    archive_reason='cosine_dup', "
                "    merged_into=? "
                "WHERE fact_id=?",
                (canonical_id, merged_id),
            )
            applied += 1
            in_flight.append((canonical_id, merged_id))
        conn.commit()
    except sqlite3.Error as exc:
        try:
            conn.rollback()
        except sqlite3.Error:  # pragma: no cover — defensive rollback
            pass
        logger.warning(
            "apply_merges rollback: profile=%s pre-rollback_applied=%d "
            "in_flight=%s error=%s",
            profile_id, applied, in_flight, exc,
        )
        applied = 0
    finally:
        conn.close()
    return applied
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def unmerge(memory_db_path: str | Path, merge_id: str) -> bool:
    """Reverse a merge by ``merge_id``. Returns True on success.

    Flips the merged fact's ``archive_status`` back to ``'live'``, clears
    ``archive_reason`` and ``merged_into``, and marks the log row
    ``reversible=0`` so a second call is a no-op.

    The log lookup and both updates run inside one ``BEGIN IMMEDIATE``
    transaction, so the ``reversible`` check and the write cannot be
    separated by another writer. The fact is only restored while it is
    still ``'merged'`` into the canonical recorded in the log row; a fact
    that has since moved to another state is left untouched.

    Args:
        memory_db_path: Path to the memory SQLite database.
        merge_id: Primary key of the ``memory_merge_log`` row to reverse.

    Returns:
        True if the fact was restored. False if the merge id is unknown,
        was already reversed, the fact is no longer in the merged state
        recorded by the log, or a ``sqlite3.Error`` forced a rollback.
    """
    conn = sqlite3.connect(str(memory_db_path), timeout=10.0)
    try:
        # Inside the try so the connection is closed even if this fails.
        conn.execute("PRAGMA busy_timeout=2000")
        conn.execute("BEGIN IMMEDIATE")
        row = conn.execute(
            "SELECT canonical_fact_id, merged_fact_id, reversible "
            "FROM memory_merge_log "
            "WHERE merge_id=?",
            (merge_id,),
        ).fetchone()
        if row is None or not row[2]:
            conn.rollback()
            return False
        canonical_fid, merged_fid = row[0], row[1]

        cur = conn.execute(
            "UPDATE atomic_facts "
            "SET archive_status='live', archive_reason=NULL, merged_into=NULL "
            "WHERE fact_id=? AND archive_status='merged' AND merged_into=?",
            (merged_fid, canonical_fid),
        )
        if cur.rowcount < 1:
            # Nothing matched: the fact is gone or no longer merged into
            # this canonical. Leave both tables as they were.
            conn.rollback()
            return False

        conn.execute(
            "UPDATE memory_merge_log SET reversible=0 WHERE merge_id=?",
            (merge_id,),
        )
        conn.commit()
        return True
    except sqlite3.Error as exc:
        try:
            conn.rollback()
        except sqlite3.Error:  # pragma: no cover — defensive rollback
            pass
        logger.warning("unmerge rollback: %s", exc)
        return False
    finally:
        conn.close()
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
__all__ = ("apply_merges", "unmerge")
|