akm-cli 0.9.0-beta.53 → 0.9.0-beta.55
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/clack.js +56 -0
- package/dist/cli/confirm.js +1 -1
- package/dist/cli.js +5 -3
- package/dist/commands/agent/contribute-cli.js +2 -3
- package/dist/commands/env/env-cli.js +187 -202
- package/dist/commands/env/secret-cli.js +109 -121
- package/dist/commands/feedback-cli.js +152 -155
- package/dist/commands/health/advisories.js +151 -0
- package/dist/commands/health/html-report.js +33 -10
- package/dist/commands/health/improve-metrics.js +754 -0
- package/dist/commands/health/llm-usage.js +65 -0
- package/dist/commands/health/md-report.js +103 -0
- package/dist/commands/health/metrics.js +278 -0
- package/dist/commands/health/task-runs.js +135 -0
- package/dist/commands/health/types.js +18 -0
- package/dist/commands/health/windows.js +196 -0
- package/dist/commands/health.js +15 -1492
- package/dist/commands/improve/anti-collapse.js +170 -0
- package/dist/commands/improve/collapse-detector.js +3 -2
- package/dist/commands/improve/consolidate.js +636 -633
- package/dist/commands/improve/dedup.js +1 -1
- package/dist/commands/improve/distill/content-repair.js +202 -0
- package/dist/commands/improve/distill/promote-memory.js +228 -0
- package/dist/commands/improve/distill/quality-gate.js +233 -0
- package/dist/commands/improve/distill-guards.js +127 -0
- package/dist/commands/improve/distill.js +49 -575
- package/dist/commands/improve/extract-cli.js +74 -76
- package/dist/commands/improve/extract.js +6 -4
- package/dist/commands/improve/hot-probation.js +45 -0
- package/dist/commands/improve/improve-auto-accept.js +3 -2
- package/dist/commands/improve/improve-cli.js +14 -13
- package/dist/commands/improve/improve-result-file.js +2 -1
- package/dist/commands/improve/improve.js +6 -5
- package/dist/commands/improve/loop-stages.js +19 -21
- package/dist/commands/improve/outcome-loop.js +18 -16
- package/dist/commands/improve/preparation.js +23 -5
- package/dist/commands/improve/procedural.js +10 -31
- package/dist/commands/improve/recombine.js +19 -43
- package/dist/commands/improve/reflect.js +1 -1
- package/dist/commands/improve/schema-similarity-gate.js +168 -0
- package/dist/commands/improve/shared.js +48 -0
- package/dist/commands/observability-cli.js +4 -4
- package/dist/commands/proposal/drain-policies.js +2 -2
- package/dist/commands/proposal/drain.js +1 -1
- package/dist/commands/proposal/legacy-import.js +115 -0
- package/dist/commands/proposal/proposal-cli.js +3 -3
- package/dist/commands/proposal/proposal.js +2 -1
- package/dist/commands/proposal/propose.js +1 -1
- package/dist/commands/proposal/repository.js +829 -0
- package/dist/commands/proposal/validators/proposals.js +5 -920
- package/dist/commands/read/curate.js +4 -4
- package/dist/commands/read/remember-cli.js +132 -137
- package/dist/commands/read/search-cli.js +7 -5
- package/dist/commands/read/search.js +7 -3
- package/dist/commands/read/show.js +3 -5
- package/dist/commands/registry-cli.js +76 -87
- package/dist/commands/sources/add-cli.js +91 -95
- package/dist/commands/sources/history.js +1 -1
- package/dist/commands/sources/init.js +12 -0
- package/dist/commands/sources/schema-repair.js +1 -1
- package/dist/commands/sources/sources-cli.js +3 -3
- package/dist/commands/sources/stash-cli.js +2 -2
- package/dist/commands/tasks/default-tasks.js +12 -0
- package/dist/commands/tasks/tasks-cli.js +1 -2
- package/dist/commands/wiki-cli.js +2 -3
- package/dist/core/common.js +3 -3
- package/dist/core/config/config-schema.js +6 -0
- package/dist/core/config/config.js +12 -0
- package/dist/core/deep-merge.js +38 -0
- package/dist/core/events.js +2 -1
- package/dist/core/logs-db.js +8 -13
- package/dist/core/paths.js +14 -14
- package/dist/core/state-db.js +13 -1140
- package/dist/core/warn.js +21 -0
- package/dist/indexer/db/db.js +72 -709
- package/dist/indexer/db/entry-mapper.js +41 -0
- package/dist/indexer/db/schema.js +516 -0
- package/dist/indexer/ensure-index.js +3 -2
- package/dist/indexer/feedback/utility-policy.js +85 -0
- package/dist/indexer/graph/graph-extraction.js +2 -1
- package/dist/indexer/index-writer-lock.js +18 -0
- package/dist/indexer/indexer.js +94 -27
- package/dist/indexer/read-preflight.js +23 -0
- package/dist/indexer/search/fts-query.js +51 -0
- package/dist/indexer/walk/walker.js +21 -13
- package/dist/integrations/agent/detect.js +9 -0
- package/dist/integrations/agent/index.js +1 -1
- package/dist/integrations/agent/spawn.js +15 -66
- package/dist/llm/client.js +12 -0
- package/dist/llm/embedder.js +26 -2
- package/dist/llm/embedders/local.js +7 -1
- package/dist/output/text/helpers.js +13 -0
- package/dist/scripts/migrate-storage.js +6903 -7424
- package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +49 -44
- package/dist/setup/detect.js +9 -0
- package/dist/setup/legacy-config.js +106 -0
- package/dist/setup/prompt.js +57 -0
- package/dist/setup/providers.js +14 -0
- package/dist/setup/registry-stash-loader.js +12 -0
- package/dist/setup/semantic-assets.js +124 -0
- package/dist/setup/setup.js +25 -1608
- package/dist/setup/steps/connection.js +734 -0
- package/dist/setup/steps/output.js +31 -0
- package/dist/setup/steps/platforms.js +124 -0
- package/dist/setup/steps/semantic.js +27 -0
- package/dist/setup/steps/sources.js +222 -0
- package/dist/setup/steps/stashdir.js +42 -0
- package/dist/setup/steps/tasks.js +152 -0
- package/dist/storage/repositories/canaries-repository.js +107 -0
- package/dist/storage/repositories/consolidation-repository.js +38 -0
- package/dist/storage/repositories/embeddings-repository.js +72 -0
- package/dist/storage/repositories/events-repository.js +187 -0
- package/dist/storage/repositories/extract-sessions-repository.js +96 -0
- package/dist/storage/repositories/improve-runs-repository.js +130 -0
- package/dist/storage/repositories/index-db.js +4 -7
- package/dist/storage/repositories/proposals-repository.js +220 -0
- package/dist/storage/repositories/recombine-repository.js +213 -0
- package/dist/storage/repositories/task-history-repository.js +93 -0
- package/dist/storage/sqlite-pragmas.js +3 -3
- package/dist/tasks/backends/index.js +9 -0
- package/dist/tasks/runner.js +11 -1
- package/package.json +2 -2
- package/dist/commands/improve/homeostatic.js +0 -497
package/dist/indexer/db/db.js
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
2
2
|
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
3
|
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
4
|
-
import fs from "node:fs";
|
|
5
4
|
import { createRequire } from "node:module";
|
|
6
5
|
import path from "node:path";
|
|
7
6
|
import { parseAssetRef } from "../../core/asset/asset-ref.js";
|
|
@@ -10,43 +9,31 @@ import { getDbPath } from "../../core/paths.js";
|
|
|
10
9
|
import { warn } from "../../core/warn.js";
|
|
11
10
|
import { cosineSimilarity } from "../../llm/embedders/types.js";
|
|
12
11
|
import { sha256Hex } from "../../runtime.js";
|
|
13
|
-
import {
|
|
14
|
-
import {
|
|
12
|
+
import { openManagedDatabase } from "../../storage/managed-db.js";
|
|
13
|
+
import { computeNextUtility, HIGH_UTILITY_THRESHOLD, UTILITY_REVIEW_THRESHOLD, } from "../feedback/utility-policy.js";
|
|
14
|
+
import { buildPrefixQuery, sanitizeFtsQuery } from "../search/fts-query.js";
|
|
15
15
|
import { buildSearchFields } from "../search/search-fields.js";
|
|
16
|
-
import {
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
// no longer gates any destructive path (the old nuclear drop-and-rebuild was
|
|
20
|
-
// removed; index.db's idempotent CREATE … IF NOT EXISTS schema converges any
|
|
21
|
-
// older/partial DB forward without dropping data). Graph re-keying uses a
|
|
22
|
-
// TARGETED, graph-only migration (migrateGraphFilesSchema) — the model for any
|
|
23
|
-
// incompatible change: migrate in place, never wipe the whole index.
|
|
24
|
-
export const DB_VERSION = 17;
|
|
25
|
-
export const EMBEDDING_DIM = 384;
|
|
26
|
-
// #624-P1: graph_files re-keyed to (stash_root, file_path, body_hash). Bumped 3→4
|
|
27
|
-
// as a marker; the actual migration is the targeted drop in migrateGraphFilesSchema.
|
|
28
|
-
export const GRAPH_SCHEMA_VERSION = 4;
|
|
16
|
+
import { ENTRY_COLUMNS, rowToIndexedEntry } from "./entry-mapper.js";
|
|
17
|
+
import { ensureSchema } from "./schema.js";
|
|
18
|
+
export { HIGH_UTILITY_THRESHOLD, sanitizeFtsQuery, UTILITY_REVIEW_THRESHOLD };
|
|
29
19
|
// ── Database lifecycle ──────────────────────────────────────────────────────
|
|
30
20
|
export function openIndexDatabase(dbPath, options) {
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
// Warn once at init if using JS fallback with many entries
|
|
48
|
-
warnIfVecMissing(db, { once: true });
|
|
49
|
-
return db;
|
|
21
|
+
return openManagedDatabase({
|
|
22
|
+
path: dbPath ?? getDbPath(),
|
|
23
|
+
init: (db) => {
|
|
24
|
+
// Try to load sqlite-vec extension
|
|
25
|
+
loadVecExtension(db);
|
|
26
|
+
// Dim resolution: explicit option wins; otherwise consult the on-disk
|
|
27
|
+
// config so unparameterised opens (registry providers, graph helpers,
|
|
28
|
+
// ad-hoc CLI subcommands) honour the operator-declared dimension. Only if
|
|
29
|
+
// both are absent do we fall through to the no-clobber path, which keeps
|
|
30
|
+
// ensureSchema from touching `index_meta.embeddingDim` at all.
|
|
31
|
+
const resolvedDim = options?.embeddingDim ?? resolveConfiguredEmbeddingDim();
|
|
32
|
+
ensureSchema(db, resolvedDim);
|
|
33
|
+
// Warn once at init if using JS fallback with many entries
|
|
34
|
+
warnIfVecMissing(db, { once: true });
|
|
35
|
+
},
|
|
36
|
+
});
|
|
50
37
|
}
|
|
51
38
|
/**
|
|
52
39
|
* Read the operator-configured embedding dimension from the on-disk config.
|
|
@@ -71,14 +58,10 @@ function resolveConfiguredEmbeddingDim() {
|
|
|
71
58
|
}
|
|
72
59
|
}
|
|
73
60
|
export function openExistingDatabase(dbPath) {
|
|
74
|
-
const resolvedPath = dbPath ?? getDbPath();
|
|
75
|
-
const dir = path.dirname(resolvedPath);
|
|
76
|
-
const db = openDatabase(resolvedPath);
|
|
77
|
-
applyStandardPragmas(db, { dataDir: dir });
|
|
78
61
|
// Existing-DB callers must not mutate schema or embedding metadata on open,
|
|
79
|
-
// but some paths still need write access to usage_events and other tables
|
|
80
|
-
|
|
81
|
-
return
|
|
62
|
+
// but some paths still need write access to usage_events and other tables —
|
|
63
|
+
// so init only loads the vec extension, it does not run ensureSchema.
|
|
64
|
+
return openManagedDatabase({ path: dbPath ?? getDbPath(), init: loadVecExtension });
|
|
82
65
|
}
|
|
83
66
|
export function closeDatabase(db) {
|
|
84
67
|
db.close();
|
|
@@ -127,372 +110,6 @@ export function warnIfVecMissing(db, { once } = { once: false }) {
|
|
|
127
110
|
}
|
|
128
111
|
}, "embeddings table may not exist yet during init");
|
|
129
112
|
}
|
|
130
|
-
// ── Schema ──────────────────────────────────────────────────────────────────
|
|
131
|
-
/**
|
|
132
|
-
* DDL for the `registry_index_cache` table. This table lives in index.db
|
|
133
|
-
* (managed by this module), so its DDL belongs here next to the `ensureSchema`
|
|
134
|
-
* that applies it — not in state-db.ts.
|
|
135
|
-
*
|
|
136
|
-
* Created with CREATE TABLE IF NOT EXISTS so it is safe to call inside
|
|
137
|
-
* `ensureSchema()`. Caches the result of resolving and fetching remote registry
|
|
138
|
-
* stash indexes so `akm search` does not hit the network on every invocation.
|
|
139
|
-
*
|
|
140
|
-
* Indexed (query) columns:
|
|
141
|
-
* registry_url TEXT PK — canonical URL of the registry; cache key.
|
|
142
|
-
* fetched_at TEXT — ISO-8601; used to detect stale entries (TTL).
|
|
143
|
-
* etag TEXT — HTTP ETag for conditional GET (If-None-Match).
|
|
144
|
-
* last_modified TEXT — HTTP Last-Modified for conditional GET.
|
|
145
|
-
*
|
|
146
|
-
* Non-indexed payload:
|
|
147
|
-
* index_json TEXT — JSON blob of the fetched registry index document.
|
|
148
|
-
*
|
|
149
|
-
* ADD COLUMN extension points (future migrations):
|
|
150
|
-
* ALTER TABLE registry_index_cache ADD COLUMN schema_version INTEGER DEFAULT 1;
|
|
151
|
-
* ALTER TABLE registry_index_cache ADD COLUMN kit_count INTEGER DEFAULT NULL;
|
|
152
|
-
* ALTER TABLE registry_index_cache ADD COLUMN error_message TEXT DEFAULT NULL;
|
|
153
|
-
*/
|
|
154
|
-
const REGISTRY_INDEX_CACHE_DDL = `
|
|
155
|
-
CREATE TABLE IF NOT EXISTS registry_index_cache (
|
|
156
|
-
registry_url TEXT PRIMARY KEY,
|
|
157
|
-
fetched_at TEXT NOT NULL,
|
|
158
|
-
etag TEXT,
|
|
159
|
-
last_modified TEXT,
|
|
160
|
-
index_json TEXT NOT NULL DEFAULT '{}'
|
|
161
|
-
);
|
|
162
|
-
|
|
163
|
-
CREATE INDEX IF NOT EXISTS idx_registry_cache_fetched
|
|
164
|
-
ON registry_index_cache(fetched_at);
|
|
165
|
-
`;
|
|
166
|
-
/** A row backed up out of the legacy `usage_events` table during a version upgrade. */
|
|
167
|
-
function ensureSchema(db, embeddingDim) {
|
|
168
|
-
// Create meta table first so we can check version
|
|
169
|
-
db.exec(`
|
|
170
|
-
CREATE TABLE IF NOT EXISTS index_meta (
|
|
171
|
-
key TEXT PRIMARY KEY,
|
|
172
|
-
value TEXT NOT NULL
|
|
173
|
-
);
|
|
174
|
-
`);
|
|
175
|
-
// index.db is a fully regenerable derived cache, so its schema is built
|
|
176
|
-
// idempotently below: every table is CREATE … IF NOT EXISTS and column
|
|
177
|
-
// additions go through guarded ALTERs (ensureDerivedFromColumn) and targeted
|
|
178
|
-
// migrations (migrateGraphFilesSchema / migrateGraphDataFromLegacy). Opening a
|
|
179
|
-
// database with an older or partial schema converges it forward WITHOUT ever
|
|
180
|
-
// dropping data — there is intentionally no "nuclear drop the whole index on a
|
|
181
|
-
// DB_VERSION mismatch" path (a destructive design the regenerable index never
|
|
182
|
-
// needed, and whose pre-drop data-dir backup it required). A genuinely
|
|
183
|
-
// incompatible change is handled by an additive/targeted migration; the few
|
|
184
|
-
// derived tables that ever must be rebuilt are regenerated by `akm index`.
|
|
185
|
-
db.exec(`
|
|
186
|
-
CREATE TABLE IF NOT EXISTS entries (
|
|
187
|
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
188
|
-
entry_key TEXT NOT NULL UNIQUE,
|
|
189
|
-
dir_path TEXT NOT NULL,
|
|
190
|
-
file_path TEXT NOT NULL,
|
|
191
|
-
stash_dir TEXT NOT NULL,
|
|
192
|
-
entry_json TEXT NOT NULL,
|
|
193
|
-
search_text TEXT NOT NULL,
|
|
194
|
-
entry_type TEXT NOT NULL,
|
|
195
|
-
derived_from TEXT
|
|
196
|
-
);
|
|
197
|
-
|
|
198
|
-
CREATE INDEX IF NOT EXISTS idx_entries_dir ON entries(dir_path);
|
|
199
|
-
CREATE INDEX IF NOT EXISTS idx_entries_type ON entries(entry_type);
|
|
200
|
-
CREATE INDEX IF NOT EXISTS idx_entries_file_path ON entries(file_path);
|
|
201
|
-
`);
|
|
202
|
-
// Phase 5A / DB v17: backfill `derived_from` column + index on databases
|
|
203
|
-
// that were created at v17 fresh OR carry a partial v17 schema (a DB whose
|
|
204
|
-
// `index_meta.version` was bumped to 17 but whose `entries` table still
|
|
205
|
-
// lacks the column — this happens when a previous v17 binary opened a
|
|
206
|
-
// pre-v17 DB without taking the upgrade path because no version mismatch
|
|
207
|
-
// was seen at boot). The PRAGMA-then-ALTER guard runs unconditionally so
|
|
208
|
-
// both fresh and partial schemas converge. The CREATE INDEX for
|
|
209
|
-
// `derived_from` MUST run after this helper so we never reference a
|
|
210
|
-
// column that has not yet been added on partial schemas.
|
|
211
|
-
ensureDerivedFromColumn(db);
|
|
212
|
-
// Validated WorkflowDocument JSON, one row per indexed workflow entry.
|
|
213
|
-
// Pure index data — fully rebuilt on each `akm index`. ON DELETE CASCADE
|
|
214
|
-
// means clearing entries (full rebuild or per-dir delete) drops these too.
|
|
215
|
-
db.exec(`
|
|
216
|
-
CREATE TABLE IF NOT EXISTS workflow_documents (
|
|
217
|
-
entry_id INTEGER PRIMARY KEY REFERENCES entries(id) ON DELETE CASCADE,
|
|
218
|
-
schema_version INTEGER NOT NULL,
|
|
219
|
-
document_json TEXT NOT NULL,
|
|
220
|
-
source_path TEXT NOT NULL,
|
|
221
|
-
source_hash TEXT NOT NULL,
|
|
222
|
-
updated_at TEXT NOT NULL
|
|
223
|
-
);
|
|
224
|
-
|
|
225
|
-
CREATE INDEX IF NOT EXISTS idx_workflow_documents_source_path
|
|
226
|
-
ON workflow_documents(source_path);
|
|
227
|
-
`);
|
|
228
|
-
// Set version immediately after table creation so a crash before the end of
|
|
229
|
-
// ensureSchema() does not leave the database in a versionless state on next open.
|
|
230
|
-
const versionAfterCreate = getMeta(db, "version");
|
|
231
|
-
if (!versionAfterCreate) {
|
|
232
|
-
setMeta(db, "version", String(DB_VERSION));
|
|
233
|
-
}
|
|
234
|
-
// BLOB-based embedding storage (always available, no sqlite-vec needed)
|
|
235
|
-
db.exec(`
|
|
236
|
-
CREATE TABLE IF NOT EXISTS embeddings (
|
|
237
|
-
id INTEGER PRIMARY KEY,
|
|
238
|
-
embedding BLOB NOT NULL,
|
|
239
|
-
FOREIGN KEY (id) REFERENCES entries(id)
|
|
240
|
-
);
|
|
241
|
-
`);
|
|
242
|
-
// FTS5 table — multi-column with per-field weighting via bm25()
|
|
243
|
-
const ftsExists = db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='entries_fts'").get();
|
|
244
|
-
if (!ftsExists) {
|
|
245
|
-
db.exec(`
|
|
246
|
-
CREATE VIRTUAL TABLE entries_fts USING fts5(
|
|
247
|
-
entry_id UNINDEXED,
|
|
248
|
-
name,
|
|
249
|
-
description,
|
|
250
|
-
tags,
|
|
251
|
-
hints,
|
|
252
|
-
content,
|
|
253
|
-
tokenize='porter unicode61'
|
|
254
|
-
);
|
|
255
|
-
`);
|
|
256
|
-
}
|
|
257
|
-
// Usage events table — created by ensureUsageEventsSchema() at runtime.
|
|
258
|
-
// Utility scores table (aggregated per-entry utility metrics)
|
|
259
|
-
db.exec(`
|
|
260
|
-
CREATE TABLE IF NOT EXISTS utility_scores (
|
|
261
|
-
entry_id INTEGER PRIMARY KEY,
|
|
262
|
-
utility REAL NOT NULL DEFAULT 0,
|
|
263
|
-
show_count INTEGER NOT NULL DEFAULT 0,
|
|
264
|
-
search_count INTEGER NOT NULL DEFAULT 0,
|
|
265
|
-
select_rate REAL NOT NULL DEFAULT 0,
|
|
266
|
-
last_used_at TEXT,
|
|
267
|
-
updated_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
268
|
-
FOREIGN KEY (entry_id) REFERENCES entries(id) ON DELETE CASCADE
|
|
269
|
-
);
|
|
270
|
-
`);
|
|
271
|
-
// Per-project scoped utility scores — tracks usage per (entry, cwd-anchor)
|
|
272
|
-
// so assets useful in project A don't pollute rankings in project B.
|
|
273
|
-
// The global utility_scores table is preserved as a fallback / cold-start aid.
|
|
274
|
-
db.exec(`
|
|
275
|
-
CREATE TABLE IF NOT EXISTS utility_scores_scoped (
|
|
276
|
-
entry_id INTEGER NOT NULL,
|
|
277
|
-
scope_key TEXT NOT NULL,
|
|
278
|
-
utility REAL NOT NULL DEFAULT 0,
|
|
279
|
-
last_used_at INTEGER NOT NULL,
|
|
280
|
-
PRIMARY KEY (entry_id, scope_key)
|
|
281
|
-
);
|
|
282
|
-
CREATE INDEX IF NOT EXISTS idx_utility_scores_scoped_entry_id
|
|
283
|
-
ON utility_scores_scoped(entry_id);
|
|
284
|
-
`);
|
|
285
|
-
db.exec(`
|
|
286
|
-
CREATE TABLE IF NOT EXISTS index_dir_state (
|
|
287
|
-
dir_path TEXT PRIMARY KEY,
|
|
288
|
-
file_set_hash TEXT NOT NULL,
|
|
289
|
-
file_mtime_max_ms REAL NOT NULL,
|
|
290
|
-
reason TEXT NOT NULL,
|
|
291
|
-
updated_at TEXT NOT NULL
|
|
292
|
-
);
|
|
293
|
-
`);
|
|
294
|
-
// LLM enrichment result cache. Stores a SHA-256 body hash and the JSON
|
|
295
|
-
// result for each asset so that subsequent `akm index --enrich` runs can
|
|
296
|
-
// skip the LLM call when the body hasn't changed. The cache is keyed by
|
|
297
|
-
// a stable asset_ref string (e.g. the absolute file path for graph/memory
|
|
298
|
-
// passes, or `entryKey:passId` for the metadata-enhance pass).
|
|
299
|
-
// Entries are cleaned up when assets are removed or --re-enrich is used.
|
|
300
|
-
db.exec(`
|
|
301
|
-
CREATE TABLE IF NOT EXISTS llm_enrichment_cache (
|
|
302
|
-
asset_ref TEXT NOT NULL,
|
|
303
|
-
cache_variant TEXT NOT NULL,
|
|
304
|
-
body_hash TEXT NOT NULL,
|
|
305
|
-
result_json TEXT NOT NULL,
|
|
306
|
-
updated_at INTEGER NOT NULL,
|
|
307
|
-
PRIMARY KEY (asset_ref, cache_variant)
|
|
308
|
-
);
|
|
309
|
-
|
|
310
|
-
CREATE INDEX IF NOT EXISTS idx_llm_cache_updated
|
|
311
|
-
ON llm_enrichment_cache(updated_at);
|
|
312
|
-
`);
|
|
313
|
-
// Graph extraction tables — schema v4 ((stash_root, file_path, body_hash) PK).
|
|
314
|
-
//
|
|
315
|
-
// graph_files is self-keyed on (stash_root, file_path, body_hash) and is NO
|
|
316
|
-
// LONGER tied to entries.id. This is the #624-P1 win: deleting and
|
|
317
|
-
// re-inserting an entries row during a reindex no longer cascade-wipes the
|
|
318
|
-
// extracted graph — as long as the file's body_hash is unchanged, the graph
|
|
319
|
-
// data survives. body_hash is part of the PK so a content change yields a
|
|
320
|
-
// distinct key; a UNIQUE index on (stash_root, file_path) still enforces
|
|
321
|
-
// exactly one graph_files row per path (delete-then-insert on a hash change).
|
|
322
|
-
//
|
|
323
|
-
// graph_file_entities and graph_file_relations carry (stash_root, file_path,
|
|
324
|
-
// body_hash) and declare a composite FK -> graph_files ON DELETE CASCADE so
|
|
325
|
-
// child rows are removed when a graph_files row is replaced.
|
|
326
|
-
//
|
|
327
|
-
// #624-P1 targeted migration: an existing DB may still hold the OLD graph_files
|
|
328
|
-
// (entry_id PK). SQLite can't ALTER a primary key, so we RENAME the 3 graph
|
|
329
|
-
// tables aside (→ *_legacy) here — ONLY the graph tables, never the index/
|
|
330
|
-
// embeddings — then the CREATE block below builds the new shape, then
|
|
331
|
-
// migrateGraphDataFromLegacy() copies the data across so the graph is PRESERVED
|
|
332
|
-
// (not re-extracted).
|
|
333
|
-
migrateGraphFilesSchema(db);
|
|
334
|
-
db.exec(`
|
|
335
|
-
CREATE TABLE IF NOT EXISTS graph_meta (
|
|
336
|
-
stash_root TEXT PRIMARY KEY,
|
|
337
|
-
schema_version INTEGER NOT NULL,
|
|
338
|
-
generated_at TEXT NOT NULL,
|
|
339
|
-
considered_files INTEGER NOT NULL DEFAULT 0,
|
|
340
|
-
extracted_files INTEGER NOT NULL DEFAULT 0,
|
|
341
|
-
entity_count INTEGER NOT NULL DEFAULT 0,
|
|
342
|
-
relation_count INTEGER NOT NULL DEFAULT 0,
|
|
343
|
-
extraction_coverage REAL NOT NULL DEFAULT 0,
|
|
344
|
-
density REAL NOT NULL DEFAULT 0,
|
|
345
|
-
extractor_id TEXT,
|
|
346
|
-
extraction_run_id TEXT,
|
|
347
|
-
model TEXT,
|
|
348
|
-
prompt_version TEXT,
|
|
349
|
-
batch_size INTEGER,
|
|
350
|
-
cache_hits INTEGER NOT NULL DEFAULT 0,
|
|
351
|
-
cache_misses INTEGER NOT NULL DEFAULT 0,
|
|
352
|
-
truncation_count INTEGER NOT NULL DEFAULT 0,
|
|
353
|
-
failure_count INTEGER NOT NULL DEFAULT 0
|
|
354
|
-
);
|
|
355
|
-
|
|
356
|
-
CREATE TABLE IF NOT EXISTS graph_files (
|
|
357
|
-
stash_root TEXT NOT NULL,
|
|
358
|
-
file_path TEXT NOT NULL,
|
|
359
|
-
file_order INTEGER NOT NULL,
|
|
360
|
-
file_type TEXT NOT NULL,
|
|
361
|
-
body_hash TEXT NOT NULL,
|
|
362
|
-
confidence REAL,
|
|
363
|
-
status TEXT NOT NULL DEFAULT 'extracted',
|
|
364
|
-
reason TEXT,
|
|
365
|
-
extraction_run_id TEXT,
|
|
366
|
-
PRIMARY KEY (stash_root, file_path, body_hash)
|
|
367
|
-
);
|
|
368
|
-
|
|
369
|
-
CREATE UNIQUE INDEX IF NOT EXISTS idx_graph_files_path
|
|
370
|
-
ON graph_files(stash_root, file_path);
|
|
371
|
-
|
|
372
|
-
CREATE INDEX IF NOT EXISTS idx_graph_files_stash_order
|
|
373
|
-
ON graph_files(stash_root, file_order);
|
|
374
|
-
|
|
375
|
-
CREATE TABLE IF NOT EXISTS graph_file_entities (
|
|
376
|
-
stash_root TEXT NOT NULL,
|
|
377
|
-
file_path TEXT NOT NULL,
|
|
378
|
-
body_hash TEXT NOT NULL,
|
|
379
|
-
entity_order INTEGER NOT NULL,
|
|
380
|
-
entity_norm TEXT NOT NULL,
|
|
381
|
-
entity TEXT NOT NULL,
|
|
382
|
-
PRIMARY KEY (stash_root, file_path, body_hash, entity_order),
|
|
383
|
-
FOREIGN KEY (stash_root, file_path, body_hash)
|
|
384
|
-
REFERENCES graph_files(stash_root, file_path, body_hash) ON DELETE CASCADE
|
|
385
|
-
);
|
|
386
|
-
|
|
387
|
-
CREATE INDEX IF NOT EXISTS idx_graph_file_entities_entity_norm
|
|
388
|
-
ON graph_file_entities(stash_root, entity_norm);
|
|
389
|
-
|
|
390
|
-
CREATE TABLE IF NOT EXISTS graph_file_relations (
|
|
391
|
-
stash_root TEXT NOT NULL,
|
|
392
|
-
file_path TEXT NOT NULL,
|
|
393
|
-
body_hash TEXT NOT NULL,
|
|
394
|
-
relation_order INTEGER NOT NULL,
|
|
395
|
-
from_entity_norm TEXT NOT NULL,
|
|
396
|
-
from_entity TEXT NOT NULL,
|
|
397
|
-
to_entity_norm TEXT NOT NULL,
|
|
398
|
-
to_entity TEXT NOT NULL,
|
|
399
|
-
relation_type TEXT,
|
|
400
|
-
confidence REAL,
|
|
401
|
-
PRIMARY KEY (stash_root, file_path, body_hash, relation_order),
|
|
402
|
-
FOREIGN KEY (stash_root, file_path, body_hash)
|
|
403
|
-
REFERENCES graph_files(stash_root, file_path, body_hash) ON DELETE CASCADE
|
|
404
|
-
);
|
|
405
|
-
|
|
406
|
-
-- #624-P3: lazy graph-extraction queue. Standalone table (NO FK to
|
|
407
|
-
-- graph_files — a queued file by definition has no graph row yet).
|
|
408
|
-
-- Idempotent on (stash_root, file_path); drained highest-priority-first.
|
|
409
|
-
-- CREATE TABLE IF NOT EXISTS is the forward migration (no DB_VERSION bump).
|
|
410
|
-
CREATE TABLE IF NOT EXISTS graph_extraction_queue (
|
|
411
|
-
stash_root TEXT NOT NULL,
|
|
412
|
-
file_path TEXT NOT NULL,
|
|
413
|
-
body_hash TEXT NOT NULL,
|
|
414
|
-
queued_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
415
|
-
priority INTEGER NOT NULL DEFAULT 0,
|
|
416
|
-
PRIMARY KEY (stash_root, file_path)
|
|
417
|
-
);
|
|
418
|
-
|
|
419
|
-
CREATE INDEX IF NOT EXISTS idx_graph_extraction_queue_drain
|
|
420
|
-
ON graph_extraction_queue(stash_root, priority DESC, queued_at);
|
|
421
|
-
`);
|
|
422
|
-
// #624-P1 migration step 2: copy any renamed-aside legacy graph data into the
|
|
423
|
-
// new-shape tables (just created above), then drop the legacy tables. No-op
|
|
424
|
-
// unless migrateGraphFilesSchema renamed a legacy graph_files this open.
|
|
425
|
-
migrateGraphDataFromLegacy(db);
|
|
426
|
-
// FTS-dirty queue. Created here (not lazily on first upsert) so the
|
|
427
|
-
// per-entry write path doesn't issue a CREATE TABLE IF NOT EXISTS on
|
|
428
|
-
// every call — that DDL would fire thousands of times during a full
|
|
429
|
-
// index. See `markFtsDirty` and `rebuildFts({ incremental: true })`.
|
|
430
|
-
db.exec(`
|
|
431
|
-
CREATE TABLE IF NOT EXISTS entries_fts_dirty (
|
|
432
|
-
entry_id INTEGER PRIMARY KEY
|
|
433
|
-
);
|
|
434
|
-
`);
|
|
435
|
-
// sqlite-vec table
|
|
436
|
-
//
|
|
437
|
-
// Dimension contract:
|
|
438
|
-
// - When `embeddingDim` is `undefined`, the caller did NOT request a
|
|
439
|
-
// specific dim. Do not touch `index_meta.embeddingDim` and do not run
|
|
440
|
-
// the dim-change wipe — fall back to the stored dim (or the static
|
|
441
|
-
// default) only when we have to materialise the vec table for the
|
|
442
|
-
// first time. Without this guard, registry-side and other dim-unaware
|
|
443
|
-
// `openDatabase()` callers would silently overwrite the dim-aware
|
|
444
|
-
// improve/index value and oscillate the stored dim.
|
|
445
|
-
// - When `embeddingDim` is a number, the caller explicitly asked for
|
|
446
|
-
// that dim and owns the dim-change/backup/wipe semantics.
|
|
447
|
-
const dimExplicit = embeddingDim !== undefined;
|
|
448
|
-
const effectiveDim = embeddingDim ?? (Number(getMeta(db, "embeddingDim")) || EMBEDDING_DIM);
|
|
449
|
-
if (isVecAvailable(db)) {
|
|
450
|
-
// Check if stored embedding dimension differs from configured one
|
|
451
|
-
if (dimExplicit) {
|
|
452
|
-
const storedDim = getMeta(db, "embeddingDim");
|
|
453
|
-
if (storedDim && storedDim !== String(embeddingDim)) {
|
|
454
|
-
// Stored vectors are incompatible with the new dimension. Drop the vec
|
|
455
|
-
// table so the block below recreates it at the new width; the BLOB rows
|
|
456
|
-
// go too. Regenerable from markdown — re-embedded by the next index.
|
|
457
|
-
purgeEmbeddings(db, { dropVecTable: true });
|
|
458
|
-
}
|
|
459
|
-
}
|
|
460
|
-
const vecExists = db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='entries_vec'").get();
|
|
461
|
-
if (!vecExists) {
|
|
462
|
-
if (!Number.isInteger(effectiveDim) || effectiveDim <= 0 || effectiveDim > 4096) {
|
|
463
|
-
throw new Error(`Invalid embedding dimension: ${effectiveDim}`);
|
|
464
|
-
}
|
|
465
|
-
db.exec(`
|
|
466
|
-
CREATE VIRTUAL TABLE entries_vec USING vec0(
|
|
467
|
-
id INTEGER PRIMARY KEY,
|
|
468
|
-
embedding FLOAT[${effectiveDim}]
|
|
469
|
-
);
|
|
470
|
-
`);
|
|
471
|
-
}
|
|
472
|
-
if (dimExplicit) {
|
|
473
|
-
setMeta(db, "embeddingDim", String(embeddingDim));
|
|
474
|
-
}
|
|
475
|
-
}
|
|
476
|
-
else {
|
|
477
|
-
// Also purge BLOB embeddings on dimension change (JS fallback path).
|
|
478
|
-
// When sqlite-vec is unavailable, entries_vec doesn't exist but the BLOB
|
|
479
|
-
// embeddings table still stores vectors. If the configured dimension
|
|
480
|
-
// changes, those stored BLOBs become silently incompatible.
|
|
481
|
-
if (dimExplicit) {
|
|
482
|
-
const storedDim = getMeta(db, "embeddingDim");
|
|
483
|
-
if (storedDim && storedDim !== String(embeddingDim)) {
|
|
484
|
-
// JS-fallback path: no vec table, just clear the stale BLOB vectors.
|
|
485
|
-
purgeEmbeddings(db);
|
|
486
|
-
}
|
|
487
|
-
setMeta(db, "embeddingDim", String(embeddingDim));
|
|
488
|
-
}
|
|
489
|
-
}
|
|
490
|
-
// Usage telemetry table
|
|
491
|
-
ensureUsageEventsSchema(db);
|
|
492
|
-
// Registry index cache table — caches remote registry index documents so
|
|
493
|
-
// `akm search` does not hit the network on every invocation.
|
|
494
|
-
db.exec(REGISTRY_INDEX_CACHE_DDL);
|
|
495
|
-
}
|
|
496
113
|
/**
|
|
497
114
|
* Purge stored embeddings (BLOB rows in `embeddings`, plus the `entries_vec`
|
|
498
115
|
* virtual table) and mark the index as embedding-free. The single place that
|
|
@@ -610,129 +227,6 @@ function getUpsertStmts(db) {
|
|
|
610
227
|
upsertStmtsByDb.set(db, stmts);
|
|
611
228
|
return stmts;
|
|
612
229
|
}
|
|
613
|
-
/**
|
|
614
|
-
* Phase 5A / DB v17 schema guard.
|
|
615
|
-
*
|
|
616
|
-
* Ensures the `entries.derived_from` column + index exist on the open
|
|
617
|
-
* connection. Called from `ensureSchema()` after the entries CREATE so that
|
|
618
|
-
* legacy databases (created against a pre-v17 binary) still gain the new column
|
|
619
|
-
* without data loss. Idempotent: a `PRAGMA table_info` lookup gates the ALTER.
|
|
620
|
-
*/
|
|
621
|
-
function ensureDerivedFromColumn(db) {
|
|
622
|
-
bestEffort(() => {
|
|
623
|
-
const cols = db.prepare("PRAGMA table_info(entries)").all();
|
|
624
|
-
const hasColumn = cols.some((c) => c.name === "derived_from");
|
|
625
|
-
if (!hasColumn) {
|
|
626
|
-
db.exec("ALTER TABLE entries ADD COLUMN derived_from TEXT");
|
|
627
|
-
}
|
|
628
|
-
// Index creation is idempotent on its own; safe to call unconditionally.
|
|
629
|
-
db.exec("CREATE INDEX IF NOT EXISTS idx_entries_derived_from ON entries(derived_from)");
|
|
630
|
-
}, "entries table may not exist on a brand-new DB before CREATE — caller is responsible");
|
|
631
|
-
}
|
|
632
|
-
/**
|
|
633
|
-
* Returns true when a table exists in the current database.
|
|
634
|
-
*/
|
|
635
|
-
function tableExists(db, name) {
|
|
636
|
-
const row = db.prepare("SELECT 1 FROM sqlite_master WHERE type='table' AND name=? LIMIT 1").get(name);
|
|
637
|
-
return row !== undefined && row !== null;
|
|
638
|
-
}
|
|
639
|
-
/**
|
|
640
|
-
* #624-P1 targeted graph-schema migration — STEP 1 of 2 (rename).
|
|
641
|
-
*
|
|
642
|
-
* graph_files was re-keyed from `entry_id INTEGER PRIMARY KEY REFERENCES
|
|
643
|
-
* entries(id)` to a self-contained `(stash_root, file_path, body_hash)` PK.
|
|
644
|
-
* SQLite cannot ALTER a primary key, so an existing DB carrying the OLD shape
|
|
645
|
-
* has its 3 graph tables RENAMED to `*_legacy` here; ensureSchema's CREATE block
|
|
646
|
-
* then builds the new-shape tables, and {@link migrateGraphDataFromLegacy} COPIES
|
|
647
|
-
* the data across before dropping the legacy tables. The graph is preserved —
|
|
648
|
-
* NOT re-extracted (re-extraction is ~19s/file of LLM work).
|
|
649
|
-
*
|
|
650
|
-
* Crucially this is GRAPH-SCOPED: it touches ONLY the graph tables, never the
|
|
651
|
-
* index / embeddings / enrichment cache. So users keep their (expensive)
|
|
652
|
-
* embeddings instead of being forced into a full re-embed by a DB_VERSION bump.
|
|
653
|
-
*
|
|
654
|
-
* Detection: the old schema has an `entry_id` column on graph_files. Fresh DBs
|
|
655
|
-
* (no graph_files yet) and already-migrated DBs (no entry_id column) are no-ops.
|
|
656
|
-
* Idempotent.
|
|
657
|
-
*/
|
|
658
|
-
function migrateGraphFilesSchema(db) {
|
|
659
|
-
bestEffort(() => {
|
|
660
|
-
const cols = db.prepare("PRAGMA table_info(graph_files)").all();
|
|
661
|
-
const isLegacyShape = cols.some((c) => c.name === "entry_id");
|
|
662
|
-
if (!isLegacyShape)
|
|
663
|
-
return;
|
|
664
|
-
// A previous interrupted migration may have left *_legacy behind — drop those
|
|
665
|
-
// husks first so the rename below doesn't collide.
|
|
666
|
-
db.exec("DROP TABLE IF EXISTS graph_file_relations_legacy");
|
|
667
|
-
db.exec("DROP TABLE IF EXISTS graph_file_entities_legacy");
|
|
668
|
-
db.exec("DROP TABLE IF EXISTS graph_files_legacy");
|
|
669
|
-
// Rename the 3 entry_id-keyed tables aside. graph_meta is unchanged (stash_root
|
|
670
|
-
// key) so it is left in place. ALTER … RENAME auto-updates child FK refs in
|
|
671
|
-
// SQLite ≥3.25, which is fine — the legacy children are dropped after the copy.
|
|
672
|
-
db.exec("ALTER TABLE graph_files RENAME TO graph_files_legacy");
|
|
673
|
-
if (tableExists(db, "graph_file_entities")) {
|
|
674
|
-
db.exec("ALTER TABLE graph_file_entities RENAME TO graph_file_entities_legacy");
|
|
675
|
-
}
|
|
676
|
-
if (tableExists(db, "graph_file_relations")) {
|
|
677
|
-
db.exec("ALTER TABLE graph_file_relations RENAME TO graph_file_relations_legacy");
|
|
678
|
-
}
|
|
679
|
-
}, "graph_files may not exist on a brand-new DB before CREATE — caller is responsible");
|
|
680
|
-
}
|
|
681
|
-
/**
|
|
682
|
-
* #624-P1 targeted graph-schema migration — STEP 2 of 2 (copy + drop legacy).
|
|
683
|
-
*
|
|
684
|
-
* Runs AFTER the graph CREATE TABLE block, so the new-shape tables exist. Copies
|
|
685
|
-
* every legacy row into the re-keyed tables — the old tables already carry
|
|
686
|
-
* (stash_root, file_path, body_hash) next to entry_id, so the projection is a
|
|
687
|
-
* straight column copy (children JOIN back to graph_files_legacy to resolve the
|
|
688
|
-
* composite key from their entry_id). Then drops the `*_legacy` tables.
|
|
689
|
-
*
|
|
690
|
-
* Best-effort: a copy failure (e.g. a pre-body_hash legacy schema) is tolerated,
|
|
691
|
-
* and the legacy tables are dropped regardless so they never linger. Rows whose
|
|
692
|
-
* body_hash is null/empty can't form the new PK and are skipped (they re-extract).
|
|
693
|
-
*/
|
|
694
|
-
function migrateGraphDataFromLegacy(db) {
|
|
695
|
-
if (!tableExists(db, "graph_files_legacy"))
|
|
696
|
-
return;
|
|
697
|
-
let migratedFiles = 0;
|
|
698
|
-
bestEffort(() => {
|
|
699
|
-
db.transaction(() => {
|
|
700
|
-
const res = db
|
|
701
|
-
.prepare(`INSERT OR IGNORE INTO graph_files
|
|
702
|
-
(stash_root, file_path, body_hash, file_order, file_type, confidence, status, reason, extraction_run_id)
|
|
703
|
-
SELECT stash_root, file_path, body_hash, file_order, file_type, confidence, status, reason, extraction_run_id
|
|
704
|
-
FROM graph_files_legacy
|
|
705
|
-
WHERE body_hash IS NOT NULL AND body_hash != ''`)
|
|
706
|
-
.run();
|
|
707
|
-
migratedFiles = Number(res.changes);
|
|
708
|
-
if (tableExists(db, "graph_file_entities_legacy")) {
|
|
709
|
-
db.exec(`INSERT OR IGNORE INTO graph_file_entities
|
|
710
|
-
(stash_root, file_path, body_hash, entity_order, entity_norm, entity)
|
|
711
|
-
SELECT gf.stash_root, gf.file_path, gf.body_hash, e.entity_order, e.entity_norm, e.entity
|
|
712
|
-
FROM graph_file_entities_legacy e
|
|
713
|
-
JOIN graph_files_legacy gf ON gf.entry_id = e.entry_id
|
|
714
|
-
WHERE gf.body_hash IS NOT NULL AND gf.body_hash != ''`);
|
|
715
|
-
}
|
|
716
|
-
if (tableExists(db, "graph_file_relations_legacy")) {
|
|
717
|
-
db.exec(`INSERT OR IGNORE INTO graph_file_relations
|
|
718
|
-
(stash_root, file_path, body_hash, relation_order, from_entity_norm, from_entity, to_entity_norm, to_entity, relation_type, confidence)
|
|
719
|
-
SELECT gf.stash_root, gf.file_path, gf.body_hash, r.relation_order, r.from_entity_norm, r.from_entity, r.to_entity_norm, r.to_entity, r.relation_type, r.confidence
|
|
720
|
-
FROM graph_file_relations_legacy r
|
|
721
|
-
JOIN graph_files_legacy gf ON gf.entry_id = r.entry_id
|
|
722
|
-
WHERE gf.body_hash IS NOT NULL AND gf.body_hash != ''`);
|
|
723
|
-
}
|
|
724
|
-
})();
|
|
725
|
-
}, "graph data migration is best-effort; legacy tables are dropped regardless below");
|
|
726
|
-
// Always drop the legacy tables (children first), migrated or not.
|
|
727
|
-
bestEffort(() => {
|
|
728
|
-
db.exec("DROP TABLE IF EXISTS graph_file_relations_legacy");
|
|
729
|
-
db.exec("DROP TABLE IF EXISTS graph_file_entities_legacy");
|
|
730
|
-
db.exec("DROP TABLE IF EXISTS graph_files_legacy");
|
|
731
|
-
}, "drop legacy graph tables after migration");
|
|
732
|
-
if (migratedFiles > 0) {
|
|
733
|
-
warn(`[akm] graph index re-keyed (#624): migrated ${migratedFiles} extracted file(s) to the new schema — no re-extraction needed. Index + embeddings untouched.`);
|
|
734
|
-
}
|
|
735
|
-
}
|
|
736
230
|
/**
|
|
737
231
|
* Phase 5A / Advantage D5: look up the derived-memory child row whose
|
|
738
232
|
* `derived_from` column matches `parentRef` (e.g. `"memory:claude-prefs"`).
|
|
@@ -747,7 +241,7 @@ export function getDerivedForParent(db, parentRef) {
|
|
|
747
241
|
return null;
|
|
748
242
|
try {
|
|
749
243
|
const row = db
|
|
750
|
-
.prepare(`SELECT
|
|
244
|
+
.prepare(`SELECT ${ENTRY_COLUMNS}
|
|
751
245
|
FROM entries
|
|
752
246
|
WHERE derived_from = ?
|
|
753
247
|
ORDER BY id DESC
|
|
@@ -755,23 +249,7 @@ export function getDerivedForParent(db, parentRef) {
|
|
|
755
249
|
.get(parentRef);
|
|
756
250
|
if (!row)
|
|
757
251
|
return null;
|
|
758
|
-
|
|
759
|
-
try {
|
|
760
|
-
entry = JSON.parse(row.entry_json);
|
|
761
|
-
}
|
|
762
|
-
catch {
|
|
763
|
-
warn(`[db] getDerivedForParent: skipping entry id=${row.id} — corrupt entry_json`);
|
|
764
|
-
return null;
|
|
765
|
-
}
|
|
766
|
-
return {
|
|
767
|
-
id: row.id,
|
|
768
|
-
entryKey: row.entry_key,
|
|
769
|
-
dirPath: row.dir_path,
|
|
770
|
-
filePath: row.file_path,
|
|
771
|
-
stashDir: row.stash_dir,
|
|
772
|
-
entry,
|
|
773
|
-
searchText: row.search_text,
|
|
774
|
-
};
|
|
252
|
+
return rowToIndexedEntry(row, "getDerivedForParent");
|
|
775
253
|
}
|
|
776
254
|
catch {
|
|
777
255
|
/* `derived_from` column may not exist on legacy DBs that haven't been
|
|
@@ -1115,65 +593,38 @@ export function searchFts(db, query, limit, entryType, excludeTypes) {
|
|
|
1115
593
|
return [];
|
|
1116
594
|
return runFtsQuery(db, prefixQuery, limit, entryType, excludeTypes);
|
|
1117
595
|
}
|
|
1118
|
-
/**
|
|
1119
|
-
* Build a prefix query from an FTS5 query string by appending `*` to each
|
|
1120
|
-
* token that is 3+ characters long. Tokens shorter than 3 characters are
|
|
1121
|
-
* kept as-is (no prefix expansion) to avoid overly broad matches.
|
|
1122
|
-
*
|
|
1123
|
-
* Returns null if no tokens qualify for prefix expansion.
|
|
1124
|
-
*/
|
|
1125
|
-
function buildPrefixQuery(ftsQuery) {
|
|
1126
|
-
const tokens = ftsQuery.split(/\s+/).filter(Boolean);
|
|
1127
|
-
let hasPrefix = false;
|
|
1128
|
-
const prefixTokens = tokens.map((t) => {
|
|
1129
|
-
if (t.length >= 3) {
|
|
1130
|
-
hasPrefix = true;
|
|
1131
|
-
return `${t}*`;
|
|
1132
|
-
}
|
|
1133
|
-
return t;
|
|
1134
|
-
});
|
|
1135
|
-
if (!hasPrefix)
|
|
1136
|
-
return null;
|
|
1137
|
-
return prefixTokens.join(" ");
|
|
1138
|
-
}
|
|
1139
596
|
function runFtsQuery(db, ftsQuery, limit, entryType, excludeTypes) {
|
|
1140
|
-
let sql;
|
|
1141
|
-
let params;
|
|
1142
597
|
// #627 — exclude-type clause. Only applies on the untyped ('any') path; an
|
|
1143
598
|
// explicit include filter (entryType) already narrows to a single type, so
|
|
1144
599
|
// exclusion is redundant there. An empty list skips the clause entirely
|
|
1145
600
|
// (never emit `NOT IN ()`, which is a SQL error / always-false).
|
|
1146
601
|
const excludes = excludeTypes && excludeTypes.length > 0 ? excludeTypes : [];
|
|
1147
|
-
//
|
|
1148
|
-
//
|
|
602
|
+
// The typed and untyped paths differ ONLY by one WHERE clause (an entry_type
|
|
603
|
+
// equality vs. an optional NOT IN exclusion) and their param order — the
|
|
604
|
+
// SELECT/JOIN/ORDER/LIMIT is shared, so build it once. Join on integer
|
|
605
|
+
// entry_id directly (no CAST; we store integer). bm25() per-column weights:
|
|
606
|
+
// entry_id(0), name(10), description(5), tags(3), hints(2), content(1).
|
|
607
|
+
let filterClause;
|
|
608
|
+
let params;
|
|
1149
609
|
if (entryType && entryType !== "any") {
|
|
1150
|
-
|
|
1151
|
-
SELECT e.id, e.file_path AS filePath, e.entry_json, e.search_text AS searchText,
|
|
1152
|
-
bm25(entries_fts, 0, 10.0, 5.0, 3.0, 2.0, 1.0) AS bm25Score
|
|
1153
|
-
FROM entries_fts f
|
|
1154
|
-
JOIN entries e ON e.id = f.entry_id
|
|
1155
|
-
WHERE entries_fts MATCH ?
|
|
1156
|
-
AND e.entry_type = ?
|
|
1157
|
-
ORDER BY bm25Score, e.id ASC
|
|
1158
|
-
LIMIT ?
|
|
1159
|
-
`;
|
|
610
|
+
filterClause = "AND e.entry_type = ?";
|
|
1160
611
|
params = [ftsQuery, entryType, limit];
|
|
1161
612
|
}
|
|
1162
613
|
else {
|
|
1163
|
-
|
|
1164
|
-
sql = `
|
|
1165
|
-
SELECT e.id, e.file_path AS filePath, e.entry_json, e.search_text AS searchText,
|
|
1166
|
-
bm25(entries_fts, 0, 10.0, 5.0, 3.0, 2.0, 1.0) AS bm25Score
|
|
1167
|
-
FROM entries_fts f
|
|
1168
|
-
JOIN entries e ON e.id = f.entry_id
|
|
1169
|
-
WHERE entries_fts MATCH ?
|
|
1170
|
-
${excludeClause}
|
|
1171
|
-
ORDER BY bm25Score, e.id ASC
|
|
1172
|
-
LIMIT ?
|
|
1173
|
-
`;
|
|
614
|
+
filterClause = excludes.length > 0 ? `AND e.entry_type NOT IN (${excludes.map(() => "?").join(", ")})` : "";
|
|
1174
615
|
// Param order: MATCH, then the NOT IN values, then LIMIT.
|
|
1175
616
|
params = [ftsQuery, ...excludes, limit];
|
|
1176
617
|
}
|
|
618
|
+
const sql = `
|
|
619
|
+
SELECT e.id, e.file_path AS filePath, e.entry_json, e.search_text AS searchText,
|
|
620
|
+
bm25(entries_fts, 0, 10.0, 5.0, 3.0, 2.0, 1.0) AS bm25Score
|
|
621
|
+
FROM entries_fts f
|
|
622
|
+
JOIN entries e ON e.id = f.entry_id
|
|
623
|
+
WHERE entries_fts MATCH ?
|
|
624
|
+
${filterClause}
|
|
625
|
+
ORDER BY bm25Score, e.id ASC
|
|
626
|
+
LIMIT ?
|
|
627
|
+
`;
|
|
1177
628
|
try {
|
|
1178
629
|
const rows = db.prepare(sql).all(...params);
|
|
1179
630
|
// Guard against corrupt JSON — skip the row rather than crashing
|
|
@@ -1201,43 +652,13 @@ function runFtsQuery(db, ftsQuery, limit, entryType, excludeTypes) {
|
|
|
1201
652
|
return [];
|
|
1202
653
|
}
|
|
1203
654
|
}
|
|
1204
|
-
|
|
1205
|
-
// Allow only characters safe in FTS5 queries: letters, digits, underscores,
|
|
1206
|
-
// and whitespace. Everything else (hyphens, dots, quotes, parens, asterisks,
|
|
1207
|
-
// colons, carets, @, !, etc.) is replaced with a space so that compound
|
|
1208
|
-
// identifiers like "code-review" or "k8s.setup" become AND-joined tokens
|
|
1209
|
-
// ("code review", "k8s setup") rather than triggering FTS5 syntax errors.
|
|
1210
|
-
let sanitized = query.replace(/[^a-zA-Z0-9_\s]/g, " ");
|
|
1211
|
-
// Neutralize the NEAR operator (FTS5 proximity syntax)
|
|
1212
|
-
sanitized = sanitized.replace(/\bNEAR\b/g, " ");
|
|
1213
|
-
const tokens = sanitized.split(/\s+/).filter((t) => t.length >= 1);
|
|
1214
|
-
if (tokens.length === 0)
|
|
1215
|
-
return "";
|
|
1216
|
-
// Use implicit AND (space-separated tokens) for precision. FTS5 treats
|
|
1217
|
-
// space-separated tokens as an implicit AND, matching only rows that
|
|
1218
|
-
// contain ALL terms.
|
|
1219
|
-
return tokens.join(" ");
|
|
1220
|
-
}
|
|
655
|
+
// ── All entries ─────────────────────────────────────────────────────────────
|
|
1221
656
|
function parseEntryRows(rows, context) {
|
|
1222
657
|
const entries = [];
|
|
1223
658
|
for (const row of rows) {
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
}
|
|
1228
|
-
catch {
|
|
1229
|
-
warn(`[db] ${context}: skipping entry id=${row.id} — corrupt entry_json`);
|
|
1230
|
-
continue;
|
|
1231
|
-
}
|
|
1232
|
-
entries.push({
|
|
1233
|
-
id: row.id,
|
|
1234
|
-
entryKey: row.entry_key,
|
|
1235
|
-
dirPath: row.dir_path,
|
|
1236
|
-
filePath: row.file_path,
|
|
1237
|
-
stashDir: row.stash_dir,
|
|
1238
|
-
entry,
|
|
1239
|
-
searchText: row.search_text,
|
|
1240
|
-
});
|
|
659
|
+
const mapped = rowToIndexedEntry(row, context);
|
|
660
|
+
if (mapped)
|
|
661
|
+
entries.push(mapped);
|
|
1241
662
|
}
|
|
1242
663
|
return entries;
|
|
1243
664
|
}
|
|
@@ -1248,16 +669,15 @@ export function getAllEntries(db, entryType, excludeTypes) {
|
|
|
1248
669
|
// list skips the clause (never `NOT IN ()`).
|
|
1249
670
|
const excludes = excludeTypes && excludeTypes.length > 0 ? excludeTypes : [];
|
|
1250
671
|
if (entryType && entryType !== "any") {
|
|
1251
|
-
sql =
|
|
1252
|
-
"SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text FROM entries WHERE entry_type = ?";
|
|
672
|
+
sql = `SELECT ${ENTRY_COLUMNS} FROM entries WHERE entry_type = ?`;
|
|
1253
673
|
params = [entryType];
|
|
1254
674
|
}
|
|
1255
675
|
else if (excludes.length > 0) {
|
|
1256
|
-
sql = `SELECT
|
|
676
|
+
sql = `SELECT ${ENTRY_COLUMNS} FROM entries WHERE entry_type NOT IN (${excludes.map(() => "?").join(", ")})`;
|
|
1257
677
|
params = [...excludes];
|
|
1258
678
|
}
|
|
1259
679
|
else {
|
|
1260
|
-
sql =
|
|
680
|
+
sql = `SELECT ${ENTRY_COLUMNS} FROM entries`;
|
|
1261
681
|
params = [];
|
|
1262
682
|
}
|
|
1263
683
|
const rows = db.prepare(sql).all(...params);
|
|
@@ -1350,9 +770,7 @@ export function getEntryById(db, id) {
|
|
|
1350
770
|
return { filePath: row.file_path, entry };
|
|
1351
771
|
}
|
|
1352
772
|
export function getEntriesByDir(db, dirPath) {
|
|
1353
|
-
const rows = db
|
|
1354
|
-
.prepare("SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text FROM entries WHERE dir_path = ?")
|
|
1355
|
-
.all(dirPath);
|
|
773
|
+
const rows = db.prepare(`SELECT ${ENTRY_COLUMNS} FROM entries WHERE dir_path = ?`).all(dirPath);
|
|
1356
774
|
return parseEntryRows(rows, "getEntriesByDir");
|
|
1357
775
|
}
|
|
1358
776
|
/**
|
|
@@ -1633,6 +1051,11 @@ function bareRef(ref) {
|
|
|
1633
1051
|
* entry_ref populated (see logCurateEvent), so curation is a real retrieval
|
|
1634
1052
|
* signal here. Legacy summary-only curate rows with a NULL entry_ref simply
|
|
1635
1053
|
* contribute nothing.
|
|
1054
|
+
*
|
|
1055
|
+
* Machine-sourced events (`source` = 'improve' or 'task') are EXCLUDED: this
|
|
1056
|
+
* count feeds salience/ranking, and pipeline probe traffic counting as demand
|
|
1057
|
+
* creates a self-reinforcing loop (meta-review 05 DRIFT-6). NULL sources
|
|
1058
|
+
* (pre-column rows) count as user demand.
|
|
1636
1059
|
*/
|
|
1637
1060
|
export function getRetrievalCounts(db, refs) {
|
|
1638
1061
|
if (refs.length === 0)
|
|
@@ -1671,6 +1094,7 @@ export function getRetrievalCounts(db, refs) {
|
|
|
1671
1094
|
FROM usage_events
|
|
1672
1095
|
WHERE event_type IN ('search','show','curate')
|
|
1673
1096
|
AND entry_ref IS NOT NULL
|
|
1097
|
+
AND (source IS NULL OR source NOT IN ('improve','task'))
|
|
1674
1098
|
AND CASE
|
|
1675
1099
|
WHEN instr(entry_ref, '//') > 0
|
|
1676
1100
|
THEN substr(entry_ref, instr(entry_ref, '//') + 2)
|
|
@@ -1823,84 +1247,24 @@ export function getEntryByRef(db, type, name) {
|
|
|
1823
1247
|
return db.prepare("SELECT id FROM entries WHERE entry_type = ? AND entry_key = ?").get(type, `${type}:${name}`);
|
|
1824
1248
|
}
|
|
1825
1249
|
/**
|
|
1826
|
-
*
|
|
1827
|
-
*
|
|
1828
|
-
*
|
|
1829
|
-
*
|
|
1830
|
-
*
|
|
1831
|
-
* This replaces the unbounded `-0.03 × negativeCount` delta that could
|
|
1832
|
-
* silently remove high-utility assets from the improvement loop.
|
|
1833
|
-
*/
|
|
1834
|
-
const FEEDBACK_LR = 0.1;
|
|
1835
|
-
/**
|
|
1836
|
-
* Positive reward signal for a single positive feedback event.
|
|
1837
|
-
* Reward 1.0 means "fully correct / helpful".
|
|
1838
|
-
*/
|
|
1839
|
-
const FEEDBACK_REWARD_POSITIVE = 1.0;
|
|
1840
|
-
/**
|
|
1841
|
-
* Negative reward signal for a single negative feedback event.
|
|
1842
|
-
* Reward 0.0 means "not helpful" (lowest MemRL signal).
|
|
1843
|
-
*/
|
|
1844
|
-
const FEEDBACK_REWARD_NEGATIVE = 0.0;
|
|
1845
|
-
/**
|
|
1846
|
-
* Maximum total negative utility delta allowed in a single
|
|
1847
|
-
* `applyFeedbackToUtilityScore` call regardless of negativeCount.
|
|
1848
|
-
*
|
|
1849
|
-
* This caps the per-day negative impact (the function is called once per
|
|
1850
|
-
* feedback event — spamming 10 negatives in one session can move utility
|
|
1851
|
-
* at most `MAX_NEG_DELTA_PER_CALL`). The cap prevents a noisy negative-
|
|
1852
|
-
* feedback stream from silently destroying a high-utility asset's ranking.
|
|
1853
|
-
*/
|
|
1854
|
-
const MAX_NEG_DELTA_PER_CALL = 0.15;
|
|
1855
|
-
/**
|
|
1856
|
-
* Utility threshold below which a review-needed escalation is triggered.
|
|
1857
|
-
* When a previously high-utility asset (≥ HIGH_UTILITY_THRESHOLD) drops
|
|
1858
|
-
* below this value, the caller should create an escalation proposal.
|
|
1859
|
-
*/
|
|
1860
|
-
export const UTILITY_REVIEW_THRESHOLD = 0.5;
|
|
1861
|
-
/**
|
|
1862
|
-
* Utility level considered "high" — assets above this are tracked for
|
|
1863
|
-
* threshold-crossing escalation.
|
|
1864
|
-
*/
|
|
1865
|
-
export const HIGH_UTILITY_THRESHOLD = 0.5;
|
|
1866
|
-
/**
|
|
1867
|
-
* Apply accumulated feedback counts to the utility score of an entry using the
|
|
1868
|
-
* MemRL bounded-step EMA formula (F-5 / #386, arXiv:2601.03192).
|
|
1869
|
-
*
|
|
1870
|
-
* Replaces the previous unbounded `-0.03 × negativeCount` formula with:
|
|
1871
|
-
*
|
|
1872
|
-
* reward = weighted average of positive and negative signals
|
|
1873
|
-
* nextUtil = clamp(currentUtil + lr × (reward − currentUtil), 0, 1)
|
|
1874
|
-
*
|
|
1875
|
-
* The negative impact is additionally capped at {@link MAX_NEG_DELTA_PER_CALL}
|
|
1876
|
-
* to prevent a noisy feedback stream from silently erasing a high-utility asset.
|
|
1250
|
+
* Apply accumulated feedback counts to the utility score of an entry, persisting
|
|
1251
|
+
* the result. The bounded-step EMA policy itself (MemRL, F-5 / #386,
|
|
1252
|
+
* arXiv:2601.03192) lives in {@link computeNextUtility} (feedback/utility-policy);
|
|
1253
|
+
* this function only reads the current utility, applies the policy, and writes
|
|
1254
|
+
* the new value.
|
|
1877
1255
|
*
|
|
1878
1256
|
* A new entry starts at 0.5 (neutral midpoint) before the EMA step is applied.
|
|
1879
|
-
*
|
|
1880
|
-
* Returns a {@link FeedbackUtilityResult} so the caller can detect
|
|
1881
|
-
* previously high-utility asset
|
|
1882
|
-
* an escalation proposal.
|
|
1257
|
+
* When there is no feedback (both counts zero) the score is left untouched — no
|
|
1258
|
+
* DB write. Returns a {@link FeedbackUtilityResult} so the caller can detect a
|
|
1259
|
+
* previously high-utility asset crossing below the review threshold and escalate.
|
|
1883
1260
|
*/
|
|
1884
1261
|
export function applyFeedbackToUtilityScore(db, entryId, positiveCount, negativeCount) {
|
|
1885
1262
|
const existing = getUtilityScore(db, entryId);
|
|
1886
1263
|
const previousUtility = existing?.utility ?? 0.5;
|
|
1264
|
+
const result = computeNextUtility(previousUtility, positiveCount, negativeCount);
|
|
1887
1265
|
if (positiveCount === 0 && negativeCount === 0) {
|
|
1888
|
-
return
|
|
1889
|
-
}
|
|
1890
|
-
const total = positiveCount + negativeCount;
|
|
1891
|
-
// Weighted reward: proportion of positive signals.
|
|
1892
|
-
const reward = positiveCount > 0 && negativeCount === 0
|
|
1893
|
-
? FEEDBACK_REWARD_POSITIVE
|
|
1894
|
-
: negativeCount > 0 && positiveCount === 0
|
|
1895
|
-
? FEEDBACK_REWARD_NEGATIVE
|
|
1896
|
-
: (positiveCount * FEEDBACK_REWARD_POSITIVE + negativeCount * FEEDBACK_REWARD_NEGATIVE) / total;
|
|
1897
|
-
// MemRL bounded-step EMA: lr × (reward − current)
|
|
1898
|
-
let delta = FEEDBACK_LR * (reward - previousUtility);
|
|
1899
|
-
// Per-call negative cap: if delta is negative (net negative feedback), cap it.
|
|
1900
|
-
if (delta < 0) {
|
|
1901
|
-
delta = Math.max(delta, -MAX_NEG_DELTA_PER_CALL);
|
|
1266
|
+
return result;
|
|
1902
1267
|
}
|
|
1903
|
-
const nextUtility = Math.max(0, Math.min(1, previousUtility + delta));
|
|
1904
1268
|
const now = new Date().toISOString();
|
|
1905
1269
|
db.prepare(`
|
|
1906
1270
|
INSERT INTO utility_scores (entry_id, utility, show_count, search_count, select_rate, last_used_at, updated_at)
|
|
@@ -1908,9 +1272,8 @@ export function applyFeedbackToUtilityScore(db, entryId, positiveCount, negative
|
|
|
1908
1272
|
ON CONFLICT(entry_id) DO UPDATE SET
|
|
1909
1273
|
utility = ?,
|
|
1910
1274
|
updated_at = ?
|
|
1911
|
-
`).run(entryId, nextUtility, now, now, nextUtility, now);
|
|
1912
|
-
|
|
1913
|
-
return { previousUtility, nextUtility, crossedReviewThreshold };
|
|
1275
|
+
`).run(entryId, result.nextUtility, now, now, result.nextUtility, now);
|
|
1276
|
+
return result;
|
|
1914
1277
|
}
|
|
1915
1278
|
/**
|
|
1916
1279
|
* Re-link detached usage_events to their current entry_ids via entry_ref.
|