akm-cli 0.8.0-rc2 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/{.github/CHANGELOG.md → CHANGELOG.md} +238 -3
- package/README.md +22 -6
- package/SECURITY.md +93 -0
- package/dist/assets/help/help-accept.md +12 -0
- package/dist/assets/help/help-improve.md +81 -0
- package/dist/{commands → assets}/help/help-proposals.md +7 -4
- package/dist/assets/help/help-reject.md +11 -0
- package/dist/{output → assets/hints}/cli-hints-full.md +60 -32
- package/dist/{output → assets/hints}/cli-hints-short.md +10 -7
- package/dist/assets/profiles/default.json +15 -0
- package/dist/assets/profiles/graph-refresh.json +13 -0
- package/dist/assets/profiles/memory-focus.json +12 -0
- package/dist/assets/profiles/quick.json +15 -0
- package/dist/assets/profiles/thorough.json +15 -0
- package/dist/assets/prompts/extract-session.md +80 -0
- package/dist/assets/prompts/graph-extract-user-prompt.md +35 -0
- package/dist/assets/tasks/graph-refresh-weekly.yml +10 -0
- package/dist/cli/config-migrate.js +144 -0
- package/dist/cli/config-validate.js +39 -0
- package/dist/cli/confirm.js +73 -0
- package/dist/cli/parse-args.js +93 -3
- package/dist/cli/shared.js +129 -0
- package/dist/cli.js +2141 -1268
- package/dist/commands/add-cli.js +279 -0
- package/dist/commands/agent-dispatch.js +20 -12
- package/dist/commands/agent-support.js +11 -5
- package/dist/commands/completions.js +3 -0
- package/dist/commands/config-cli.js +129 -517
- package/dist/commands/consolidate.js +1557 -147
- package/dist/commands/curate.js +44 -3
- package/dist/commands/db-cli.js +23 -0
- package/dist/commands/distill-promotion-policy.js +5 -3
- package/dist/commands/distill.js +906 -100
- package/dist/commands/env.js +213 -0
- package/dist/commands/eval-cases.js +3 -0
- package/dist/commands/events.js +3 -0
- package/dist/commands/extract-cli.js +127 -0
- package/dist/commands/extract-prompt.js +217 -0
- package/dist/commands/extract.js +477 -0
- package/dist/commands/feedback-cli.js +331 -0
- package/dist/commands/graph.js +260 -5
- package/dist/commands/health.js +1042 -55
- package/dist/commands/history.js +51 -16
- package/dist/commands/improve-auto-accept.js +97 -0
- package/dist/commands/improve-cli.js +236 -0
- package/dist/commands/improve-profiles.js +138 -0
- package/dist/commands/improve-result-file.js +167 -0
- package/dist/commands/improve.js +1736 -346
- package/dist/commands/info.js +26 -28
- package/dist/commands/init.js +49 -1
- package/dist/commands/installed-stashes.js +6 -23
- package/dist/commands/knowledge.js +3 -0
- package/dist/commands/lint/agent-linter.js +3 -0
- package/dist/commands/lint/base-linter.js +199 -5
- package/dist/commands/lint/command-linter.js +3 -0
- package/dist/commands/lint/default-linter.js +3 -0
- package/dist/commands/lint/env-key-rules.js +154 -0
- package/dist/commands/lint/index.js +92 -3
- package/dist/commands/lint/knowledge-linter.js +3 -0
- package/dist/commands/lint/markdown-insertion.js +343 -0
- package/dist/commands/lint/memory-linter.js +3 -0
- package/dist/commands/lint/registry.js +3 -0
- package/dist/commands/lint/skill-linter.js +3 -0
- package/dist/commands/lint/task-linter.js +15 -12
- package/dist/commands/lint/types.js +3 -0
- package/dist/commands/lint/workflow-linter.js +3 -0
- package/dist/commands/lint.js +3 -0
- package/dist/commands/migration-help.js +5 -2
- package/dist/commands/proposal-drain-policies.js +128 -0
- package/dist/commands/proposal-drain.js +477 -0
- package/dist/commands/proposal.js +60 -6
- package/dist/commands/propose.js +24 -19
- package/dist/commands/reflect.js +1004 -94
- package/dist/commands/registry-cli.js +150 -0
- package/dist/commands/registry-search.js +3 -0
- package/dist/commands/remember-cli.js +257 -0
- package/dist/commands/remember.js +15 -6
- package/dist/commands/schema-repair.js +88 -15
- package/dist/commands/search.js +99 -14
- package/dist/commands/secret.js +173 -0
- package/dist/commands/self-update.js +3 -0
- package/dist/commands/show.js +32 -13
- package/dist/commands/source-add.js +7 -35
- package/dist/commands/source-clone.js +3 -0
- package/dist/commands/source-manage.js +3 -0
- package/dist/commands/tasks.js +161 -95
- package/dist/commands/url-checker.js +3 -0
- package/dist/core/action-contributors.js +3 -0
- package/dist/core/asset-ref.js +13 -2
- package/dist/core/asset-registry.js +9 -2
- package/dist/core/asset-serialize.js +88 -0
- package/dist/core/asset-spec.js +61 -5
- package/dist/core/common.js +93 -5
- package/dist/core/concurrent.js +3 -0
- package/dist/core/config-io.js +347 -0
- package/dist/core/config-migration.js +622 -0
- package/dist/core/config-schema.js +558 -0
- package/dist/core/config-sources.js +108 -0
- package/dist/core/config-types.js +4 -0
- package/dist/core/config-walker.js +337 -0
- package/dist/core/config.js +366 -1077
- package/dist/core/errors.js +42 -20
- package/dist/core/events.js +31 -25
- package/dist/core/file-lock.js +104 -0
- package/dist/core/frontmatter.js +75 -10
- package/dist/core/lesson-lint.js +3 -0
- package/dist/core/markdown.js +3 -0
- package/dist/core/memory-belief.js +62 -0
- package/dist/core/memory-contradiction-detect.js +274 -0
- package/dist/core/memory-improve.js +142 -14
- package/dist/core/parse.js +3 -0
- package/dist/core/paths.js +218 -50
- package/dist/core/proposal-quality-validators.js +380 -0
- package/dist/core/proposal-validators.js +11 -3
- package/dist/core/proposals.js +464 -5
- package/dist/core/state-db.js +349 -56
- package/dist/core/text-truncation.js +107 -0
- package/dist/core/time.js +3 -0
- package/dist/core/tty.js +59 -0
- package/dist/core/warn.js +7 -2
- package/dist/core/write-source.js +12 -0
- package/dist/indexer/db-backup.js +391 -0
- package/dist/indexer/db-search.js +136 -28
- package/dist/indexer/db.js +661 -166
- package/dist/indexer/ensure-index.js +3 -0
- package/dist/indexer/file-context.js +3 -0
- package/dist/indexer/graph-boost.js +162 -40
- package/dist/indexer/graph-db.js +241 -51
- package/dist/indexer/graph-dedup.js +3 -7
- package/dist/indexer/graph-extraction.js +242 -149
- package/dist/indexer/index-context.js +3 -9
- package/dist/indexer/indexer.js +86 -16
- package/dist/indexer/llm-cache.js +24 -19
- package/dist/indexer/manifest.js +3 -0
- package/dist/indexer/matchers.js +184 -11
- package/dist/indexer/memory-inference.js +94 -50
- package/dist/indexer/metadata-contributors.js +3 -0
- package/dist/indexer/metadata.js +110 -50
- package/dist/indexer/path-resolver.js +3 -0
- package/dist/indexer/project-context.js +192 -0
- package/dist/indexer/ranking-contributors.js +134 -7
- package/dist/indexer/ranking.js +8 -1
- package/dist/indexer/search-fields.js +5 -9
- package/dist/indexer/search-hit-enrichers.js +91 -2
- package/dist/indexer/search-source.js +20 -1
- package/dist/indexer/semantic-status.js +4 -1
- package/dist/indexer/staleness-detect.js +447 -0
- package/dist/indexer/usage-events.js +12 -9
- package/dist/indexer/walker.js +3 -0
- package/dist/integrations/agent/builders.js +135 -0
- package/dist/integrations/agent/config.js +121 -401
- package/dist/integrations/agent/detect.js +3 -0
- package/dist/integrations/agent/index.js +6 -14
- package/dist/integrations/agent/model-aliases.js +55 -0
- package/dist/integrations/agent/profiles.js +3 -0
- package/dist/integrations/agent/prompts.js +137 -8
- package/dist/integrations/agent/runner.js +208 -0
- package/dist/integrations/agent/sdk-runner.js +8 -2
- package/dist/integrations/agent/spawn.js +54 -14
- package/dist/integrations/github.js +3 -0
- package/dist/integrations/lockfile.js +22 -51
- package/dist/integrations/session-logs/index.js +4 -0
- package/dist/integrations/session-logs/inline-refs.js +35 -0
- package/dist/integrations/session-logs/pre-filter.js +152 -0
- package/dist/integrations/session-logs/providers/claude-code.js +226 -0
- package/dist/integrations/session-logs/providers/opencode.js +231 -25
- package/dist/integrations/session-logs/types.js +3 -0
- package/dist/llm/call-ai.js +14 -26
- package/dist/llm/client.js +16 -2
- package/dist/llm/embedder.js +20 -29
- package/dist/llm/embedders/cache.js +3 -7
- package/dist/llm/embedders/local.js +42 -1
- package/dist/llm/embedders/remote.js +20 -8
- package/dist/llm/embedders/types.js +3 -7
- package/dist/llm/feature-gate.js +92 -56
- package/dist/llm/graph-extract.js +402 -31
- package/dist/llm/index-passes.js +44 -29
- package/dist/llm/memory-infer.js +30 -2
- package/dist/llm/metadata-enhance.js +3 -7
- package/dist/output/cli-hints.js +7 -4
- package/dist/output/context.js +60 -8
- package/dist/output/renderers.js +170 -194
- package/dist/output/shapes/curate.js +56 -0
- package/dist/output/shapes/distill.js +10 -0
- package/dist/output/shapes/env-list.js +19 -0
- package/dist/output/shapes/events.js +11 -0
- package/dist/output/shapes/helpers.js +424 -0
- package/dist/output/shapes/history.js +7 -0
- package/dist/output/shapes/passthrough.js +105 -0
- package/dist/output/shapes/proposal-accept.js +7 -0
- package/dist/output/shapes/proposal-diff.js +7 -0
- package/dist/output/shapes/proposal-list.js +7 -0
- package/dist/output/shapes/proposal-producer.js +11 -0
- package/dist/output/shapes/proposal-reject.js +7 -0
- package/dist/output/shapes/proposal-show.js +7 -0
- package/dist/output/shapes/registry-search.js +6 -0
- package/dist/output/shapes/registry.js +30 -0
- package/dist/output/shapes/search.js +6 -0
- package/dist/output/shapes/secret-list.js +19 -0
- package/dist/output/shapes/show.js +6 -0
- package/dist/output/shapes/vault-list.js +19 -0
- package/dist/output/shapes.js +51 -549
- package/dist/output/text/add.js +6 -0
- package/dist/output/text/clone.js +6 -0
- package/dist/output/text/config.js +6 -0
- package/dist/output/text/curate.js +6 -0
- package/dist/output/text/distill.js +7 -0
- package/dist/output/text/enable-disable.js +7 -0
- package/dist/output/text/events.js +10 -0
- package/dist/output/text/feedback.js +6 -0
- package/dist/output/text/helpers.js +1059 -0
- package/dist/output/text/history.js +7 -0
- package/dist/output/text/import.js +6 -0
- package/dist/output/text/index.js +6 -0
- package/dist/output/text/info.js +6 -0
- package/dist/output/text/init.js +6 -0
- package/dist/output/text/list.js +6 -0
- package/dist/output/text/proposal-producer.js +8 -0
- package/dist/output/text/proposal.js +12 -0
- package/dist/output/text/registry-commands.js +11 -0
- package/dist/output/text/registry.js +30 -0
- package/dist/output/text/remember.js +6 -0
- package/dist/output/text/remove.js +6 -0
- package/dist/output/text/save.js +6 -0
- package/dist/output/text/search.js +6 -0
- package/dist/output/text/show.js +6 -0
- package/dist/output/text/update.js +6 -0
- package/dist/output/text/upgrade.js +6 -0
- package/dist/output/text/vault.js +16 -0
- package/dist/output/text/wiki.js +15 -0
- package/dist/output/text/workflow.js +14 -0
- package/dist/output/text.js +44 -1329
- package/dist/registry/build-index.js +3 -0
- package/dist/registry/create-provider-registry.js +3 -0
- package/dist/registry/factory.js +4 -1
- package/dist/registry/origin-resolve.js +3 -0
- package/dist/registry/providers/index.js +3 -0
- package/dist/registry/providers/skills-sh.js +11 -2
- package/dist/registry/providers/static-index.js +10 -1
- package/dist/registry/providers/types.js +3 -24
- package/dist/registry/resolve.js +11 -16
- package/dist/registry/types.js +3 -0
- package/dist/scripts/migrate-storage.js +17767 -0
- package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +9031 -0
- package/dist/scripts/migrations/v16-to-v17.js +141 -0
- package/dist/setup/detect.js +3 -0
- package/dist/setup/ripgrep-install.js +3 -0
- package/dist/setup/ripgrep-resolve.js +3 -0
- package/dist/setup/setup.js +306 -67
- package/dist/setup/steps.js +3 -15
- package/dist/sources/include.js +3 -0
- package/dist/sources/provider-factory.js +3 -11
- package/dist/sources/provider.js +3 -20
- package/dist/sources/providers/filesystem.js +19 -23
- package/dist/sources/providers/git.js +171 -21
- package/dist/sources/providers/index.js +3 -0
- package/dist/sources/providers/install-types.js +3 -13
- package/dist/sources/providers/npm.js +3 -4
- package/dist/sources/providers/provider-utils.js +3 -0
- package/dist/sources/providers/sync-from-ref.js +3 -11
- package/dist/sources/providers/tar-utils.js +3 -0
- package/dist/sources/providers/website.js +18 -22
- package/dist/sources/resolve.js +3 -0
- package/dist/sources/types.js +3 -0
- package/dist/sources/website-ingest.js +3 -0
- package/dist/tasks/backends/cron.js +3 -0
- package/dist/tasks/backends/exec-utils.js +3 -0
- package/dist/tasks/backends/index.js +3 -11
- package/dist/tasks/backends/launchd.js +4 -1
- package/dist/tasks/backends/schtasks.js +4 -1
- package/dist/tasks/parser.js +51 -38
- package/dist/tasks/resolveAkmBin.js +3 -0
- package/dist/tasks/runner.js +35 -9
- package/dist/tasks/schedule.js +20 -1
- package/dist/tasks/schema.js +5 -3
- package/dist/tasks/validator.js +6 -3
- package/dist/version.js +3 -0
- package/dist/wiki/wiki-templates.js +6 -3
- package/dist/wiki/wiki.js +4 -1
- package/dist/workflows/authoring.js +4 -1
- package/dist/workflows/cli.js +3 -0
- package/dist/workflows/db.js +140 -10
- package/dist/workflows/document-cache.js +3 -10
- package/dist/workflows/parser.js +3 -0
- package/dist/workflows/renderer.js +3 -0
- package/dist/workflows/runs.js +18 -1
- package/dist/workflows/schema.js +3 -0
- package/dist/workflows/scope-key.js +3 -0
- package/dist/workflows/validator.js +5 -9
- package/docs/README.md +7 -2
- package/docs/data-and-telemetry.md +225 -0
- package/docs/migration/release-notes/0.7.5.md +2 -2
- package/docs/migration/release-notes/0.8.0.md +57 -5
- package/docs/migration/v0.7-to-v0.8.md +1378 -0
- package/package.json +28 -11
- package/.github/LICENSE +0 -374
- package/dist/commands/help/help-accept.md +0 -9
- package/dist/commands/help/help-improve.md +0 -53
- package/dist/commands/help/help-reject.md +0 -8
- package/dist/commands/install-audit.js +0 -385
- package/dist/commands/vault.js +0 -310
- package/dist/indexer/match-contributors.js +0 -141
- package/dist/integrations/agent/pipeline.js +0 -39
- package/dist/integrations/agent/runners.js +0 -31
- package/dist/llm/prompts/graph-extract-user-prompt.md +0 -12
- /package/dist/{tasks → assets}/backends/launchd-template.xml +0 -0
- /package/dist/{tasks → assets}/backends/schtasks-template.xml +0 -0
- /package/dist/{commands → assets}/help/help-propose.md +0 -0
- /package/dist/{wiki → assets/wiki}/index-template.md +0 -0
- /package/dist/{wiki → assets/wiki}/ingest-workflow-template.md +0 -0
- /package/dist/{wiki → assets/wiki}/log-template.md +0 -0
- /package/dist/{wiki → assets/wiki}/schema-template.md +0 -0
- /package/dist/{workflows → assets/workflows}/workflow-template.md +0 -0
package/dist/indexer/db.js
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
2
|
+
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
|
+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
1
4
|
import { Database } from "bun:sqlite";
|
|
2
5
|
import fs from "node:fs";
|
|
3
6
|
import { createRequire } from "node:module";
|
|
@@ -7,12 +10,13 @@ import { getDbPath } from "../core/paths";
|
|
|
7
10
|
import { REGISTRY_INDEX_CACHE_DDL } from "../core/state-db";
|
|
8
11
|
import { warn } from "../core/warn";
|
|
9
12
|
import { cosineSimilarity } from "../llm/embedders/types";
|
|
13
|
+
import { backupDataDir, EMBEDDING_DIM_CHANGE_REASON } from "./db-backup";
|
|
10
14
|
import { buildSearchFields } from "./search-fields";
|
|
11
15
|
import { ensureUsageEventsSchema } from "./usage-events";
|
|
12
16
|
// ── Constants ───────────────────────────────────────────────────────────────
|
|
13
|
-
export const DB_VERSION =
|
|
17
|
+
export const DB_VERSION = 17;
|
|
14
18
|
export const EMBEDDING_DIM = 384;
|
|
15
|
-
export const GRAPH_SCHEMA_VERSION =
|
|
19
|
+
export const GRAPH_SCHEMA_VERSION = 3;
|
|
16
20
|
// ── Database lifecycle ──────────────────────────────────────────────────────
|
|
17
21
|
export function openDatabase(dbPath, options) {
|
|
18
22
|
const resolvedPath = dbPath ?? getDbPath();
|
|
@@ -26,11 +30,39 @@ export function openDatabase(dbPath, options) {
|
|
|
26
30
|
db.exec("PRAGMA foreign_keys = ON");
|
|
27
31
|
// Try to load sqlite-vec extension
|
|
28
32
|
loadVecExtension(db);
|
|
29
|
-
|
|
33
|
+
// Dim resolution: explicit option wins; otherwise consult the on-disk
|
|
34
|
+
// config so unparameterised opens (registry providers, graph helpers,
|
|
35
|
+
// ad-hoc CLI subcommands) honour the operator-declared dimension. Only if
|
|
36
|
+
// both are absent do we fall through to the no-clobber path, which keeps
|
|
37
|
+
// ensureSchema from touching `index_meta.embeddingDim` at all.
|
|
38
|
+
const resolvedDim = options?.embeddingDim ?? resolveConfiguredEmbeddingDim();
|
|
39
|
+
ensureSchema(db, resolvedDim, { dataDir: dir });
|
|
30
40
|
// Warn once at init if using JS fallback with many entries
|
|
31
41
|
warnIfVecMissing(db, { once: true });
|
|
32
42
|
return db;
|
|
33
43
|
}
|
|
44
|
+
/**
|
|
45
|
+
* Read the operator-configured embedding dimension from the on-disk config.
|
|
46
|
+
* Returns `undefined` when no config file is present, when the config has
|
|
47
|
+
* no `embedding.dimension` set, or when reading the config throws (e.g.
|
|
48
|
+
* inside isolated test fixtures with no XDG home). Failure is silent on
|
|
49
|
+
* purpose — every openDatabase() call would otherwise have to handle a
|
|
50
|
+
* config-not-found error path, and the fallback (no-clobber semantics) is
|
|
51
|
+
* already correct.
|
|
52
|
+
*/
|
|
53
|
+
function resolveConfiguredEmbeddingDim() {
|
|
54
|
+
try {
|
|
55
|
+
const { loadConfig } = require("../core/config");
|
|
56
|
+
const dim = loadConfig().embedding?.dimension;
|
|
57
|
+
if (typeof dim === "number" && Number.isInteger(dim) && dim > 0 && dim <= 4096) {
|
|
58
|
+
return dim;
|
|
59
|
+
}
|
|
60
|
+
return undefined;
|
|
61
|
+
}
|
|
62
|
+
catch {
|
|
63
|
+
return undefined;
|
|
64
|
+
}
|
|
65
|
+
}
|
|
34
66
|
export function openExistingDatabase(dbPath) {
|
|
35
67
|
const resolvedPath = dbPath ?? getDbPath();
|
|
36
68
|
const db = new Database(resolvedPath);
|
|
@@ -88,7 +120,7 @@ export function warnIfVecMissing(db, { once } = { once: false }) {
|
|
|
88
120
|
/* embeddings table may not exist yet during init */
|
|
89
121
|
}
|
|
90
122
|
}
|
|
91
|
-
function ensureSchema(db, embeddingDim) {
|
|
123
|
+
function ensureSchema(db, embeddingDim, options) {
|
|
92
124
|
// Create meta table first so we can check version
|
|
93
125
|
db.exec(`
|
|
94
126
|
CREATE TABLE IF NOT EXISTS index_meta (
|
|
@@ -96,6 +128,39 @@ function ensureSchema(db, embeddingDim) {
|
|
|
96
128
|
value TEXT NOT NULL
|
|
97
129
|
);
|
|
98
130
|
`);
|
|
131
|
+
// MVP DB-backup hook (0.8.x): when the stored DB version differs from the
|
|
132
|
+
// running binary's DB_VERSION, snapshot the data directory BEFORE
|
|
133
|
+
// `handleVersionUpgrade()` drops tables. This is best-effort —
|
|
134
|
+
// `backupDataDir` returns null on opt-out, missing data dir, low free
|
|
135
|
+
// space, or copy errors, and we proceed with the upgrade in all cases.
|
|
136
|
+
// The proper migration framework lands in 0.9.0; until then this lets
|
|
137
|
+
// operators recover with `scripts/migrations/restore-data-dir.sh`.
|
|
138
|
+
if (options?.dataDir) {
|
|
139
|
+
const storedVersionRaw = getMeta(db, "version");
|
|
140
|
+
const storedVersion = storedVersionRaw !== undefined && storedVersionRaw !== "" ? Number.parseInt(storedVersionRaw, 10) : null;
|
|
141
|
+
const willUpgrade = storedVersionRaw !== undefined && storedVersionRaw !== "" && storedVersionRaw !== String(DB_VERSION);
|
|
142
|
+
if (willUpgrade) {
|
|
143
|
+
try {
|
|
144
|
+
// Pass env explicitly so tests can override AKM_DB_BACKUP / AKM_DB_BACKUP_RETAIN
|
|
145
|
+
// without mutating process.env. Production callers default to process.env.
|
|
146
|
+
const result = backupDataDir({
|
|
147
|
+
dataDir: options.dataDir,
|
|
148
|
+
sourceVersion: storedVersion !== null && !Number.isNaN(storedVersion) ? storedVersion : null,
|
|
149
|
+
targetVersion: DB_VERSION,
|
|
150
|
+
env: process.env,
|
|
151
|
+
});
|
|
152
|
+
if (result) {
|
|
153
|
+
warn("[akm] data directory backed up to %s before v%s→v%d upgrade", result.path, storedVersionRaw, DB_VERSION);
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
catch (err) {
|
|
157
|
+
// Defensive — backupDataDir already swallows most errors, but if it
|
|
158
|
+
// throws for an unexpected reason we must still proceed with the
|
|
159
|
+
// upgrade so the user isn't locked out of their binary.
|
|
160
|
+
warn("[akm] pre-upgrade data dir backup raised an unexpected error — %s; upgrade will proceed without a snapshot", err instanceof Error ? err.message : String(err));
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
}
|
|
99
164
|
// Check stored version — if it differs from DB_VERSION, drop and recreate all tables.
|
|
100
165
|
// Usage events are preserved across version upgrades so that utility score
|
|
101
166
|
// history is not silently lost. The backup is captured here and threaded
|
|
@@ -112,12 +177,24 @@ function ensureSchema(db, embeddingDim) {
|
|
|
112
177
|
stash_dir TEXT NOT NULL,
|
|
113
178
|
entry_json TEXT NOT NULL,
|
|
114
179
|
search_text TEXT NOT NULL,
|
|
115
|
-
entry_type TEXT NOT NULL
|
|
180
|
+
entry_type TEXT NOT NULL,
|
|
181
|
+
derived_from TEXT
|
|
116
182
|
);
|
|
117
183
|
|
|
118
184
|
CREATE INDEX IF NOT EXISTS idx_entries_dir ON entries(dir_path);
|
|
119
185
|
CREATE INDEX IF NOT EXISTS idx_entries_type ON entries(entry_type);
|
|
186
|
+
CREATE INDEX IF NOT EXISTS idx_entries_file_path ON entries(file_path);
|
|
120
187
|
`);
|
|
188
|
+
// Phase 5A / DB v17: backfill `derived_from` column + index on databases
|
|
189
|
+
// that were created at v17 fresh OR carry a partial v17 schema (a DB whose
|
|
190
|
+
// `index_meta.version` was bumped to 17 but whose `entries` table still
|
|
191
|
+
// lacks the column — this happens when a previous v17 binary opened a
|
|
192
|
+
// pre-v17 DB without taking the upgrade path because no version mismatch
|
|
193
|
+
// was seen at boot). The PRAGMA-then-ALTER guard runs unconditionally so
|
|
194
|
+
// both fresh and partial schemas converge. The CREATE INDEX for
|
|
195
|
+
// `derived_from` MUST run after this helper so we never reference a
|
|
196
|
+
// column that has not yet been added on partial schemas.
|
|
197
|
+
ensureDerivedFromColumn(db);
|
|
121
198
|
// Validated WorkflowDocument JSON, one row per indexed workflow entry.
|
|
122
199
|
// Pure index data — fully rebuilt on each `akm index`. ON DELETE CASCADE
|
|
123
200
|
// means clearing entries (full rebuild or per-dir delete) drops these too.
|
|
@@ -176,6 +253,20 @@ function ensureSchema(db, embeddingDim) {
|
|
|
176
253
|
updated_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
177
254
|
FOREIGN KEY (entry_id) REFERENCES entries(id) ON DELETE CASCADE
|
|
178
255
|
);
|
|
256
|
+
`);
|
|
257
|
+
// Per-project scoped utility scores — tracks usage per (entry, cwd-anchor)
|
|
258
|
+
// so assets useful in project A don't pollute rankings in project B.
|
|
259
|
+
// The global utility_scores table is preserved as a fallback / cold-start aid.
|
|
260
|
+
db.exec(`
|
|
261
|
+
CREATE TABLE IF NOT EXISTS utility_scores_scoped (
|
|
262
|
+
entry_id INTEGER NOT NULL,
|
|
263
|
+
scope_key TEXT NOT NULL,
|
|
264
|
+
utility REAL NOT NULL DEFAULT 0,
|
|
265
|
+
last_used_at INTEGER NOT NULL,
|
|
266
|
+
PRIMARY KEY (entry_id, scope_key)
|
|
267
|
+
);
|
|
268
|
+
CREATE INDEX IF NOT EXISTS idx_utility_scores_scoped_entry_id
|
|
269
|
+
ON utility_scores_scoped(entry_id);
|
|
179
270
|
`);
|
|
180
271
|
db.exec(`
|
|
181
272
|
CREATE TABLE IF NOT EXISTS index_dir_state (
|
|
@@ -194,15 +285,26 @@ function ensureSchema(db, embeddingDim) {
|
|
|
194
285
|
// Entries are cleaned up when assets are removed or --re-enrich is used.
|
|
195
286
|
db.exec(`
|
|
196
287
|
CREATE TABLE IF NOT EXISTS llm_enrichment_cache (
|
|
197
|
-
asset_ref
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
288
|
+
asset_ref TEXT NOT NULL,
|
|
289
|
+
cache_variant TEXT NOT NULL,
|
|
290
|
+
body_hash TEXT NOT NULL,
|
|
291
|
+
result_json TEXT NOT NULL,
|
|
292
|
+
updated_at INTEGER NOT NULL,
|
|
293
|
+
PRIMARY KEY (asset_ref, cache_variant)
|
|
201
294
|
);
|
|
202
295
|
|
|
203
296
|
CREATE INDEX IF NOT EXISTS idx_llm_cache_updated
|
|
204
297
|
ON llm_enrichment_cache(updated_at);
|
|
205
298
|
`);
|
|
299
|
+
// Graph extraction tables — schema v2 (entry_id PK).
|
|
300
|
+
//
|
|
301
|
+
// graph_files is keyed on entries.id so child tables cascade-delete cleanly
|
|
302
|
+
// when an entry is removed, and so JOINs from graph rows to entries are a
|
|
303
|
+
// direct PK lookup. (stash_root, file_path) is retained as UNIQUE so the
|
|
304
|
+
// extractor's path-based upsert still works.
|
|
305
|
+
//
|
|
306
|
+
// graph_file_entities and graph_file_relations no longer duplicate file_path;
|
|
307
|
+
// they reference entry_id and inherit stash scoping via graph_files.
|
|
206
308
|
db.exec(`
|
|
207
309
|
CREATE TABLE IF NOT EXISTS graph_meta (
|
|
208
310
|
stash_root TEXT PRIMARY KEY,
|
|
@@ -213,53 +315,58 @@ function ensureSchema(db, embeddingDim) {
|
|
|
213
315
|
entity_count INTEGER NOT NULL DEFAULT 0,
|
|
214
316
|
relation_count INTEGER NOT NULL DEFAULT 0,
|
|
215
317
|
extraction_coverage REAL NOT NULL DEFAULT 0,
|
|
216
|
-
density REAL NOT NULL DEFAULT 0
|
|
318
|
+
density REAL NOT NULL DEFAULT 0,
|
|
319
|
+
extractor_id TEXT,
|
|
320
|
+
extraction_run_id TEXT,
|
|
321
|
+
model TEXT,
|
|
322
|
+
prompt_version TEXT,
|
|
323
|
+
batch_size INTEGER,
|
|
324
|
+
cache_hits INTEGER NOT NULL DEFAULT 0,
|
|
325
|
+
cache_misses INTEGER NOT NULL DEFAULT 0,
|
|
326
|
+
truncation_count INTEGER NOT NULL DEFAULT 0,
|
|
327
|
+
failure_count INTEGER NOT NULL DEFAULT 0
|
|
217
328
|
);
|
|
218
329
|
|
|
219
330
|
CREATE TABLE IF NOT EXISTS graph_files (
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
331
|
+
entry_id INTEGER PRIMARY KEY REFERENCES entries(id) ON DELETE CASCADE,
|
|
332
|
+
stash_root TEXT NOT NULL,
|
|
333
|
+
file_path TEXT NOT NULL,
|
|
334
|
+
file_order INTEGER NOT NULL,
|
|
335
|
+
file_type TEXT NOT NULL,
|
|
336
|
+
body_hash TEXT NOT NULL,
|
|
337
|
+
confidence REAL,
|
|
338
|
+
status TEXT NOT NULL DEFAULT 'extracted',
|
|
339
|
+
reason TEXT,
|
|
340
|
+
extraction_run_id TEXT,
|
|
341
|
+
UNIQUE(stash_root, file_path)
|
|
228
342
|
);
|
|
229
343
|
|
|
230
344
|
CREATE INDEX IF NOT EXISTS idx_graph_files_stash_order
|
|
231
345
|
ON graph_files(stash_root, file_order);
|
|
232
346
|
|
|
233
347
|
CREATE TABLE IF NOT EXISTS graph_file_entities (
|
|
234
|
-
|
|
235
|
-
file_path TEXT NOT NULL,
|
|
348
|
+
entry_id INTEGER NOT NULL REFERENCES graph_files(entry_id) ON DELETE CASCADE,
|
|
236
349
|
entity_order INTEGER NOT NULL,
|
|
350
|
+
stash_root TEXT NOT NULL,
|
|
351
|
+
entity_norm TEXT NOT NULL,
|
|
237
352
|
entity TEXT NOT NULL,
|
|
238
|
-
PRIMARY KEY (
|
|
239
|
-
FOREIGN KEY (stash_root, file_path)
|
|
240
|
-
REFERENCES graph_files(stash_root, file_path)
|
|
241
|
-
ON DELETE CASCADE
|
|
353
|
+
PRIMARY KEY (entry_id, entity_order)
|
|
242
354
|
);
|
|
243
355
|
|
|
244
|
-
CREATE INDEX IF NOT EXISTS
|
|
245
|
-
ON graph_file_entities(stash_root,
|
|
356
|
+
CREATE INDEX IF NOT EXISTS idx_graph_file_entities_entity_norm
|
|
357
|
+
ON graph_file_entities(stash_root, entity_norm);
|
|
246
358
|
|
|
247
359
|
CREATE TABLE IF NOT EXISTS graph_file_relations (
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
from_entity
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
REFERENCES graph_files(stash_root, file_path)
|
|
258
|
-
ON DELETE CASCADE
|
|
360
|
+
entry_id INTEGER NOT NULL REFERENCES graph_files(entry_id) ON DELETE CASCADE,
|
|
361
|
+
relation_order INTEGER NOT NULL,
|
|
362
|
+
from_entity_norm TEXT NOT NULL,
|
|
363
|
+
from_entity TEXT NOT NULL,
|
|
364
|
+
to_entity_norm TEXT NOT NULL,
|
|
365
|
+
to_entity TEXT NOT NULL,
|
|
366
|
+
relation_type TEXT,
|
|
367
|
+
confidence REAL,
|
|
368
|
+
PRIMARY KEY (entry_id, relation_order)
|
|
259
369
|
);
|
|
260
|
-
|
|
261
|
-
CREATE INDEX IF NOT EXISTS idx_graph_file_relations_lookup
|
|
262
|
-
ON graph_file_relations(stash_root, file_path, relation_order);
|
|
263
370
|
`);
|
|
264
371
|
// FTS-dirty queue. Created here (not lazily on first upsert) so the
|
|
265
372
|
// per-entry write path doesn't issue a CREATE TABLE IF NOT EXISTS on
|
|
@@ -271,56 +378,82 @@ function ensureSchema(db, embeddingDim) {
|
|
|
271
378
|
);
|
|
272
379
|
`);
|
|
273
380
|
// sqlite-vec table
|
|
381
|
+
//
|
|
382
|
+
// Dimension contract:
|
|
383
|
+
// - When `embeddingDim` is `undefined`, the caller did NOT request a
|
|
384
|
+
// specific dim. Do not touch `index_meta.embeddingDim` and do not run
|
|
385
|
+
// the dim-change wipe — fall back to the stored dim (or the static
|
|
386
|
+
// default) only when we have to materialise the vec table for the
|
|
387
|
+
// first time. Without this guard, registry-side and other dim-unaware
|
|
388
|
+
// `openDatabase()` callers would silently overwrite the dim-aware
|
|
389
|
+
// improve/index value and oscillate the stored dim.
|
|
390
|
+
// - When `embeddingDim` is a number, the caller explicitly asked for
|
|
391
|
+
// that dim and owns the dim-change/backup/wipe semantics.
|
|
392
|
+
const dimExplicit = embeddingDim !== undefined;
|
|
393
|
+
const effectiveDim = embeddingDim ?? (Number(getMeta(db, "embeddingDim")) || EMBEDDING_DIM);
|
|
274
394
|
if (isVecAvailable(db)) {
|
|
275
395
|
// Check if stored embedding dimension differs from configured one
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
396
|
+
if (dimExplicit) {
|
|
397
|
+
const storedDim = getMeta(db, "embeddingDim");
|
|
398
|
+
if (storedDim && storedDim !== String(embeddingDim)) {
|
|
399
|
+
// Re-embedding the whole stash is expensive (LLM API calls + cache
|
|
400
|
+
// misses), so snapshot the data dir before we drop the vec table and
|
|
401
|
+
// wipe `embeddings`. This is the SAME hook the version-upgrade path
|
|
402
|
+
// uses earlier in this function, just gated on embedding-dim mismatch
|
|
403
|
+
// and tagged so operators can tell the two backup kinds apart.
|
|
404
|
+
backupBeforeEmbeddingDimChange(options?.dataDir, storedDim, String(embeddingDim));
|
|
405
|
+
try {
|
|
406
|
+
db.exec("DROP TABLE IF EXISTS entries_vec");
|
|
407
|
+
}
|
|
408
|
+
catch {
|
|
409
|
+
/* ignore */
|
|
410
|
+
}
|
|
411
|
+
// Delete stale BLOB embeddings so they don't produce silently wrong
|
|
412
|
+
// similarity scores against the new-dimension vec table.
|
|
413
|
+
try {
|
|
414
|
+
db.exec("DELETE FROM embeddings");
|
|
415
|
+
}
|
|
416
|
+
catch {
|
|
417
|
+
/* ignore */
|
|
418
|
+
}
|
|
419
|
+
setMeta(db, "hasEmbeddings", "0");
|
|
291
420
|
}
|
|
292
|
-
setMeta(db, "hasEmbeddings", "0");
|
|
293
421
|
}
|
|
294
422
|
const vecExists = db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='entries_vec'").get();
|
|
295
423
|
if (!vecExists) {
|
|
296
|
-
if (!Number.isInteger(
|
|
297
|
-
throw new Error(`Invalid embedding dimension: ${
|
|
424
|
+
if (!Number.isInteger(effectiveDim) || effectiveDim <= 0 || effectiveDim > 4096) {
|
|
425
|
+
throw new Error(`Invalid embedding dimension: ${effectiveDim}`);
|
|
298
426
|
}
|
|
299
427
|
db.exec(`
|
|
300
428
|
CREATE VIRTUAL TABLE entries_vec USING vec0(
|
|
301
429
|
id INTEGER PRIMARY KEY,
|
|
302
|
-
embedding FLOAT[${
|
|
430
|
+
embedding FLOAT[${effectiveDim}]
|
|
303
431
|
);
|
|
304
432
|
`);
|
|
305
433
|
}
|
|
306
|
-
|
|
434
|
+
if (dimExplicit) {
|
|
435
|
+
setMeta(db, "embeddingDim", String(embeddingDim));
|
|
436
|
+
}
|
|
307
437
|
}
|
|
308
438
|
else {
|
|
309
439
|
// Also purge BLOB embeddings on dimension change (JS fallback path).
|
|
310
440
|
// When sqlite-vec is unavailable, entries_vec doesn't exist but the BLOB
|
|
311
441
|
// embeddings table still stores vectors. If the configured dimension
|
|
312
442
|
// changes, those stored BLOBs become silently incompatible.
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
443
|
+
if (dimExplicit) {
|
|
444
|
+
const storedDim = getMeta(db, "embeddingDim");
|
|
445
|
+
if (storedDim && storedDim !== String(embeddingDim)) {
|
|
446
|
+
backupBeforeEmbeddingDimChange(options?.dataDir, storedDim, String(embeddingDim));
|
|
447
|
+
try {
|
|
448
|
+
db.exec("DELETE FROM embeddings");
|
|
449
|
+
}
|
|
450
|
+
catch {
|
|
451
|
+
/* ignore */
|
|
452
|
+
}
|
|
453
|
+
setMeta(db, "hasEmbeddings", "0");
|
|
320
454
|
}
|
|
321
|
-
setMeta(db, "
|
|
455
|
+
setMeta(db, "embeddingDim", String(embeddingDim));
|
|
322
456
|
}
|
|
323
|
-
setMeta(db, "embeddingDim", String(embeddingDim));
|
|
324
457
|
}
|
|
325
458
|
// Usage telemetry table
|
|
326
459
|
ensureUsageEventsSchema(db);
|
|
@@ -358,6 +491,8 @@ function handleVersionUpgrade(db) {
|
|
|
358
491
|
/* table may not exist in older versions */
|
|
359
492
|
}
|
|
360
493
|
db.exec("DROP TABLE IF EXISTS utility_scores");
|
|
494
|
+
db.exec("DROP TABLE IF EXISTS utility_scores_scoped");
|
|
495
|
+
db.exec("DROP INDEX IF EXISTS idx_utility_scores_scoped_entry_id");
|
|
361
496
|
db.exec("DROP TABLE IF EXISTS usage_events");
|
|
362
497
|
db.exec("DROP TABLE IF EXISTS embeddings");
|
|
363
498
|
db.exec("DROP TABLE IF EXISTS entries_vec");
|
|
@@ -380,6 +515,48 @@ function handleVersionUpgrade(db) {
|
|
|
380
515
|
warn("[akm] Index rebuilt due to version upgrade. Run 'akm index' to repopulate.");
|
|
381
516
|
return usageBackup;
|
|
382
517
|
}
|
|
518
|
+
/**
 * Best-effort snapshot of the data directory, taken immediately before an
 * embedding-dimension change wipes `embeddings` and recreates `entries_vec`.
 *
 * Re-embedding a stash is costly (LLM calls + cache misses), so the pre-drop
 * state is captured through `backupDataDir` — the same helper the
 * version-upgrade hook relies on. The snapshot is tagged with
 * `EMBEDDING_DIM_CHANGE_REASON`, which places it under
 * `<dataDir>/backups/<timestamp>-embedding-dim-change/` rather than the
 * version-upgrade `<timestamp>-pre-v<N>/` directory; restoring goes through
 * `scripts/migrations/restore-data-dir.sh` either way.
 *
 * Never throws: any failure is downgraded to a warning so the caller can still
 * carry out the destructive operations (a broken backup must not brick a
 * binary whose configured dim changed). `AKM_DB_BACKUP=0` opts out through the
 * same `backupDataDir` path.
 *
 * @param dataDir  Data directory to snapshot; a falsy value makes this a no-op.
 * @param fromDim  Previously stored dimension (only used in the log message).
 * @param toDim    Newly requested dimension (only used in the log message).
 */
function backupBeforeEmbeddingDimChange(dataDir, fromDim, toDim) {
    if (dataDir) {
        try {
            // The DB version is not changing here, so the running binary's
            // DB_VERSION is recorded as both source and target — the metadata
            // sidecar still carries it for forensic context.
            const snapshot = backupDataDir({
                dataDir,
                sourceVersion: DB_VERSION,
                targetVersion: DB_VERSION,
                reason: EMBEDDING_DIM_CHANGE_REASON,
                env: process.env,
            });
            if (snapshot) {
                warn("[akm] embedding dimension changed %s→%s; data directory backed up to %s; embeddings will be regenerated", fromDim, toDim, snapshot.path);
            }
        }
        catch (err) {
            // Defensive: backupDataDir is expected to swallow most errors itself,
            // but an unexpected throw must not stop the drop from proceeding.
            const detail = err instanceof Error ? err.message : String(err);
            warn("[akm] pre-embedding-dim-change data dir backup raised an unexpected error — %s; embeddings will be regenerated without a snapshot", detail);
        }
    }
}
|
|
383
560
|
/**
|
|
384
561
|
* Re-insert backed-up `usage_events` rows into the freshly-created table.
|
|
385
562
|
*
|
|
@@ -474,6 +651,12 @@ export function deleteIndexDirStatesByStashDir(db, stashDir) {
|
|
|
474
651
|
db.prepare("DELETE FROM index_dir_state WHERE dir_path = ? OR dir_path LIKE ?").run(stashDir, `${stashDir}${path.sep}%`);
|
|
475
652
|
}
|
|
476
653
|
// ── Entry operations ────────────────────────────────────────────────────────
|
|
654
|
+
/**
 * SQLite parameter chunk size — chosen well below SQLITE_MAX_VARIABLE_NUMBER
 * (default 999 on SQLite builds older than 3.32.0, 32766 since) so multi-row
 * `IN (?, ?, ...)` queries stay within bounds even on the most restrictive
 * builds. Shared by the chunked helpers below.
 */
const SQLITE_CHUNK_SIZE = 500;
|
|
477
660
|
/**
|
|
478
661
|
* Insert or update an entry in the `entries` table. Returns the row id.
|
|
479
662
|
*
|
|
@@ -487,7 +670,11 @@ export function upsertEntry(db, entryKey, dirPath, filePath, stashDir, entry, se
|
|
|
487
670
|
// every call. The dirty-mark INSERT and the upsert-with-RETURNING
|
|
488
671
|
// share the same WeakMap so they live and die with the connection.
|
|
489
672
|
const stmts = getUpsertStmts(db);
|
|
490
|
-
|
|
673
|
+
// Phase 5A / Advantage D5: surface derived memory parent ref into the
|
|
674
|
+
// dedicated `derived_from` column so retrieval-time lookup (parent→child)
|
|
675
|
+
// does not have to scan + JSON-decode every memory row.
|
|
676
|
+
const derivedFrom = typeof entry.derivedFrom === "string" && entry.derivedFrom.trim() ? entry.derivedFrom.trim() : null;
|
|
677
|
+
const result = stmts.upsert.get(entryKey, dirPath, filePath, stashDir, JSON.stringify(entry), searchText, entry.type, derivedFrom);
|
|
491
678
|
if (!result)
|
|
492
679
|
throw new Error("upsertEntry: entry_key not found after upsert");
|
|
493
680
|
// Mark this entry as FTS-dirty so `rebuildFts({ incremental: true })`
|
|
@@ -506,15 +693,16 @@ function getUpsertStmts(db) {
|
|
|
506
693
|
// SELECT round-trip needed (last_insert_rowid() is unreliable for
|
|
507
694
|
// ON CONFLICT). Use `.get()` so a single row comes back.
|
|
508
695
|
upsert: db.prepare(`
|
|
509
|
-
INSERT INTO entries (entry_key, dir_path, file_path, stash_dir, entry_json, search_text, entry_type)
|
|
510
|
-
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
696
|
+
INSERT INTO entries (entry_key, dir_path, file_path, stash_dir, entry_json, search_text, entry_type, derived_from)
|
|
697
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
511
698
|
ON CONFLICT(entry_key) DO UPDATE SET
|
|
512
699
|
dir_path = excluded.dir_path,
|
|
513
700
|
file_path = excluded.file_path,
|
|
514
701
|
stash_dir = excluded.stash_dir,
|
|
515
702
|
entry_json = excluded.entry_json,
|
|
516
703
|
search_text = excluded.search_text,
|
|
517
|
-
entry_type = excluded.entry_type
|
|
704
|
+
entry_type = excluded.entry_type,
|
|
705
|
+
derived_from = excluded.derived_from
|
|
518
706
|
RETURNING id
|
|
519
707
|
`),
|
|
520
708
|
markDirty: db.prepare("INSERT OR IGNORE INTO entries_fts_dirty (entry_id) VALUES (?)"),
|
|
@@ -522,21 +710,128 @@ function getUpsertStmts(db) {
|
|
|
522
710
|
upsertStmtsByDb.set(db, stmts);
|
|
523
711
|
return stmts;
|
|
524
712
|
}
|
|
525
|
-
|
|
713
|
+
/**
 * Phase 5A / DB v17 schema guard.
 *
 * Adds the `entries.derived_from` column (when absent) and its index on the
 * open connection, so databases created by a pre-v17 binary gain the column
 * without a destructive rebuild. Safe to call repeatedly: the ALTER only runs
 * when `PRAGMA table_info` does not already list the column, and the index is
 * created with `IF NOT EXISTS`.
 *
 * Any error is swallowed on purpose — e.g. the `entries` table may not exist
 * yet on a brand-new DB; creating it is the caller's responsibility.
 *
 * @param db  Open database connection.
 */
function ensureDerivedFromColumn(db) {
    try {
        const columnNames = db
            .prepare("PRAGMA table_info(entries)")
            .all()
            .map((col) => col.name);
        if (!columnNames.includes("derived_from")) {
            db.exec("ALTER TABLE entries ADD COLUMN derived_from TEXT");
        }
        // Idempotent on its own, so it is issued unconditionally.
        db.exec("CREATE INDEX IF NOT EXISTS idx_entries_derived_from ON entries(derived_from)");
    }
    catch {
        /* table may not exist on a brand-new DB before CREATE — caller is responsible */
    }
}
|
|
736
|
+
/**
 * Phase 5A / Advantage D5: look up the derived-memory child row whose
 * `derived_from` column matches `parentRef` (e.g. `"memory:claude-prefs"`).
 *
 * String refs are trimmed before the lookup, mirroring the normalization
 * `upsertEntry` applies when it writes `derived_from`, so a ref carrying stray
 * whitespace still matches and a whitespace-only ref short-circuits to `null`
 * without touching the database.
 *
 * When several children share the same parent, the row with the highest `id`
 * wins (`ORDER BY id DESC`), which keeps the result deterministic.
 *
 * @param db         Open database connection.
 * @param parentRef  Parent asset ref to look up.
 * @returns The indexed child (`id`, `entryKey`, `dirPath`, `filePath`,
 *          `stashDir`, parsed `entry`, `searchText`), or `null` when there is no
 *          match, the stored `entry_json` is corrupt, or the query fails (e.g.
 *          the `derived_from` column is missing on a legacy DB).
 */
export function getDerivedForParent(db, parentRef) {
    const ref = typeof parentRef === "string" ? parentRef.trim() : parentRef;
    if (!ref)
        return null;
    try {
        const row = db
            .prepare(`SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text
           FROM entries
          WHERE derived_from = ?
          ORDER BY id DESC
          LIMIT 1`)
            .get(ref);
        if (!row)
            return null;
        let entry;
        try {
            entry = JSON.parse(row.entry_json);
        }
        catch {
            warn(`[db] getDerivedForParent: skipping entry id=${row.id} — corrupt entry_json`);
            return null;
        }
        return {
            id: row.id,
            entryKey: row.entry_key,
            dirPath: row.dir_path,
            filePath: row.file_path,
            stashDir: row.stash_dir,
            entry,
            searchText: row.search_text,
        };
    }
    catch {
        /* `derived_from` column may not exist on legacy DBs that haven't been
           rebuilt; treat as "no derived child". */
        return null;
    }
}
|
|
782
|
+
/**
 * Phase 2A / Rec 5: bulk-load the number of positive feedback events recorded
 * for each of the given entry ids. The utility-decay forgetting curve uses
 * these counts to stabilize (extend the half-life of) memories that have
 * repeatedly proven useful.
 *
 * The ids are queried in batches of `SQLITE_CHUNK_SIZE` so the `IN (...)` list
 * stays within `SQLITE_MAX_VARIABLE_NUMBER`. A batch whose query throws (for
 * instance because `usage_events` is missing on a legacy DB) contributes
 * nothing; the remaining batches are still attempted.
 *
 * @param db   Open database connection.
 * @param ids  Entry ids to look up; an empty list returns an empty map
 *             without querying.
 * @returns `Map<entryId, count>` holding only entries with at least one
 *          positive feedback event — absent ids implicitly mean `0`.
 */
export function getPositiveFeedbackCountsByIds(db, ids) {
    const counts = new Map();
    let offset = 0;
    while (offset < ids.length) {
        const batch = ids.slice(offset, offset + SQLITE_CHUNK_SIZE);
        offset += SQLITE_CHUNK_SIZE;
        const marks = batch.map(() => "?").join(",");
        try {
            const statement = db.prepare(`SELECT entry_id, COUNT(*) AS cnt
             FROM usage_events
            WHERE event_type = 'feedback'
              AND signal = 'positive'
              AND entry_id IN (${marks})
            GROUP BY entry_id`);
            for (const record of statement.all(...batch)) {
                if (record.entry_id === null || !(record.cnt > 0)) {
                    continue;
                }
                counts.set(record.entry_id, record.cnt);
            }
        }
        catch {
            /* usage_events table may be missing on legacy DBs — treat as zero counts */
        }
    }
    return counts;
}
|
|
822
|
+
/**
 * Delete every `entries` row where `column = value`, cleaning up dependent
 * rows first via `deleteRelatedRows`. Both steps run inside one transaction.
 *
 * `column` is interpolated directly into the SQL text, so it must be a
 * trusted, hard-coded column name — never caller- or user-supplied input.
 * `value` is bound as a parameter.
 */
function deleteEntriesWhere(db, column, value) {
    const purge = db.transaction(() => {
        const matching = db.prepare(`SELECT id FROM entries WHERE ${column} = ?`).all(value);
        deleteRelatedRows(db, matching);
        db.prepare(`DELETE FROM entries WHERE ${column} = ?`).run(value);
    });
    purge();
}
|
|
829
|
+
/**
 * Delete every indexed entry whose `dir_path` equals `dirPath`.
 * Thin wrapper over `deleteEntriesWhere` with the `dir_path` column.
 */
export function deleteEntriesByDir(db, dirPath) {
    deleteEntriesWhere(db, "dir_path", dirPath);
}
|
|
532
832
|
/**
 * Delete every indexed entry whose `stash_dir` equals `stashDir`.
 * Thin wrapper over `deleteEntriesWhere` with the `stash_dir` column.
 */
export function deleteEntriesByStashDir(db, stashDir) {
    deleteEntriesWhere(db, "stash_dir", stashDir);
}
|
|
539
|
-
const SQLITE_CHUNK_SIZE = 500;
|
|
540
835
|
function deleteRelatedRows(db, ids) {
|
|
541
836
|
if (ids.length === 0)
|
|
542
837
|
return;
|
|
@@ -571,13 +866,6 @@ function deleteRelatedRows(db, ids) {
|
|
|
571
866
|
catch {
|
|
572
867
|
/* ignore */
|
|
573
868
|
}
|
|
574
|
-
// Also delete from FTS table so orphaned FTS rows don't remain
|
|
575
|
-
try {
|
|
576
|
-
db.prepare(`DELETE FROM entries_fts WHERE entry_id IN (${placeholders})`).run(...chunk);
|
|
577
|
-
}
|
|
578
|
-
catch {
|
|
579
|
-
/* ignore */
|
|
580
|
-
}
|
|
581
869
|
if (vecAvail) {
|
|
582
870
|
try {
|
|
583
871
|
db.prepare(`DELETE FROM entries_vec WHERE id IN (${placeholders})`).run(...chunk);
|
|
@@ -593,6 +881,12 @@ function deleteRelatedRows(db, ids) {
|
|
|
593
881
|
catch {
|
|
594
882
|
/* ignore */
|
|
595
883
|
}
|
|
884
|
+
try {
|
|
885
|
+
db.prepare(`DELETE FROM utility_scores_scoped WHERE entry_id IN (${placeholders})`).run(...chunk);
|
|
886
|
+
}
|
|
887
|
+
catch {
|
|
888
|
+
/* ignore */
|
|
889
|
+
}
|
|
596
890
|
// Clean up usage events before deleting entries
|
|
597
891
|
try {
|
|
598
892
|
db.prepare(`DELETE FROM usage_events WHERE entry_id IN (${placeholders})`).run(...chunk);
|
|
@@ -602,6 +896,26 @@ function deleteRelatedRows(db, ids) {
|
|
|
602
896
|
}
|
|
603
897
|
}
|
|
604
898
|
}
|
|
899
|
+
/**
 * Delete entries by primary key. Rows that depend on those entries are
 * removed first through `deleteRelatedRows`; the `entries` rows themselves are
 * then deleted in batches of `SQLITE_CHUNK_SIZE`. Everything runs inside a
 * single transaction.
 *
 * Used by the `--clean` post-pass to remove stale entries whose source files
 * no longer exist on disk.
 *
 * @param db   Open database connection.
 * @param ids  Entry ids to delete; an empty list is a no-op.
 */
export function deleteEntriesByIds(db, ids) {
    if (ids.length === 0) {
        return;
    }
    const removeAll = db.transaction(() => {
        // deleteRelatedRows takes row-shaped objects ({ id }), not bare ids.
        deleteRelatedRows(db, ids.map((id) => ({ id })));
        let start = 0;
        while (start < ids.length) {
            const batch = ids.slice(start, start + SQLITE_CHUNK_SIZE);
            const marks = batch.map(() => "?").join(",");
            db.prepare(`DELETE FROM entries WHERE id IN (${marks})`).run(...batch);
            start += SQLITE_CHUNK_SIZE;
        }
    });
    removeAll();
}
|
|
605
919
|
/**
|
|
606
920
|
* Rebuild the FTS5 search index.
|
|
607
921
|
*
|
|
@@ -676,19 +990,32 @@ export function rebuildFts(db, options) {
|
|
|
676
990
|
}
|
|
677
991
|
// ── Vector operations ───────────────────────────────────────────────────────
|
|
678
992
|
/**
 * Store (or replace) the embedding for an entry.
 *
 * The vector is always written to the BLOB `embeddings` table, and mirrored
 * into the sqlite-vec `entries_vec` table when `isVecAvailable(db)` is true.
 *
 * @param db         Open database connection.
 * @param entryId    Id of the entry the embedding belongs to.
 * @param embedding  Numeric vector to persist.
 * @returns `false` when the entry no longer exists (nothing is written),
 *          `true` otherwise — including when the vec-table write fails.
 */
export function upsertEmbedding(db, entryId, embedding) {
    // Pre-flight FK guard: an entry can be deleted between the moment its id is
    // queued for embedding and the moment this INSERT runs (e.g. consolidation
    // deleting during a concurrent improve cycle). The INSERT would then fail
    // with "FOREIGN KEY constraint failed" and roll back the caller's whole
    // batch transaction. A cheap SELECT turns that race into a clean skip.
    const entryRow = db.prepare("SELECT 1 FROM entries WHERE id = ?").get(entryId);
    if (!entryRow) {
        return false;
    }
    const blob = float32Buffer(embedding);
    // The BLOB table is always written (works without sqlite-vec).
    db.prepare("INSERT OR REPLACE INTO embeddings (id, embedding) VALUES (?, ?)").run(entryId, blob);
    if (!isVecAvailable(db)) {
        return true;
    }
    // Fast path: mirror into the sqlite-vec table. DELETE + INSERT are wrapped
    // in a transaction so a crash between the two cannot leave the entry
    // missing from the vec table.
    try {
        const replaceVecRow = db.transaction(() => {
            db.prepare("DELETE FROM entries_vec WHERE id = ?").run(entryId);
            db.prepare("INSERT INTO entries_vec (id, embedding) VALUES (?, ?)").run(entryId, blob);
        });
        replaceVecRow();
    }
    catch {
        /* ignore — vec table unavailable or constraint failure */
    }
    return true;
}
|
|
693
1020
|
export function searchVec(db, queryEmbedding, k) {
|
|
694
1021
|
// Fast path: use sqlite-vec when available
|
|
@@ -708,6 +1035,23 @@ export function searchVec(db, queryEmbedding, k) {
|
|
|
708
1035
|
// Fallback: JS-based cosine similarity over BLOB table
|
|
709
1036
|
return searchBlobVec(db, queryEmbedding, k);
|
|
710
1037
|
}
|
|
1038
|
+
/**
 * Return the k nearest neighbours of an already-indexed entry using its
 * persisted embedding — no re-embedding, no network. The stored BLOB is
 * decoded by byte length (dim = bytes / 4) and handed to `searchVec`
 * (sqlite-vec fast path or JS-cosine fallback).
 *
 * The query entry itself is typically returned with distance ~0 — callers
 * should filter it out by id.
 *
 * @param db  Open database connection.
 * @param id  Id of the entry whose neighbours are wanted.
 * @param k   Maximum number of neighbours to return.
 * @returns The `searchVec` results, or `[]` when the entry has no stored
 *          embedding or the BLOB is unusable (NULL, empty, a byte length that
 *          is not a multiple of 4, or rejected by `bufferToFloat32`).
 */
export function getNeighborsByEntryId(db, id, k) {
    const row = db.prepare("SELECT embedding FROM embeddings WHERE id = ?").get(id);
    const blob = row?.embedding;
    // A float32 vector occupies exactly 4 bytes per component; anything else
    // (including a NULL or empty column) is a corrupt embedding, not a
    // shorter vector.
    if (!blob || !blob.byteLength || blob.byteLength % 4 !== 0)
        return [];
    const queryEmbedding = bufferToFloat32(blob, blob.byteLength / 4);
    if (!queryEmbedding)
        return [];
    return searchVec(db, queryEmbedding, k);
}
|
|
711
1055
|
function float32Buffer(vec) {
|
|
712
1056
|
const f32 = new Float32Array(vec);
|
|
713
1057
|
return Buffer.from(f32.buffer);
|
|
@@ -814,7 +1158,7 @@ function runFtsQuery(db, ftsQuery, limit, entryType) {
|
|
|
814
1158
|
JOIN entries e ON e.id = f.entry_id
|
|
815
1159
|
WHERE entries_fts MATCH ?
|
|
816
1160
|
AND e.entry_type = ?
|
|
817
|
-
ORDER BY bm25Score
|
|
1161
|
+
ORDER BY bm25Score, e.id ASC
|
|
818
1162
|
LIMIT ?
|
|
819
1163
|
`;
|
|
820
1164
|
params = [ftsQuery, entryType, limit];
|
|
@@ -826,7 +1170,7 @@ function runFtsQuery(db, ftsQuery, limit, entryType) {
|
|
|
826
1170
|
FROM entries_fts f
|
|
827
1171
|
JOIN entries e ON e.id = f.entry_id
|
|
828
1172
|
WHERE entries_fts MATCH ?
|
|
829
|
-
ORDER BY bm25Score
|
|
1173
|
+
ORDER BY bm25Score, e.id ASC
|
|
830
1174
|
LIMIT ?
|
|
831
1175
|
`;
|
|
832
1176
|
params = [ftsQuery, limit];
|
|
@@ -875,21 +1219,7 @@ export function sanitizeFtsQuery(query) {
|
|
|
875
1219
|
// contain ALL terms.
|
|
876
1220
|
return tokens.join(" ");
|
|
877
1221
|
}
|
|
878
|
-
|
|
879
|
-
export function getAllEntries(db, entryType) {
|
|
880
|
-
let sql;
|
|
881
|
-
let params;
|
|
882
|
-
if (entryType && entryType !== "any") {
|
|
883
|
-
sql =
|
|
884
|
-
"SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text FROM entries WHERE entry_type = ?";
|
|
885
|
-
params = [entryType];
|
|
886
|
-
}
|
|
887
|
-
else {
|
|
888
|
-
sql = "SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text FROM entries";
|
|
889
|
-
params = [];
|
|
890
|
-
}
|
|
891
|
-
const rows = db.prepare(sql).all(...params);
|
|
892
|
-
// Guard against corrupt JSON — skip the row rather than crashing
|
|
1222
|
+
function parseEntryRows(rows, context) {
|
|
893
1223
|
const entries = [];
|
|
894
1224
|
for (const row of rows) {
|
|
895
1225
|
let entry;
|
|
@@ -897,7 +1227,7 @@ export function getAllEntries(db, entryType) {
|
|
|
897
1227
|
entry = JSON.parse(row.entry_json);
|
|
898
1228
|
}
|
|
899
1229
|
catch {
|
|
900
|
-
warn(`[db]
|
|
1230
|
+
warn(`[db] ${context}: skipping entry id=${row.id} — corrupt entry_json`);
|
|
901
1231
|
continue;
|
|
902
1232
|
}
|
|
903
1233
|
entries.push({
|
|
@@ -912,6 +1242,21 @@ export function getAllEntries(db, entryType) {
|
|
|
912
1242
|
}
|
|
913
1243
|
return entries;
|
|
914
1244
|
}
|
|
1245
|
+
/**
 * Load indexed entries, optionally restricted to one entry type.
 *
 * @param db         Open database connection.
 * @param entryType  Entry type to filter on; a falsy value or `"any"` selects
 *                   every entry.
 * @returns The rows as decoded by `parseEntryRows`.
 */
export function getAllEntries(db, entryType) {
    const filterByType = Boolean(entryType) && entryType !== "any";
    const sql = filterByType
        ? "SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text FROM entries WHERE entry_type = ?"
        : "SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text FROM entries";
    const params = filterByType ? [entryType] : [];
    return parseEntryRows(db.prepare(sql).all(...params), "getAllEntries");
}
|
|
915
1260
|
export function findEntryIdByRef(db, ref) {
|
|
916
1261
|
const parsed = parseAssetRef(ref);
|
|
917
1262
|
const nameVariants = [parsed.name];
|
|
@@ -957,28 +1302,7 @@ export function getEntriesByDir(db, dirPath) {
|
|
|
957
1302
|
const rows = db
|
|
958
1303
|
.prepare("SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text FROM entries WHERE dir_path = ?")
|
|
959
1304
|
.all(dirPath);
|
|
960
|
-
|
|
961
|
-
const entries = [];
|
|
962
|
-
for (const row of rows) {
|
|
963
|
-
let entry;
|
|
964
|
-
try {
|
|
965
|
-
entry = JSON.parse(row.entry_json);
|
|
966
|
-
}
|
|
967
|
-
catch {
|
|
968
|
-
warn(`[db] getEntriesByDir: skipping entry id=${row.id} — corrupt entry_json`);
|
|
969
|
-
continue;
|
|
970
|
-
}
|
|
971
|
-
entries.push({
|
|
972
|
-
id: row.id,
|
|
973
|
-
entryKey: row.entry_key,
|
|
974
|
-
dirPath: row.dir_path,
|
|
975
|
-
filePath: row.file_path,
|
|
976
|
-
stashDir: row.stash_dir,
|
|
977
|
-
entry,
|
|
978
|
-
searchText: row.search_text,
|
|
979
|
-
});
|
|
980
|
-
}
|
|
981
|
-
return entries;
|
|
1305
|
+
return parseEntryRows(rows, "getEntriesByDir");
|
|
982
1306
|
}
|
|
983
1307
|
/**
|
|
984
1308
|
* Get the utility score for an entry, or undefined if none exists.
|
|
@@ -1001,12 +1325,17 @@ export function getUtilityScore(db, entryId) {
|
|
|
1001
1325
|
}
|
|
1002
1326
|
/**
|
|
1003
1327
|
* Batch-load utility scores for multiple entry IDs in a single query.
|
|
1004
|
-
* Returns a
|
|
1328
|
+
* Returns a `{ global, scoped }` pair, both Maps keyed by entry_id.
|
|
1329
|
+
*
|
|
1330
|
+
* When `scopeKey` is provided a second query runs against
|
|
1331
|
+
* `utility_scores_scoped` and the result is returned as `scoped`.
|
|
1332
|
+
* Both maps are always present; `scoped` is empty when `scopeKey` is absent.
|
|
1005
1333
|
*/
|
|
1006
|
-
export function getUtilityScoresByIds(db, ids) {
|
|
1334
|
+
export function getUtilityScoresByIds(db, ids, scopeKey) {
|
|
1335
|
+
const global = new Map();
|
|
1336
|
+
const scoped = new Map();
|
|
1007
1337
|
if (ids.length === 0)
|
|
1008
|
-
return
|
|
1009
|
-
const result = new Map();
|
|
1338
|
+
return { global, scoped };
|
|
1010
1339
|
// Process in chunks to stay within SQLITE_MAX_VARIABLE_NUMBER
|
|
1011
1340
|
for (let i = 0; i < ids.length; i += SQLITE_CHUNK_SIZE) {
|
|
1012
1341
|
const chunk = ids.slice(i, i + SQLITE_CHUNK_SIZE);
|
|
@@ -1015,7 +1344,7 @@ export function getUtilityScoresByIds(db, ids) {
|
|
|
1015
1344
|
.prepare(`SELECT entry_id, utility, show_count, search_count, select_rate, last_used_at, updated_at FROM utility_scores WHERE entry_id IN (${placeholders})`)
|
|
1016
1345
|
.all(...chunk);
|
|
1017
1346
|
for (const row of rows) {
|
|
1018
|
-
|
|
1347
|
+
global.set(row.entry_id, {
|
|
1019
1348
|
entryId: row.entry_id,
|
|
1020
1349
|
utility: row.utility,
|
|
1021
1350
|
showCount: row.show_count,
|
|
@@ -1025,13 +1354,34 @@ export function getUtilityScoresByIds(db, ids) {
|
|
|
1025
1354
|
updatedAt: row.updated_at,
|
|
1026
1355
|
});
|
|
1027
1356
|
}
|
|
1357
|
+
if (scopeKey) {
|
|
1358
|
+
const scopedRows = db
|
|
1359
|
+
.prepare(`SELECT entry_id, scope_key, utility, last_used_at FROM utility_scores_scoped WHERE scope_key = ? AND entry_id IN (${placeholders})`)
|
|
1360
|
+
.all(scopeKey, ...chunk);
|
|
1361
|
+
for (const row of scopedRows) {
|
|
1362
|
+
scoped.set(row.entry_id, {
|
|
1363
|
+
entryId: row.entry_id,
|
|
1364
|
+
scopeKey: row.scope_key,
|
|
1365
|
+
utility: row.utility,
|
|
1366
|
+
lastUsedAt: row.last_used_at,
|
|
1367
|
+
});
|
|
1368
|
+
}
|
|
1369
|
+
}
|
|
1028
1370
|
}
|
|
1029
|
-
return
|
|
1371
|
+
return { global, scoped };
|
|
1030
1372
|
}
|
|
1031
1373
|
/**
|
|
1032
1374
|
* Insert or update a utility score for an entry.
|
|
1033
1375
|
*/
|
|
1034
1376
|
export function upsertUtilityScore(db, entryId, data) {
|
|
1377
|
+
// Pre-flight FK guard (mirrors `upsertEmbedding`): when an entry is
|
|
1378
|
+
// deleted between when its id is aggregated from usage_events and when
|
|
1379
|
+
// this INSERT runs, the FK constraint fails and rolls back the entire
|
|
1380
|
+
// finalize transaction. A cheap SELECT here turns the race into a
|
|
1381
|
+
// clean skip. Returns false when the entry no longer exists.
|
|
1382
|
+
const exists = db.prepare("SELECT 1 FROM entries WHERE id = ?").get(entryId);
|
|
1383
|
+
if (!exists)
|
|
1384
|
+
return false;
|
|
1035
1385
|
db.prepare(`
|
|
1036
1386
|
INSERT INTO utility_scores (entry_id, utility, show_count, search_count, select_rate, last_used_at, updated_at)
|
|
1037
1387
|
VALUES (?, ?, ?, ?, ?, ?, datetime('now'))
|
|
@@ -1043,6 +1393,7 @@ export function upsertUtilityScore(db, entryId, data) {
|
|
|
1043
1393
|
last_used_at = excluded.last_used_at,
|
|
1044
1394
|
updated_at = datetime('now')
|
|
1045
1395
|
`).run(entryId, data.utility, data.showCount, data.searchCount, data.selectRate, data.lastUsedAt ?? null);
|
|
1396
|
+
return true;
|
|
1046
1397
|
}
|
|
1047
1398
|
/**
|
|
1048
1399
|
* Look up a cached LLM result for the given asset_ref.
|
|
@@ -1052,10 +1403,10 @@ export function upsertUtilityScore(db, entryId, data) {
|
|
|
1052
1403
|
* cached). In both cases the caller should invoke the LLM and write a new
|
|
1053
1404
|
* cache entry.
|
|
1054
1405
|
*/
|
|
1055
|
-
export function getLlmCacheEntry(db, assetRef, currentBodyHash) {
|
|
1406
|
+
export function getLlmCacheEntry(db, assetRef, currentBodyHash, cacheVariant = "") {
|
|
1056
1407
|
const row = db
|
|
1057
|
-
.prepare("SELECT asset_ref, body_hash, result_json, updated_at FROM llm_enrichment_cache WHERE asset_ref = ?")
|
|
1058
|
-
.get(assetRef);
|
|
1408
|
+
.prepare("SELECT asset_ref, cache_variant, body_hash, result_json, updated_at FROM llm_enrichment_cache WHERE asset_ref = ? AND cache_variant = ?")
|
|
1409
|
+
.get(assetRef, cacheVariant);
|
|
1059
1410
|
if (!row)
|
|
1060
1411
|
return undefined;
|
|
1061
1412
|
// Hash mismatch → body changed, treat as cache miss.
|
|
@@ -1063,21 +1414,54 @@ export function getLlmCacheEntry(db, assetRef, currentBodyHash) {
|
|
|
1063
1414
|
return undefined;
|
|
1064
1415
|
return {
|
|
1065
1416
|
assetRef: row.asset_ref,
|
|
1417
|
+
cacheVariant: row.cache_variant,
|
|
1066
1418
|
bodyHash: row.body_hash,
|
|
1067
1419
|
resultJson: row.result_json,
|
|
1068
1420
|
updatedAt: row.updated_at,
|
|
1069
1421
|
};
|
|
1070
1422
|
}
|
|
1423
|
+
/**
 * Batched counterpart of {@link getLlmCacheEntry}: loads the cache rows for
 * many asset refs at once, issuing one `IN (...)` query per batch of
 * `SQLITE_CHUNK_SIZE` refs (to respect SQLITE_MAX_VARIABLE_NUMBER).
 *
 * Unlike `getLlmCacheEntry`, rows are NOT filtered by body hash — callers must
 * compare `entry.bodyHash` against the current body hash themselves. That is
 * what allows one DB query per batch instead of one per file.
 *
 * @param db            Open database connection.
 * @param refs          Asset refs to look up; an empty list returns an empty map.
 * @param cacheVariant  Cache variant to match (defaults to `""`).
 * @returns `Map<assetRef, LlmCacheEntry>` containing only refs that have a row.
 */
export function getLlmCacheEntriesByRefs(db, refs, cacheVariant = "") {
    const found = new Map();
    let offset = 0;
    while (offset < refs.length) {
        const batch = refs.slice(offset, offset + SQLITE_CHUNK_SIZE);
        offset += SQLITE_CHUNK_SIZE;
        const marks = batch.map(() => "?").join(", ");
        const statement = db.prepare(`SELECT asset_ref, cache_variant, body_hash, result_json, updated_at FROM llm_enrichment_cache
         WHERE cache_variant = ? AND asset_ref IN (${marks})`);
        for (const record of statement.all(cacheVariant, ...batch)) {
            const cached = {
                assetRef: record.asset_ref,
                cacheVariant: record.cache_variant,
                bodyHash: record.body_hash,
                resultJson: record.result_json,
                updatedAt: record.updated_at,
            };
            found.set(record.asset_ref, cached);
        }
    }
    return found;
}
|
|
1071
1455
|
/**
 * Insert or update the cached LLM result for an `(asset_ref, cache_variant)`
 * pair. On conflict the body hash, result JSON and timestamp are overwritten;
 * `updated_at` is set to the current `Date.now()` value.
 *
 * @param db            Open database connection.
 * @param assetRef      Asset ref the result belongs to.
 * @param bodyHash      Hash of the body the result was computed from.
 * @param resultJson    Serialized LLM result to cache.
 * @param cacheVariant  Cache variant key (defaults to `""`).
 */
export function upsertLlmCacheEntry(db, assetRef, bodyHash, resultJson, cacheVariant = "") {
    const statement = db.prepare(`INSERT INTO llm_enrichment_cache (asset_ref, cache_variant, body_hash, result_json, updated_at)
     VALUES (?, ?, ?, ?, ?)
     ON CONFLICT(asset_ref, cache_variant) DO UPDATE SET
       body_hash = excluded.body_hash,
       result_json = excluded.result_json,
       updated_at = excluded.updated_at`);
    const writtenAt = Date.now();
    statement.run(assetRef, cacheVariant, bodyHash, resultJson, writtenAt);
}
|
|
1082
1466
|
/**
|
|
1083
1467
|
* Delete LLM cache entries whose asset_ref is no longer present in the
|
|
@@ -1145,26 +1529,55 @@ export function getRetrievalCounts(db, refs) {
|
|
|
1145
1529
|
* The indexer (`akm index`) will overwrite these values at next reindex run;
|
|
1146
1530
|
* bumps are intentionally temporary hints between index runs, not permanent
|
|
1147
1531
|
* overrides.
|
|
1532
|
+
*
|
|
1533
|
+
* When `scopeKey` is provided, also writes a scoped bump to
|
|
1534
|
+
* `utility_scores_scoped` so per-project usage signals accumulate alongside
|
|
1535
|
+
* the global ones. The global table is always updated regardless.
|
|
1148
1536
|
*/
|
|
1149
|
-
export function bumpUtilityScoresBatch(db, entryIds, reward, lr = 0.1) {
|
|
1537
|
+
export function bumpUtilityScoresBatch(db, entryIds, reward, lr = 0.1, scopeKey) {
    // db       - handle exposing `prepare(sql)` and `transaction(fn)`.
    // entryIds - entry ids to bump; an empty list is a no-op.
    // reward   - target value of the bounded step.
    // lr       - learning rate (fraction of the distance to `reward` covered per call).
    // scopeKey - optional; when truthy, a per-scope score is bumped as well.
    if (entryIds.length === 0)
        return;
    db.transaction(() => {
        const { global: scoreMap } = getUtilityScoresByIds(db, entryIds);
        const now = new Date().toISOString();
        const nowMs = Date.now();
        // Bounded step shared by the global and scoped updates:
        // next = clamp(current + lr * (reward - current), 0, 1)
        const step = (current) => Math.max(0, Math.min(1, current + lr * (reward - current)));
        const stmt = db.prepare(`INSERT INTO utility_scores (entry_id, utility, show_count, search_count, select_rate, last_used_at, updated_at)
       VALUES (?, ?, 0, 0, 0, ?, ?)
       ON CONFLICT(entry_id) DO UPDATE SET
         utility = excluded.utility,
         updated_at = excluded.updated_at`);
        // Prepare both scoped statements once, outside the loop, when scopeKey is
        // present. Previously the scoped SELECT was re-prepared for every entry.
        const scopedStmt = scopeKey
            ? db.prepare(`INSERT INTO utility_scores_scoped (entry_id, scope_key, utility, last_used_at)
         VALUES (?, ?, ?, ?)
         ON CONFLICT(entry_id, scope_key) DO UPDATE SET
           utility = excluded.utility,
           last_used_at = excluded.last_used_at`)
            : null;
        const scopedSelect = scopeKey
            ? db.prepare("SELECT utility FROM utility_scores_scoped WHERE entry_id = ? AND scope_key = ?")
            : null;
        for (const entryId of entryIds) {
            // Global score: an entry with no row yet starts from 0.
            const current = scoreMap.get(entryId)?.utility ?? 0;
            stmt.run(entryId, step(current), now, now);
            if (scopedStmt && scopedSelect) {
                // Scoped score: same step, starting from the stored scoped value (0 when absent).
                const scopedCurrent = scopedSelect.get(entryId, scopeKey)?.utility ?? 0;
                scopedStmt.run(entryId, scopeKey, step(scopedCurrent), nowMs);
            }
        }
    })();
}
|
|
1571
|
+
/**
 * Look up the utility currently stored in `utility_scores_scoped` for one
 * (entry_id, scope_key) pair.
 *
 * @param db - Handle exposing `prepare(sql).get(...params)`.
 * @param entryId - Value bound to `entry_id`.
 * @param scopeKey - Value bound to `scope_key`.
 * @returns The row's `utility`, or 0 when no row (or no utility value) is found.
 */
function getScopedUtility(db, entryId, scopeKey) {
    const lookup = db.prepare("SELECT utility FROM utility_scores_scoped WHERE entry_id = ? AND scope_key = ?");
    const match = lookup.get(entryId, scopeKey);
    if (match == null) {
        // No row yet for this pair.
        return 0;
    }
    return match.utility ?? 0;
}
|
|
1168
1581
|
// ── Indexer-phase helpers (moved from indexer.ts) ────────────────────────────
|
|
1169
1582
|
/**
|
|
1170
1583
|
* Return all entries that do not yet have an embedding row.
|
|
@@ -1238,31 +1651,97 @@ export function getZeroResultSearches(db, sinceDays = 30) {
|
|
|
1238
1651
|
* Returns null when no matching row is found.
|
|
1239
1652
|
*/
|
|
1240
1653
|
export function getEntryByRef(db, type, name) {
    const lookup = db.prepare("SELECT id FROM entries WHERE entry_type = ? AND entry_key = ?");
    // entry_key is compared for exact equality against "<type>:<name>".
    const entryKey = `${type}:${name}`;
    return lookup.get(type, entryKey);
}
|
|
1245
1656
|
/**
 * MemRL learning rate for feedback-driven utility updates (F-5 / #386).
 *
 * Follows the bounded-step formula from MemRL (arXiv:2601.03192):
 *   next = clamp(current + lr × (reward − current), 0, 1)
 *
 * Each update moves the utility a fraction `lr` of the way toward the reward.
 *
 * This replaces the unbounded `-0.03 × negativeCount` delta that could
 * silently remove high-utility assets from the improvement loop.
 */
const FEEDBACK_LR = 0.1;
/**
 * Reward used when the feedback is purely positive.
 * Reward 1.0 means "fully correct / helpful".
 */
const FEEDBACK_REWARD_POSITIVE = 1.0;
/**
 * Reward used when the feedback is purely negative.
 * Reward 0.0 means "not helpful" (lowest MemRL signal).
 */
const FEEDBACK_REWARD_NEGATIVE = 0.0;
/**
 * Largest amount by which a single `applyFeedbackToUtilityScore` call may
 * lower a utility score, regardless of negativeCount.
 *
 * The cap applies per call, not per day: however many negatives are reported
 * in one call, utility drops by at most `MAX_NEG_DELTA_PER_CALL`. The cap
 * prevents a noisy negative-feedback stream from silently destroying a
 * high-utility asset's ranking.
 *
 * NOTE(review): with FEEDBACK_LR = 0.1 and rewards/utilities within [0, 1],
 * one step already moves utility by at most 0.1, so this 0.15 cap looks
 * unreachable unless the learning rate is raised — confirm this is intended.
 */
const MAX_NEG_DELTA_PER_CALL = 0.15;
/**
 * Utility threshold below which a review-needed escalation is triggered.
 * When a previously high-utility asset (≥ HIGH_UTILITY_THRESHOLD) drops
 * below this value, the caller should create an escalation proposal.
 */
export const UTILITY_REVIEW_THRESHOLD = 0.5;
/**
 * Utility level considered "high" — assets at or above this value are tracked
 * for threshold-crossing escalation. Currently the same value as
 * UTILITY_REVIEW_THRESHOLD.
 */
export const HIGH_UTILITY_THRESHOLD = 0.5;
|
|
1697
|
+
/**
 * Apply accumulated feedback counts to the utility score of an entry using the
 * MemRL bounded-step EMA formula (F-5 / #386, arXiv:2601.03192).
 *
 * Replaces the previous unbounded `-0.03 × negativeCount` formula with:
 *
 *   reward   = share of positive events among all reported events
 *   nextUtil = clamp(currentUtil + lr × (reward − currentUtil), 0, 1)
 *
 * A downward step is additionally capped at {@link MAX_NEG_DELTA_PER_CALL}
 * to prevent a noisy feedback stream from silently erasing a high-utility asset.
 *
 * An entry with no stored score starts at 0.5 (neutral midpoint) before the
 * step is applied.
 *
 * Counts that are not finite positive numbers (NaN, undefined, negative
 * values) are treated as 0, so malformed input can never write a NaN utility.
 * When no valid events remain, nothing is written and the utility is reported
 * unchanged.
 *
 * @param db - Database handle exposing `prepare(sql).run(...params)`.
 * @param entryId - Id of the entry whose `utility_scores` row is updated.
 * @param positiveCount - Number of positive feedback events.
 * @param negativeCount - Number of negative feedback events.
 * @returns `{ previousUtility, nextUtility, crossedReviewThreshold }`;
 *   `crossedReviewThreshold` is true when the utility was at or above
 *   HIGH_UTILITY_THRESHOLD and is now below UTILITY_REVIEW_THRESHOLD, so the
 *   caller can create an escalation proposal.
 */
export function applyFeedbackToUtilityScore(db, entryId, positiveCount, negativeCount) {
    const existing = getUtilityScore(db, entryId);
    const previousUtility = existing?.utility ?? 0.5;
    // Anything that is not a finite, positive number counts as "no events".
    const toCount = (value) => (Number.isFinite(value) && value > 0 ? value : 0);
    const positives = toCount(positiveCount);
    const negatives = toCount(negativeCount);
    if (positives === 0 && negatives === 0) {
        return { previousUtility, nextUtility: previousUtility, crossedReviewThreshold: false };
    }
    const total = positives + negatives;
    // Weighted reward: exactly FEEDBACK_REWARD_POSITIVE for purely positive
    // feedback, exactly FEEDBACK_REWARD_NEGATIVE for purely negative feedback,
    // and the proportional mix otherwise.
    const reward = (positives * FEEDBACK_REWARD_POSITIVE + negatives * FEEDBACK_REWARD_NEGATIVE) / total;
    // MemRL bounded-step EMA: lr × (reward − current)
    let delta = FEEDBACK_LR * (reward - previousUtility);
    // Per-call negative cap: a downward step never exceeds MAX_NEG_DELTA_PER_CALL.
    if (delta < 0) {
        delta = Math.max(delta, -MAX_NEG_DELTA_PER_CALL);
    }
    const nextUtility = Math.max(0, Math.min(1, previousUtility + delta));
    const now = new Date().toISOString();
    // The utility and timestamp are bound twice: once for the INSERT values and
    // once for the ON CONFLICT update.
    db.prepare(`
    INSERT INTO utility_scores (entry_id, utility, show_count, search_count, select_rate, last_used_at, updated_at)
    VALUES (?, ?, 0, 0, 0, ?, ?)
    ON CONFLICT(entry_id) DO UPDATE SET
      utility = ?,
      updated_at = ?
  `).run(entryId, nextUtility, now, now, nextUtility, now);
    const crossedReviewThreshold = previousUtility >= HIGH_UTILITY_THRESHOLD && nextUtility < UTILITY_REVIEW_THRESHOLD;
    return { previousUtility, nextUtility, crossedReviewThreshold };
}
|
|
1267
1746
|
/**
|
|
1268
1747
|
* Re-link detached usage_events to their current entry_ids via entry_ref.
|
|
@@ -1273,6 +1752,22 @@ export function applyFeedbackToUtilityScore(db, entryId, positiveCount, negative
|
|
|
1273
1752
|
*/
|
|
1274
1753
|
export function relinkUsageEvents(db) {
|
|
1275
1754
|
try {
|
|
1755
|
+
// Step 1: null out stale entry_ids (entry was deleted, re-keyed, etc).
|
|
1756
|
+
// Leaving them in place would let `recomputeUtilityScores` aggregate
|
|
1757
|
+
// by an entry_id that no longer exists in `entries`, then trip the FK
|
|
1758
|
+
// constraint on the utility_scores INSERT and roll back the entire
|
|
1759
|
+
// finalize transaction. Nulled rows can be re-resolved by step 2 below;
|
|
1760
|
+
// events whose entry is permanently gone simply stay null and age out
|
|
1761
|
+
// via the 90-day retention policy.
|
|
1762
|
+
db.exec(`
|
|
1763
|
+
UPDATE usage_events
|
|
1764
|
+
SET entry_id = NULL
|
|
1765
|
+
WHERE entry_id IS NOT NULL
|
|
1766
|
+
AND entry_id NOT IN (SELECT id FROM entries)
|
|
1767
|
+
`);
|
|
1768
|
+
// Step 2: re-resolve any null entry_id from entry_ref against the
|
|
1769
|
+
// current entries table. Picks up entries that were re-created with
|
|
1770
|
+
// the same ref (e.g. an asset moved between sources).
|
|
1276
1771
|
db.exec(`
|
|
1277
1772
|
UPDATE usage_events SET entry_id = (
|
|
1278
1773
|
SELECT e.id FROM entries e
|