akm-cli 0.9.0-beta.53 → 0.9.0-beta.55
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/clack.js +56 -0
- package/dist/cli/confirm.js +1 -1
- package/dist/cli.js +5 -3
- package/dist/commands/agent/contribute-cli.js +2 -3
- package/dist/commands/env/env-cli.js +187 -202
- package/dist/commands/env/secret-cli.js +109 -121
- package/dist/commands/feedback-cli.js +152 -155
- package/dist/commands/health/advisories.js +151 -0
- package/dist/commands/health/html-report.js +33 -10
- package/dist/commands/health/improve-metrics.js +754 -0
- package/dist/commands/health/llm-usage.js +65 -0
- package/dist/commands/health/md-report.js +103 -0
- package/dist/commands/health/metrics.js +278 -0
- package/dist/commands/health/task-runs.js +135 -0
- package/dist/commands/health/types.js +18 -0
- package/dist/commands/health/windows.js +196 -0
- package/dist/commands/health.js +15 -1492
- package/dist/commands/improve/anti-collapse.js +170 -0
- package/dist/commands/improve/collapse-detector.js +3 -2
- package/dist/commands/improve/consolidate.js +636 -633
- package/dist/commands/improve/dedup.js +1 -1
- package/dist/commands/improve/distill/content-repair.js +202 -0
- package/dist/commands/improve/distill/promote-memory.js +228 -0
- package/dist/commands/improve/distill/quality-gate.js +233 -0
- package/dist/commands/improve/distill-guards.js +127 -0
- package/dist/commands/improve/distill.js +49 -575
- package/dist/commands/improve/extract-cli.js +74 -76
- package/dist/commands/improve/extract.js +6 -4
- package/dist/commands/improve/hot-probation.js +45 -0
- package/dist/commands/improve/improve-auto-accept.js +3 -2
- package/dist/commands/improve/improve-cli.js +14 -13
- package/dist/commands/improve/improve-result-file.js +2 -1
- package/dist/commands/improve/improve.js +6 -5
- package/dist/commands/improve/loop-stages.js +19 -21
- package/dist/commands/improve/outcome-loop.js +18 -16
- package/dist/commands/improve/preparation.js +23 -5
- package/dist/commands/improve/procedural.js +10 -31
- package/dist/commands/improve/recombine.js +19 -43
- package/dist/commands/improve/reflect.js +1 -1
- package/dist/commands/improve/schema-similarity-gate.js +168 -0
- package/dist/commands/improve/shared.js +48 -0
- package/dist/commands/observability-cli.js +4 -4
- package/dist/commands/proposal/drain-policies.js +2 -2
- package/dist/commands/proposal/drain.js +1 -1
- package/dist/commands/proposal/legacy-import.js +115 -0
- package/dist/commands/proposal/proposal-cli.js +3 -3
- package/dist/commands/proposal/proposal.js +2 -1
- package/dist/commands/proposal/propose.js +1 -1
- package/dist/commands/proposal/repository.js +829 -0
- package/dist/commands/proposal/validators/proposals.js +5 -920
- package/dist/commands/read/curate.js +4 -4
- package/dist/commands/read/remember-cli.js +132 -137
- package/dist/commands/read/search-cli.js +7 -5
- package/dist/commands/read/search.js +7 -3
- package/dist/commands/read/show.js +3 -5
- package/dist/commands/registry-cli.js +76 -87
- package/dist/commands/sources/add-cli.js +91 -95
- package/dist/commands/sources/history.js +1 -1
- package/dist/commands/sources/init.js +12 -0
- package/dist/commands/sources/schema-repair.js +1 -1
- package/dist/commands/sources/sources-cli.js +3 -3
- package/dist/commands/sources/stash-cli.js +2 -2
- package/dist/commands/tasks/default-tasks.js +12 -0
- package/dist/commands/tasks/tasks-cli.js +1 -2
- package/dist/commands/wiki-cli.js +2 -3
- package/dist/core/common.js +3 -3
- package/dist/core/config/config-schema.js +6 -0
- package/dist/core/config/config.js +12 -0
- package/dist/core/deep-merge.js +38 -0
- package/dist/core/events.js +2 -1
- package/dist/core/logs-db.js +8 -13
- package/dist/core/paths.js +14 -14
- package/dist/core/state-db.js +13 -1140
- package/dist/core/warn.js +21 -0
- package/dist/indexer/db/db.js +72 -709
- package/dist/indexer/db/entry-mapper.js +41 -0
- package/dist/indexer/db/schema.js +516 -0
- package/dist/indexer/ensure-index.js +3 -2
- package/dist/indexer/feedback/utility-policy.js +85 -0
- package/dist/indexer/graph/graph-extraction.js +2 -1
- package/dist/indexer/index-writer-lock.js +18 -0
- package/dist/indexer/indexer.js +94 -27
- package/dist/indexer/read-preflight.js +23 -0
- package/dist/indexer/search/fts-query.js +51 -0
- package/dist/indexer/walk/walker.js +21 -13
- package/dist/integrations/agent/detect.js +9 -0
- package/dist/integrations/agent/index.js +1 -1
- package/dist/integrations/agent/spawn.js +15 -66
- package/dist/llm/client.js +12 -0
- package/dist/llm/embedder.js +26 -2
- package/dist/llm/embedders/local.js +7 -1
- package/dist/output/text/helpers.js +13 -0
- package/dist/scripts/migrate-storage.js +6903 -7424
- package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +49 -44
- package/dist/setup/detect.js +9 -0
- package/dist/setup/legacy-config.js +106 -0
- package/dist/setup/prompt.js +57 -0
- package/dist/setup/providers.js +14 -0
- package/dist/setup/registry-stash-loader.js +12 -0
- package/dist/setup/semantic-assets.js +124 -0
- package/dist/setup/setup.js +25 -1608
- package/dist/setup/steps/connection.js +734 -0
- package/dist/setup/steps/output.js +31 -0
- package/dist/setup/steps/platforms.js +124 -0
- package/dist/setup/steps/semantic.js +27 -0
- package/dist/setup/steps/sources.js +222 -0
- package/dist/setup/steps/stashdir.js +42 -0
- package/dist/setup/steps/tasks.js +152 -0
- package/dist/storage/repositories/canaries-repository.js +107 -0
- package/dist/storage/repositories/consolidation-repository.js +38 -0
- package/dist/storage/repositories/embeddings-repository.js +72 -0
- package/dist/storage/repositories/events-repository.js +187 -0
- package/dist/storage/repositories/extract-sessions-repository.js +96 -0
- package/dist/storage/repositories/improve-runs-repository.js +130 -0
- package/dist/storage/repositories/index-db.js +4 -7
- package/dist/storage/repositories/proposals-repository.js +220 -0
- package/dist/storage/repositories/recombine-repository.js +213 -0
- package/dist/storage/repositories/task-history-repository.js +93 -0
- package/dist/storage/sqlite-pragmas.js +3 -3
- package/dist/tasks/backends/index.js +9 -0
- package/dist/tasks/runner.js +11 -1
- package/package.json +2 -2
- package/dist/commands/improve/homeostatic.js +0 -497
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
2
|
+
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
|
+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
4
|
+
/**
|
|
5
|
+
* Shared `entries`-row projection + mapper, extracted from indexer/db/db.ts.
|
|
6
|
+
*
|
|
7
|
+
* Centralizes the one canonical `entries` SELECT column list and the
|
|
8
|
+
* JSON-parse-guarded row → {@link DbIndexedEntry} mapping that several queries
|
|
9
|
+
* used to reimplement. Corrupt `entry_json` rows are skipped (warn once) rather
|
|
10
|
+
* than crashing the caller.
|
|
11
|
+
*/
|
|
12
|
+
import { warn } from "../../core/warn.js";
|
|
13
|
+
/**
|
|
14
|
+
* Canonical column list for reading a full indexed entry from the `entries`
|
|
15
|
+
* table, in the order {@link rowToIndexedEntry} expects.
|
|
16
|
+
*/
|
|
17
|
+
// rowToIndexedEntry reads these fields from each row by property name, so any
// SELECT whose rows are handed to it must project every column listed here.
export const ENTRY_COLUMNS = "id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text";
|
|
18
|
+
/**
 * Map one raw `entries` row to a {@link DbIndexedEntry}, parsing `entry_json`.
 *
 * A row is treated as corrupt when `entry_json` fails to parse OR parses to
 * something that is not a plain JSON object (`null`, an array, or a primitive
 * such as `42`). Corrupt rows produce a warning tagged with `context` and a
 * `null` return so callers can skip the row instead of crashing later on a
 * non-object `entry`.
 *
 * @param {object} row - Raw row carrying the {@link ENTRY_COLUMNS} fields.
 * @param {string} context - Label of the calling query, used in the warning.
 * @returns {DbIndexedEntry | null} The mapped entry, or `null` for a corrupt row.
 */
export function rowToIndexedEntry(row, context) {
    let entry;
    try {
        entry = JSON.parse(row.entry_json);
    }
    catch {
        // Leave `entry` undefined; the shape check below reports the row.
        entry = undefined;
    }
    // Valid JSON is not enough: "null", "42" or "[]" parse fine but are not an
    // entry object, so they are rejected the same way as unparseable text.
    if (entry === null || typeof entry !== "object" || Array.isArray(entry)) {
        warn(`[db] ${context}: skipping entry id=${row.id} — corrupt entry_json`);
        return null;
    }
    return {
        id: row.id,
        entryKey: row.entry_key,
        dirPath: row.dir_path,
        filePath: row.file_path,
        stashDir: row.stash_dir,
        entry,
        searchText: row.search_text,
    };
}
|
|
@@ -0,0 +1,516 @@
|
|
|
1
|
+
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
2
|
+
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
|
+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
4
|
+
/**
|
|
5
|
+
* index.db schema, version stamps, and targeted migrations — extracted from
|
|
6
|
+
* indexer/db/db.ts. This isolates the one genuinely risky area (schema
|
|
7
|
+
* evolution) from the CRUD/FTS/vector queries that stay in db.ts.
|
|
8
|
+
*
|
|
9
|
+
* The meta accessors, embedding purge, and vec-availability probe that
|
|
10
|
+
* `ensureSchema` leans on remain in db.ts (they are part of the database
|
|
11
|
+
* lifecycle) and are imported back here.
|
|
12
|
+
*/
|
|
13
|
+
import { bestEffort } from "../../core/best-effort.js";
|
|
14
|
+
import { warn } from "../../core/warn.js";
|
|
15
|
+
import { ensureUsageEventsSchema } from "../usage/usage-events.js";
|
|
16
|
+
import { getMeta, isVecAvailable, purgeEmbeddings, setMeta } from "./db.js";
|
|
17
|
+
// ── Constants ───────────────────────────────────────────────────────────────
|
|
18
|
+
// NOTE: schema changes are additive. DB_VERSION is a forensic stamp only — it
|
|
19
|
+
// no longer gates any destructive path (the old nuclear drop-and-rebuild was
|
|
20
|
+
// removed; index.db's idempotent CREATE … IF NOT EXISTS schema converges any
|
|
21
|
+
// older/partial DB forward without dropping data). Graph re-keying uses a
|
|
22
|
+
// TARGETED, graph-only migration (migrateGraphFilesSchema) — the model for any
|
|
23
|
+
// incompatible change: migrate in place, never wipe the whole index.
|
|
24
|
+
// Stamped into index_meta.version by ensureSchema only when no version is stored yet.
export const DB_VERSION = 17;
|
|
25
|
+
// Fallback vector width: ensureSchema uses it when the caller passes no
// embeddingDim and index_meta holds no usable "embeddingDim" value.
export const EMBEDDING_DIM = 384;
|
|
26
|
+
// #624-P1: graph_files re-keyed to (stash_root, file_path, body_hash). Bumped 3→4
|
|
27
|
+
// as a marker; the actual migration is the targeted rename in migrateGraphFilesSchema
// followed by the copy step documented on migrateGraphDataFromLegacy.
|
|
28
|
+
export const GRAPH_SCHEMA_VERSION = 4;
|
|
29
|
+
// ── Schema ──────────────────────────────────────────────────────────────────
|
|
30
|
+
/**
|
|
31
|
+
* DDL for the `registry_index_cache` table. This table lives in index.db
|
|
32
|
+
* (managed by this module), so its DDL belongs here next to the `ensureSchema`
|
|
33
|
+
* that applies it — not in state-db.ts.
|
|
34
|
+
*
|
|
35
|
+
* Created with CREATE TABLE IF NOT EXISTS so it is safe to call inside
|
|
36
|
+
* `ensureSchema()`. Caches the result of resolving and fetching remote registry
|
|
37
|
+
* stash indexes so `akm search` does not hit the network on every invocation.
|
|
38
|
+
*
|
|
39
|
+
* Indexed (query) columns:
|
|
40
|
+
* registry_url TEXT PK — canonical URL of the registry; cache key.
|
|
41
|
+
* fetched_at TEXT — ISO-8601; used to detect stale entries (TTL).
|
|
42
|
+
* etag TEXT — HTTP ETag for conditional GET (If-None-Match).
|
|
43
|
+
* last_modified TEXT — HTTP Last-Modified for conditional GET.
|
|
44
|
+
*
|
|
45
|
+
* Non-indexed payload:
|
|
46
|
+
* index_json TEXT — JSON blob of the fetched registry index document.
|
|
47
|
+
*
|
|
48
|
+
* ADD COLUMN extension points (future migrations):
|
|
49
|
+
* ALTER TABLE registry_index_cache ADD COLUMN schema_version INTEGER DEFAULT 1;
|
|
50
|
+
* ALTER TABLE registry_index_cache ADD COLUMN kit_count INTEGER DEFAULT NULL;
|
|
51
|
+
* ALTER TABLE registry_index_cache ADD COLUMN error_message TEXT DEFAULT NULL;
|
|
52
|
+
*/
|
|
53
|
+
// Executed as-is by ensureSchema() as its final step (db.exec of this string);
// both statements use IF NOT EXISTS, so re-running it is harmless.
const REGISTRY_INDEX_CACHE_DDL = `
|
|
54
|
+
CREATE TABLE IF NOT EXISTS registry_index_cache (
|
|
55
|
+
registry_url TEXT PRIMARY KEY,
|
|
56
|
+
fetched_at TEXT NOT NULL,
|
|
57
|
+
etag TEXT,
|
|
58
|
+
last_modified TEXT,
|
|
59
|
+
index_json TEXT NOT NULL DEFAULT '{}'
|
|
60
|
+
);
|
|
61
|
+
|
|
62
|
+
CREATE INDEX IF NOT EXISTS idx_registry_cache_fetched
|
|
63
|
+
ON registry_index_cache(fetched_at);
|
|
64
|
+
`;
|
|
65
|
+
/**
 * Create or converge the index.db schema on an open connection.
 *
 * Every table is built with CREATE … IF NOT EXISTS, column additions go
 * through guarded ALTERs (ensureDerivedFromColumn) and incompatible graph
 * shapes through targeted migrations (migrateGraphFilesSchema /
 * migrateGraphDataFromLegacy). There is intentionally no "drop the whole
 * index on a DB_VERSION mismatch" path.
 *
 * Embedding-dimension contract:
 *   - `embeddingDim` omitted (`undefined` or `null`): the caller did not ask
 *     for a specific width. `index_meta.embeddingDim` is left untouched and no
 *     dimension-change purge runs; the stored value (or EMBEDDING_DIM) is used
 *     only if the vec table has to be created for the first time.
 *   - `embeddingDim` a number: the caller owns the dimension. A mismatch with
 *     the stored value purges the stale embeddings, and the new value is
 *     written to `index_meta`. On the sqlite-vec path the value is validated
 *     BEFORE anything is purged, so a bad argument cannot destroy data.
 *
 * @param {object} db - Open database handle exposing `exec()` and `prepare()`.
 * @param {number} [embeddingDim] - Requested embedding width, if any.
 * @throws {Error} When the vec table must be (re)created and the effective
 *   dimension is not an integer in 1..4096.
 */
export function ensureSchema(db, embeddingDim) {
    // Create meta table first so we can check version
    db.exec(`
        CREATE TABLE IF NOT EXISTS index_meta (
            key TEXT PRIMARY KEY,
            value TEXT NOT NULL
        );
    `);
    // index.db is a fully regenerable derived cache, so its schema is built
    // idempotently below. Opening a database with an older or partial schema
    // converges it forward WITHOUT ever dropping data; the few derived tables
    // that ever must be rebuilt are regenerated by `akm index`.
    db.exec(`
        CREATE TABLE IF NOT EXISTS entries (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            entry_key TEXT NOT NULL UNIQUE,
            dir_path TEXT NOT NULL,
            file_path TEXT NOT NULL,
            stash_dir TEXT NOT NULL,
            entry_json TEXT NOT NULL,
            search_text TEXT NOT NULL,
            entry_type TEXT NOT NULL,
            derived_from TEXT
        );

        CREATE INDEX IF NOT EXISTS idx_entries_dir ON entries(dir_path);
        CREATE INDEX IF NOT EXISTS idx_entries_type ON entries(entry_type);
        CREATE INDEX IF NOT EXISTS idx_entries_file_path ON entries(file_path);
    `);
    // Phase 5A / DB v17: backfill the `derived_from` column + index on
    // databases whose `entries` table predates it. The PRAGMA-then-ALTER guard
    // runs unconditionally so fresh and partial schemas both converge, and the
    // `derived_from` index is created inside the helper so it never references
    // a column that has not been added yet.
    ensureDerivedFromColumn(db);
    // Validated WorkflowDocument JSON, one row per indexed workflow entry.
    // Pure index data — fully rebuilt on each `akm index`. ON DELETE CASCADE
    // means clearing entries (full rebuild or per-dir delete) drops these too.
    db.exec(`
        CREATE TABLE IF NOT EXISTS workflow_documents (
            entry_id INTEGER PRIMARY KEY REFERENCES entries(id) ON DELETE CASCADE,
            schema_version INTEGER NOT NULL,
            document_json TEXT NOT NULL,
            source_path TEXT NOT NULL,
            source_hash TEXT NOT NULL,
            updated_at TEXT NOT NULL
        );

        CREATE INDEX IF NOT EXISTS idx_workflow_documents_source_path
            ON workflow_documents(source_path);
    `);
    // Set version immediately after table creation so a crash before the end of
    // ensureSchema() does not leave the database in a versionless state on next open.
    const versionAfterCreate = getMeta(db, "version");
    if (!versionAfterCreate) {
        setMeta(db, "version", String(DB_VERSION));
    }
    // BLOB-based embedding storage (always available, no sqlite-vec needed)
    db.exec(`
        CREATE TABLE IF NOT EXISTS embeddings (
            id INTEGER PRIMARY KEY,
            embedding BLOB NOT NULL,
            FOREIGN KEY (id) REFERENCES entries(id)
        );
    `);
    // FTS5 table — multi-column with per-field weighting via bm25().
    // Virtual tables are probed via sqlite_master before being created.
    const ftsExists = db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='entries_fts'").get();
    if (!ftsExists) {
        db.exec(`
            CREATE VIRTUAL TABLE entries_fts USING fts5(
                entry_id UNINDEXED,
                name,
                description,
                tags,
                hints,
                content,
                tokenize='porter unicode61'
            );
        `);
    }
    // Usage events table — created by ensureUsageEventsSchema() further down.
    // Utility scores table (aggregated per-entry utility metrics)
    db.exec(`
        CREATE TABLE IF NOT EXISTS utility_scores (
            entry_id INTEGER PRIMARY KEY,
            utility REAL NOT NULL DEFAULT 0,
            show_count INTEGER NOT NULL DEFAULT 0,
            search_count INTEGER NOT NULL DEFAULT 0,
            select_rate REAL NOT NULL DEFAULT 0,
            last_used_at TEXT,
            updated_at TEXT NOT NULL DEFAULT (datetime('now')),
            FOREIGN KEY (entry_id) REFERENCES entries(id) ON DELETE CASCADE
        );
    `);
    // Per-project scoped utility scores — tracks usage per (entry, cwd-anchor)
    // so assets useful in project A don't pollute rankings in project B.
    // The global utility_scores table is preserved as a fallback / cold-start aid.
    db.exec(`
        CREATE TABLE IF NOT EXISTS utility_scores_scoped (
            entry_id INTEGER NOT NULL,
            scope_key TEXT NOT NULL,
            utility REAL NOT NULL DEFAULT 0,
            last_used_at INTEGER NOT NULL,
            PRIMARY KEY (entry_id, scope_key)
        );
        CREATE INDEX IF NOT EXISTS idx_utility_scores_scoped_entry_id
            ON utility_scores_scoped(entry_id);
    `);
    db.exec(`
        CREATE TABLE IF NOT EXISTS index_dir_state (
            dir_path TEXT PRIMARY KEY,
            file_set_hash TEXT NOT NULL,
            file_mtime_max_ms REAL NOT NULL,
            reason TEXT NOT NULL,
            updated_at TEXT NOT NULL
        );
    `);
    // LLM enrichment result cache. Stores a SHA-256 body hash and the JSON
    // result for each asset so that subsequent `akm index --enrich` runs can
    // skip the LLM call when the body hasn't changed. The cache is keyed by
    // a stable asset_ref string (e.g. the absolute file path for graph/memory
    // passes, or `entryKey:passId` for the metadata-enhance pass).
    // Entries are cleaned up when assets are removed or --re-enrich is used.
    db.exec(`
        CREATE TABLE IF NOT EXISTS llm_enrichment_cache (
            asset_ref TEXT NOT NULL,
            cache_variant TEXT NOT NULL,
            body_hash TEXT NOT NULL,
            result_json TEXT NOT NULL,
            updated_at INTEGER NOT NULL,
            PRIMARY KEY (asset_ref, cache_variant)
        );

        CREATE INDEX IF NOT EXISTS idx_llm_cache_updated
            ON llm_enrichment_cache(updated_at);
    `);
    // Graph extraction tables — schema v4 ((stash_root, file_path, body_hash) PK).
    //
    // graph_files is self-keyed on (stash_root, file_path, body_hash) and is NO
    // LONGER tied to entries.id, so deleting and re-inserting an entries row
    // during a reindex no longer cascade-wipes the extracted graph. body_hash
    // is part of the PK so a content change yields a distinct key; a UNIQUE
    // index on (stash_root, file_path) still enforces exactly one graph_files
    // row per path (delete-then-insert on a hash change).
    //
    // graph_file_entities and graph_file_relations carry (stash_root, file_path,
    // body_hash) and declare a composite FK -> graph_files ON DELETE CASCADE so
    // child rows are removed when a graph_files row is replaced.
    //
    // #624-P1 targeted migration, step 1: an existing DB may still hold the OLD
    // graph_files (entry_id PK). SQLite can't ALTER a primary key, so the three
    // graph tables are renamed aside (→ *_legacy) here — ONLY the graph tables —
    // before the CREATE block below builds the new shape.
    migrateGraphFilesSchema(db);
    db.exec(`
        CREATE TABLE IF NOT EXISTS graph_meta (
            stash_root TEXT PRIMARY KEY,
            schema_version INTEGER NOT NULL,
            generated_at TEXT NOT NULL,
            considered_files INTEGER NOT NULL DEFAULT 0,
            extracted_files INTEGER NOT NULL DEFAULT 0,
            entity_count INTEGER NOT NULL DEFAULT 0,
            relation_count INTEGER NOT NULL DEFAULT 0,
            extraction_coverage REAL NOT NULL DEFAULT 0,
            density REAL NOT NULL DEFAULT 0,
            extractor_id TEXT,
            extraction_run_id TEXT,
            model TEXT,
            prompt_version TEXT,
            batch_size INTEGER,
            cache_hits INTEGER NOT NULL DEFAULT 0,
            cache_misses INTEGER NOT NULL DEFAULT 0,
            truncation_count INTEGER NOT NULL DEFAULT 0,
            failure_count INTEGER NOT NULL DEFAULT 0
        );

        CREATE TABLE IF NOT EXISTS graph_files (
            stash_root TEXT NOT NULL,
            file_path TEXT NOT NULL,
            file_order INTEGER NOT NULL,
            file_type TEXT NOT NULL,
            body_hash TEXT NOT NULL,
            confidence REAL,
            status TEXT NOT NULL DEFAULT 'extracted',
            reason TEXT,
            extraction_run_id TEXT,
            PRIMARY KEY (stash_root, file_path, body_hash)
        );

        CREATE UNIQUE INDEX IF NOT EXISTS idx_graph_files_path
            ON graph_files(stash_root, file_path);

        CREATE INDEX IF NOT EXISTS idx_graph_files_stash_order
            ON graph_files(stash_root, file_order);

        CREATE TABLE IF NOT EXISTS graph_file_entities (
            stash_root TEXT NOT NULL,
            file_path TEXT NOT NULL,
            body_hash TEXT NOT NULL,
            entity_order INTEGER NOT NULL,
            entity_norm TEXT NOT NULL,
            entity TEXT NOT NULL,
            PRIMARY KEY (stash_root, file_path, body_hash, entity_order),
            FOREIGN KEY (stash_root, file_path, body_hash)
                REFERENCES graph_files(stash_root, file_path, body_hash) ON DELETE CASCADE
        );

        CREATE INDEX IF NOT EXISTS idx_graph_file_entities_entity_norm
            ON graph_file_entities(stash_root, entity_norm);

        CREATE TABLE IF NOT EXISTS graph_file_relations (
            stash_root TEXT NOT NULL,
            file_path TEXT NOT NULL,
            body_hash TEXT NOT NULL,
            relation_order INTEGER NOT NULL,
            from_entity_norm TEXT NOT NULL,
            from_entity TEXT NOT NULL,
            to_entity_norm TEXT NOT NULL,
            to_entity TEXT NOT NULL,
            relation_type TEXT,
            confidence REAL,
            PRIMARY KEY (stash_root, file_path, body_hash, relation_order),
            FOREIGN KEY (stash_root, file_path, body_hash)
                REFERENCES graph_files(stash_root, file_path, body_hash) ON DELETE CASCADE
        );

        -- #624-P3: lazy graph-extraction queue. Standalone table (NO FK to
        -- graph_files — a queued file by definition has no graph row yet).
        -- Idempotent on (stash_root, file_path); drained highest-priority-first.
        -- CREATE TABLE IF NOT EXISTS is the forward migration (no DB_VERSION bump).
        CREATE TABLE IF NOT EXISTS graph_extraction_queue (
            stash_root TEXT NOT NULL,
            file_path TEXT NOT NULL,
            body_hash TEXT NOT NULL,
            queued_at TEXT NOT NULL DEFAULT (datetime('now')),
            priority INTEGER NOT NULL DEFAULT 0,
            PRIMARY KEY (stash_root, file_path)
        );

        CREATE INDEX IF NOT EXISTS idx_graph_extraction_queue_drain
            ON graph_extraction_queue(stash_root, priority DESC, queued_at);
    `);
    // #624-P1 migration step 2: copy any renamed-aside legacy graph data into the
    // new-shape tables (just created above), then drop the legacy tables.
    migrateGraphDataFromLegacy(db);
    // FTS-dirty queue. Created here (not lazily on first upsert) so the
    // per-entry write path doesn't issue a CREATE TABLE IF NOT EXISTS on
    // every call. See `markFtsDirty` and `rebuildFts({ incremental: true })`.
    db.exec(`
        CREATE TABLE IF NOT EXISTS entries_fts_dirty (
            entry_id INTEGER PRIMARY KEY
        );
    `);
    // sqlite-vec table — see the dimension contract in the function header.
    // `!= null` (not `!== undefined`) keeps this flag consistent with the `??`
    // fallback on the next line: a null argument means "no dimension requested"
    // and must not trigger a purge or be persisted as the string "null".
    const dimExplicit = embeddingDim != null;
    const effectiveDim = embeddingDim ?? (Number(getMeta(db, "embeddingDim")) || EMBEDDING_DIM);
    if (isVecAvailable(db)) {
        if (dimExplicit) {
            const storedDim = getMeta(db, "embeddingDim");
            if (storedDim && storedDim !== String(embeddingDim)) {
                // Validate BEFORE purging: the purge drops the vec table, which
                // forces the re-create below, which would reject a bad dimension
                // anyway — but only after the embeddings were already gone.
                assertValidEmbeddingDim(effectiveDim);
                // Stored vectors are incompatible with the new dimension. Drop the vec
                // table so the block below recreates it at the new width; the BLOB rows
                // go too. Regenerable from markdown — re-embedded by the next index.
                purgeEmbeddings(db, { dropVecTable: true });
            }
        }
        const vecExists = db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='entries_vec'").get();
        if (!vecExists) {
            // The dimension is interpolated into DDL, so it must be a sane integer.
            assertValidEmbeddingDim(effectiveDim);
            db.exec(`
                CREATE VIRTUAL TABLE entries_vec USING vec0(
                    id INTEGER PRIMARY KEY,
                    embedding FLOAT[${effectiveDim}]
                );
            `);
        }
        if (dimExplicit) {
            setMeta(db, "embeddingDim", String(embeddingDim));
        }
    }
    else {
        // JS fallback path: sqlite-vec is unavailable, so entries_vec doesn't
        // exist but the BLOB embeddings table still stores vectors. If the
        // configured dimension changes, those BLOBs become silently incompatible.
        if (dimExplicit) {
            const storedDim = getMeta(db, "embeddingDim");
            if (storedDim && storedDim !== String(embeddingDim)) {
                // No vec table here — just clear the stale BLOB vectors.
                purgeEmbeddings(db);
            }
            setMeta(db, "embeddingDim", String(embeddingDim));
        }
    }
    // Usage telemetry table
    ensureUsageEventsSchema(db);
    // Registry index cache table — caches remote registry index documents so
    // `akm search` does not hit the network on every invocation.
    db.exec(REGISTRY_INDEX_CACHE_DDL);
}
/**
 * Reject a value that cannot serve as the width of the `entries_vec` table.
 *
 * @param {unknown} dim - Candidate embedding dimension.
 * @throws {Error} When `dim` is not an integer in 1..4096.
 */
function assertValidEmbeddingDim(dim) {
    if (!Number.isInteger(dim) || dim <= 0 || dim > 4096) {
        throw new Error(`Invalid embedding dimension: ${dim}`);
    }
}
|
|
394
|
+
/**
 * Phase 5A / DB v17 schema guard for `entries.derived_from`.
 *
 * Looks the column up via `PRAGMA table_info(entries)`, adds it with an ALTER
 * only when it is missing, then creates the `idx_entries_derived_from` index
 * (itself `IF NOT EXISTS`). The whole sequence runs inside `bestEffort`.
 * Called from `ensureSchema()` right after the `entries` CREATE so databases
 * created before the column existed gain it without data loss.
 *
 * @param {object} db - Open database handle exposing `prepare()` and `exec()`.
 */
function ensureDerivedFromColumn(db) {
    const addColumnAndIndex = () => {
        const columnNames = db
            .prepare("PRAGMA table_info(entries)")
            .all()
            .map((column) => column.name);
        if (!columnNames.includes("derived_from")) {
            db.exec("ALTER TABLE entries ADD COLUMN derived_from TEXT");
        }
        // The index statement is idempotent by itself, so it always runs.
        db.exec("CREATE INDEX IF NOT EXISTS idx_entries_derived_from ON entries(derived_from)");
    };
    bestEffort(addColumnAndIndex, "entries table may not exist on a brand-new DB before CREATE — caller is responsible");
}
|
|
413
|
+
/**
 * Report whether `sqlite_master` lists a table called `name`.
 *
 * @param {object} db - Open database handle exposing `prepare()`.
 * @param {string} name - Table name, bound as a statement parameter.
 * @returns {boolean} `true` when the lookup yields a row.
 */
function tableExists(db, name) {
    const lookup = db.prepare("SELECT 1 FROM sqlite_master WHERE type='table' AND name=? LIMIT 1");
    // Loose comparison on purpose: a miss may surface as undefined or null.
    return lookup.get(name) != null;
}
|
|
420
|
+
/**
 * #624-P1 targeted graph-schema migration — STEP 1 of 2 (rename).
 *
 * graph_files used to be keyed by `entry_id`; it is now keyed by
 * `(stash_root, file_path, body_hash)`. SQLite cannot ALTER a primary key, so
 * when the OLD shape is detected the graph tables are renamed to `*_legacy`.
 * ensureSchema's CREATE block then builds the new-shape tables and
 * {@link migrateGraphDataFromLegacy} copies the data across, so the graph is
 * preserved rather than re-extracted.
 *
 * Scope: only `graph_files`, `graph_file_entities` and `graph_file_relations`
 * (plus their `*_legacy` leftovers) are touched. graph_meta is left in place.
 *
 * Detection: the old shape has an `entry_id` column on graph_files. When that
 * column is absent — no graph_files yet, or already migrated — nothing is
 * executed. The whole sequence runs inside `bestEffort`.
 *
 * @param {object} db - Open database handle exposing `prepare()` and `exec()`.
 */
function migrateGraphFilesSchema(db) {
    const renameLegacyGraphTables = () => {
        const columns = db.prepare("PRAGMA table_info(graph_files)").all();
        if (!columns.some((column) => column.name === "entry_id")) {
            return;
        }
        // An interrupted earlier migration may have left *_legacy tables behind;
        // clear them (children first) so the renames below cannot collide.
        for (const table of ["graph_file_relations", "graph_file_entities", "graph_files"]) {
            db.exec(`DROP TABLE IF EXISTS ${table}_legacy`);
        }
        // Move the entry_id-keyed parent aside, then whichever children exist.
        // ALTER … RENAME auto-updates child FK refs in SQLite ≥3.25, which is
        // fine — the legacy children are dropped after the copy.
        db.exec("ALTER TABLE graph_files RENAME TO graph_files_legacy");
        for (const child of ["graph_file_entities", "graph_file_relations"]) {
            if (tableExists(db, child)) {
                db.exec(`ALTER TABLE ${child} RENAME TO ${child}_legacy`);
            }
        }
    };
    bestEffort(renameLegacyGraphTables, "graph_files may not exist on a brand-new DB before CREATE — caller is responsible");
}
|
|
462
|
+
/**
 * #624-P1 targeted graph-schema migration — STEP 2 of 2 (copy + drop legacy).
 *
 * Runs AFTER the graph CREATE TABLE block, so the new-shape tables exist. Copies
 * every legacy row into the re-keyed tables — the old tables already carry
 * (stash_root, file_path, body_hash) next to entry_id, so the projection is a
 * straight column copy (children JOIN back to graph_files_legacy to resolve the
 * composite key from their entry_id). Then drops the `*_legacy` tables.
 *
 * Best-effort: a copy failure (e.g. a pre-body_hash legacy schema) is tolerated,
 * and the legacy tables are dropped regardless so they never linger. Rows whose
 * body_hash is null/empty can't form the new PK and are skipped (they re-extract).
 *
 * The "migrated N file(s)" notice is emitted only when the copy transaction ran
 * to completion: the three copies share one transaction, so a failure in a child
 * copy also discards the graph_files rows and nothing must be reported as kept.
 *
 * @param db Database handle exposing `prepare`, `exec` and `transaction`.
 */
function migrateGraphDataFromLegacy(db) {
    if (!tableExists(db, "graph_files_legacy")) {
        return;
    }
    let migratedFiles = 0;
    bestEffort(() => {
        // Staged locally; only published below once the transaction has returned.
        let copiedFiles = 0;
        db.transaction(() => {
            const res = db
                .prepare(`INSERT OR IGNORE INTO graph_files
                    (stash_root, file_path, body_hash, file_order, file_type, confidence, status, reason, extraction_run_id)
                    SELECT stash_root, file_path, body_hash, file_order, file_type, confidence, status, reason, extraction_run_id
                    FROM graph_files_legacy
                    WHERE body_hash IS NOT NULL AND body_hash != ''`)
                .run();
            copiedFiles = Number(res.changes);
            if (tableExists(db, "graph_file_entities_legacy")) {
                db.exec(`INSERT OR IGNORE INTO graph_file_entities
                    (stash_root, file_path, body_hash, entity_order, entity_norm, entity)
                    SELECT gf.stash_root, gf.file_path, gf.body_hash, e.entity_order, e.entity_norm, e.entity
                    FROM graph_file_entities_legacy e
                    JOIN graph_files_legacy gf ON gf.entry_id = e.entry_id
                    WHERE gf.body_hash IS NOT NULL AND gf.body_hash != ''`);
            }
            if (tableExists(db, "graph_file_relations_legacy")) {
                db.exec(`INSERT OR IGNORE INTO graph_file_relations
                    (stash_root, file_path, body_hash, relation_order, from_entity_norm, from_entity, to_entity_norm, to_entity, relation_type, confidence)
                    SELECT gf.stash_root, gf.file_path, gf.body_hash, r.relation_order, r.from_entity_norm, r.from_entity, r.to_entity_norm, r.to_entity, r.relation_type, r.confidence
                    FROM graph_file_relations_legacy r
                    JOIN graph_files_legacy gf ON gf.entry_id = r.entry_id
                    WHERE gf.body_hash IS NOT NULL AND gf.body_hash != ''`);
            }
        })();
        // Reached only when the transaction did not throw, i.e. the rows were kept.
        migratedFiles = copiedFiles;
    }, "graph data migration is best-effort; legacy tables are dropped regardless below");
    // Always drop the legacy tables (children first), migrated or not.
    bestEffort(() => {
        db.exec("DROP TABLE IF EXISTS graph_file_relations_legacy");
        db.exec("DROP TABLE IF EXISTS graph_file_entities_legacy");
        db.exec("DROP TABLE IF EXISTS graph_files_legacy");
    }, "drop legacy graph tables after migration");
    if (migratedFiles > 0) {
        warn(`[akm] graph index re-keyed (#624): migrated ${migratedFiles} extracted file(s) to the new schema — no re-extraction needed. Index + embeddings untouched.`);
    }
}
|
|
@@ -185,7 +185,7 @@ async function runInlineReindex(stashDir) {
|
|
|
185
185
|
}
|
|
186
186
|
catch (error) {
|
|
187
187
|
warn("Auto-index failed, proceeding with existing index:", error instanceof Error ? error.message : String(error));
|
|
188
|
-
return
|
|
188
|
+
return false;
|
|
189
189
|
}
|
|
190
190
|
}
|
|
191
191
|
/**
|
|
@@ -200,7 +200,8 @@ async function runInlineReindex(stashDir) {
|
|
|
200
200
|
* trigger and waits for it. Use this for callers like `improve` whose
|
|
201
201
|
* planning logic depends on a current `entries` table in the same process.
|
|
202
202
|
*
|
|
203
|
-
* Returns `true`
|
|
203
|
+
* Returns `true` only when an inline index run succeeds.
|
|
204
|
+
* A rebuild attempt that fails (throws) resolves to `false`.
|
|
204
205
|
*/
|
|
205
206
|
export async function ensureIndex(stashDir, options = {}) {
|
|
206
207
|
if (options.mode === "blocking") {
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
2
|
+
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
|
+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
4
|
+
/**
|
|
5
|
+
* MemRL feedback → utility policy, extracted from indexer/db/db.ts.
|
|
6
|
+
*
|
|
7
|
+
* This is the domain/policy math (arXiv:2601.03192) that decides how a batch of
|
|
8
|
+
* positive/negative feedback signals moves an asset's utility score. It is pure
|
|
9
|
+
* — no database access — so the bounded-step behaviour is unit-testable in
|
|
10
|
+
* isolation; the DB read/write stays with `applyFeedbackToUtilityScore` in db.ts.
|
|
11
|
+
*/
|
|
12
|
+
/**
 * Learning rate applied to feedback-driven utility updates (F-5 / #386).
 *
 * Bounded-step rule from MemRL (arXiv:2601.03192):
 *   next = clamp(current + lr × (reward − current), 0, 1)
 *
 * Supersedes the earlier unbounded `-0.03 × negativeCount` delta, which could
 * silently remove high-utility assets from the improvement loop.
 */
export const FEEDBACK_LR = 0.1;
/** Reward for positive feedback; 1.0 means "fully correct / helpful". */
const FEEDBACK_REWARD_POSITIVE = 1.0;
/** Reward for negative feedback; 0.0 means "not helpful" (lowest MemRL signal). */
const FEEDBACK_REWARD_NEGATIVE = 0.0;
/**
 * Largest downward utility move a single evaluation may apply, however large
 * `negativeCount` is. The cap is per call (not per day): it keeps a noisy
 * negative-feedback stream from silently destroying a high-utility asset's
 * ranking in one step.
 *
 * Note: with `FEEDBACK_LR` = 0.1 and a utility inside [0, 1] the raw step is
 * already no lower than −0.1, so this cap only binds if the constants change or
 * the incoming utility lies above 1.
 */
export const MAX_NEG_DELTA_PER_CALL = 0.15;
/**
 * Review line: when an asset that was at or above {@link HIGH_UTILITY_THRESHOLD}
 * ends up strictly below this value, the caller should create an escalation
 * proposal.
 */
export const UTILITY_REVIEW_THRESHOLD = 0.5;
/**
 * Utility level considered "high"; assets at or above it are tracked for
 * threshold-crossing escalation.
 */
export const HIGH_UTILITY_THRESHOLD = 0.5;
/**
 * Derives the next utility score from a batch of feedback counts with the MemRL
 * bounded-step EMA (F-5 / #386, arXiv:2601.03192):
 *
 *   reward   = share of positive signals in the batch
 *   nextUtil = clamp(currentUtil + lr × (reward − currentUtil), 0, 1)
 *
 * A downward step is additionally limited to {@link MAX_NEG_DELTA_PER_CALL}.
 * Pure function — no DB access. When both counts are zero the utility is
 * returned unchanged.
 *
 * @param previousUtility Utility before this batch of feedback.
 * @param positiveCount   Number of positive signals in the batch.
 * @param negativeCount   Number of negative signals in the batch.
 * @returns `{ previousUtility, nextUtility, crossedReviewThreshold }`, where
 *   `crossedReviewThreshold` is true when the score started at or above
 *   {@link HIGH_UTILITY_THRESHOLD} and ended below {@link UTILITY_REVIEW_THRESHOLD}.
 */
export function computeNextUtility(previousUtility, positiveCount, negativeCount) {
    const hasNoFeedback = positiveCount === 0 && negativeCount === 0;
    if (hasNoFeedback) {
        return { previousUtility, nextUtility: previousUtility, crossedReviewThreshold: false };
    }
    const total = positiveCount + negativeCount;
    // Reward is the proportion of positive signals; the two unanimous cases are
    // short-circuited to the exact reward constants.
    let reward;
    if (positiveCount > 0 && negativeCount === 0) {
        reward = FEEDBACK_REWARD_POSITIVE;
    }
    else if (negativeCount > 0 && positiveCount === 0) {
        reward = FEEDBACK_REWARD_NEGATIVE;
    }
    else {
        reward = (positiveCount * FEEDBACK_REWARD_POSITIVE + negativeCount * FEEDBACK_REWARD_NEGATIVE) / total;
    }
    // MemRL bounded step, with the per-call floor applied to downward moves only.
    const rawStep = FEEDBACK_LR * (reward - previousUtility);
    const step = rawStep < 0 ? Math.max(rawStep, -MAX_NEG_DELTA_PER_CALL) : rawStep;
    const unclamped = previousUtility + step;
    const nextUtility = Math.max(0, Math.min(1, unclamped));
    const startedHigh = previousUtility >= HIGH_UTILITY_THRESHOLD;
    const crossedReviewThreshold = startedHigh && nextUtility < UTILITY_REVIEW_THRESHOLD;
    return { previousUtility, nextUtility, crossedReviewThreshold };
}
|
|
@@ -46,8 +46,9 @@ import { warn, warnVerbose } from "../../core/warn.js";
|
|
|
46
46
|
import { isProcessEnabled } from "../../llm/feature-gate.js";
|
|
47
47
|
import * as graphExtract from "../../llm/graph-extract.js";
|
|
48
48
|
import { resolveIndexPassLLM } from "../../llm/index-passes.js";
|
|
49
|
-
import { computeBodyHash,
|
|
49
|
+
import { computeBodyHash, getLlmCacheEntriesByRefs, getLlmCacheEntry, upsertLlmCacheEntry, } from "../db/db.js";
|
|
50
50
|
import { drainExtractionQueue, loadStoredGraphSnapshot, replaceStoredGraph } from "../db/graph-db.js";
|
|
51
|
+
import { GRAPH_SCHEMA_VERSION } from "../db/schema.js";
|
|
51
52
|
import { walkMarkdownFiles } from "../walk/walker.js";
|
|
52
53
|
import { deduplicateGraph } from "./graph-dedup.js";
|
|
53
54
|
/** Schema version for the persisted artifact — bumps trigger a full rebuild. */
|