akm-cli 0.7.5 → 0.8.0-rc2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/CHANGELOG.md +1 -1
- package/dist/cli/parse-args.js +43 -0
- package/dist/cli.js +853 -479
- package/dist/commands/agent-dispatch.js +102 -0
- package/dist/commands/agent-support.js +62 -0
- package/dist/commands/config-cli.js +68 -84
- package/dist/commands/consolidate.js +823 -0
- package/dist/commands/distill-promotion-policy.js +658 -0
- package/dist/commands/distill.js +244 -52
- package/dist/commands/eval-cases.js +40 -0
- package/dist/commands/events.js +2 -23
- package/dist/commands/graph.js +222 -0
- package/dist/commands/health.js +376 -0
- package/dist/commands/help/help-accept.md +9 -0
- package/dist/commands/help/help-improve.md +53 -0
- package/dist/commands/help/help-proposals.md +15 -0
- package/dist/commands/help/help-propose.md +17 -0
- package/dist/commands/help/help-reject.md +8 -0
- package/dist/commands/history.js +3 -30
- package/dist/commands/improve.js +1170 -0
- package/dist/commands/info.js +2 -2
- package/dist/commands/init.js +2 -2
- package/dist/commands/install-audit.js +5 -1
- package/dist/commands/installed-stashes.js +118 -138
- package/dist/commands/knowledge.js +133 -0
- package/dist/commands/lint/agent-linter.js +46 -0
- package/dist/commands/lint/base-linter.js +285 -0
- package/dist/commands/lint/command-linter.js +46 -0
- package/dist/commands/lint/default-linter.js +13 -0
- package/dist/commands/lint/index.js +107 -0
- package/dist/commands/lint/knowledge-linter.js +13 -0
- package/dist/commands/lint/memory-linter.js +58 -0
- package/dist/commands/lint/registry.js +33 -0
- package/dist/commands/lint/skill-linter.js +42 -0
- package/dist/commands/lint/task-linter.js +47 -0
- package/dist/commands/lint/types.js +1 -0
- package/dist/commands/lint/workflow-linter.js +53 -0
- package/dist/commands/lint.js +1 -0
- package/dist/commands/proposal.js +8 -7
- package/dist/commands/propose.js +78 -28
- package/dist/commands/reflect.js +143 -35
- package/dist/commands/registry-search.js +2 -2
- package/dist/commands/remember.js +54 -0
- package/dist/commands/schema-repair.js +130 -0
- package/dist/commands/search.js +21 -5
- package/dist/commands/show.js +121 -17
- package/dist/commands/source-add.js +10 -10
- package/dist/commands/source-manage.js +11 -19
- package/dist/commands/tasks.js +385 -0
- package/dist/commands/url-checker.js +39 -0
- package/dist/commands/vault.js +8 -26
- package/dist/core/action-contributors.js +25 -0
- package/dist/core/asset-ref.js +4 -0
- package/dist/core/asset-registry.js +4 -16
- package/dist/core/asset-spec.js +10 -0
- package/dist/core/common.js +94 -0
- package/dist/core/concurrent.js +22 -0
- package/dist/core/config.js +222 -128
- package/dist/core/events.js +73 -126
- package/dist/core/frontmatter.js +3 -1
- package/dist/core/markdown.js +17 -0
- package/dist/core/memory-improve.js +678 -0
- package/dist/core/parse.js +155 -0
- package/dist/core/paths.js +101 -3
- package/dist/core/proposal-validators.js +61 -0
- package/dist/core/proposals.js +49 -38
- package/dist/core/state-db.js +775 -0
- package/dist/core/time.js +51 -0
- package/dist/core/warn.js +59 -1
- package/dist/indexer/db-search.js +52 -238
- package/dist/indexer/db.js +378 -1
- package/dist/indexer/ensure-index.js +61 -0
- package/dist/indexer/graph-boost.js +247 -94
- package/dist/indexer/graph-db.js +201 -0
- package/dist/indexer/graph-dedup.js +99 -0
- package/dist/indexer/graph-extraction.js +409 -76
- package/dist/indexer/index-context.js +10 -0
- package/dist/indexer/indexer.js +442 -290
- package/dist/indexer/llm-cache.js +47 -0
- package/dist/indexer/match-contributors.js +141 -0
- package/dist/indexer/matchers.js +24 -190
- package/dist/indexer/memory-inference.js +63 -29
- package/dist/indexer/metadata-contributors.js +26 -0
- package/dist/indexer/metadata.js +194 -175
- package/dist/indexer/path-resolver.js +89 -0
- package/dist/indexer/ranking-contributors.js +204 -0
- package/dist/indexer/ranking.js +74 -0
- package/dist/indexer/search-hit-enrichers.js +22 -0
- package/dist/indexer/search-source.js +24 -9
- package/dist/indexer/semantic-status.js +2 -16
- package/dist/indexer/walker.js +25 -0
- package/dist/integrations/agent/config.js +175 -3
- package/dist/integrations/agent/index.js +3 -1
- package/dist/integrations/agent/pipeline.js +39 -0
- package/dist/integrations/agent/profiles.js +67 -5
- package/dist/integrations/agent/prompts.js +77 -72
- package/dist/integrations/agent/runners.js +31 -0
- package/dist/integrations/agent/sdk-runner.js +120 -0
- package/dist/integrations/agent/spawn.js +71 -16
- package/dist/integrations/lockfile.js +10 -18
- package/dist/integrations/session-logs/index.js +65 -0
- package/dist/integrations/session-logs/providers/claude-code.js +56 -0
- package/dist/integrations/session-logs/providers/opencode.js +52 -0
- package/dist/integrations/session-logs/types.js +1 -0
- package/dist/llm/call-ai.js +74 -0
- package/dist/llm/client.js +61 -122
- package/dist/llm/feature-gate.js +27 -16
- package/dist/llm/graph-extract.js +297 -62
- package/dist/llm/memory-infer.js +49 -71
- package/dist/llm/metadata-enhance.js +39 -22
- package/dist/llm/prompts/graph-extract-user-prompt.md +12 -0
- package/dist/output/cli-hints-full.md +277 -0
- package/dist/output/cli-hints-short.md +65 -0
- package/dist/output/cli-hints.js +2 -318
- package/dist/output/renderers.js +190 -123
- package/dist/output/shapes.js +33 -0
- package/dist/output/text.js +239 -2
- package/dist/registry/providers/skills-sh.js +61 -49
- package/dist/registry/providers/static-index.js +44 -48
- package/dist/setup/setup.js +510 -11
- package/dist/sources/provider-factory.js +2 -1
- package/dist/sources/providers/git.js +2 -2
- package/dist/sources/website-ingest.js +4 -0
- package/dist/tasks/backends/cron.js +200 -0
- package/dist/tasks/backends/exec-utils.js +25 -0
- package/dist/tasks/backends/index.js +32 -0
- package/dist/tasks/backends/launchd-template.xml +19 -0
- package/dist/tasks/backends/launchd.js +184 -0
- package/dist/tasks/backends/schtasks-template.xml +29 -0
- package/dist/tasks/backends/schtasks.js +212 -0
- package/dist/tasks/parser.js +198 -0
- package/dist/tasks/resolveAkmBin.js +84 -0
- package/dist/tasks/runner.js +432 -0
- package/dist/tasks/schedule.js +208 -0
- package/dist/tasks/schema.js +13 -0
- package/dist/tasks/validator.js +59 -0
- package/dist/wiki/index-template.md +12 -0
- package/dist/wiki/ingest-workflow-template.md +54 -0
- package/dist/wiki/log-template.md +8 -0
- package/dist/wiki/schema-template.md +61 -0
- package/dist/wiki/wiki-templates.js +12 -0
- package/dist/wiki/wiki.js +10 -61
- package/dist/workflows/authoring.js +5 -25
- package/dist/workflows/renderer.js +8 -3
- package/dist/workflows/runs.js +59 -91
- package/dist/workflows/validator.js +1 -1
- package/dist/workflows/workflow-template.md +24 -0
- package/docs/README.md +3 -0
- package/docs/migration/release-notes/0.7.0.md +1 -1
- package/docs/migration/release-notes/0.8.0.md +43 -0
- package/package.json +3 -2
- package/dist/templates/wiki-templates.js +0 -100
package/dist/indexer/db.js
CHANGED
|
@@ -4,13 +4,15 @@ import { createRequire } from "node:module";
|
|
|
4
4
|
import path from "node:path";
|
|
5
5
|
import { parseAssetRef } from "../core/asset-ref";
|
|
6
6
|
import { getDbPath } from "../core/paths";
|
|
7
|
+
import { REGISTRY_INDEX_CACHE_DDL } from "../core/state-db";
|
|
7
8
|
import { warn } from "../core/warn";
|
|
8
9
|
import { cosineSimilarity } from "../llm/embedders/types";
|
|
9
10
|
import { buildSearchFields } from "./search-fields";
|
|
10
11
|
import { ensureUsageEventsSchema } from "./usage-events";
|
|
11
12
|
// ── Constants ───────────────────────────────────────────────────────────────
|
|
12
|
-
export const DB_VERSION =
|
|
13
|
+
export const DB_VERSION = 12;
|
|
13
14
|
export const EMBEDDING_DIM = 384;
|
|
15
|
+
export const GRAPH_SCHEMA_VERSION = 1;
|
|
14
16
|
// ── Database lifecycle ──────────────────────────────────────────────────────
|
|
15
17
|
export function openDatabase(dbPath, options) {
|
|
16
18
|
const resolvedPath = dbPath ?? getDbPath();
|
|
@@ -183,6 +185,81 @@ function ensureSchema(db, embeddingDim) {
|
|
|
183
185
|
reason TEXT NOT NULL,
|
|
184
186
|
updated_at TEXT NOT NULL
|
|
185
187
|
);
|
|
188
|
+
`);
|
|
189
|
+
// LLM enrichment result cache. Stores a SHA-256 body hash and the JSON
|
|
190
|
+
// result for each asset so that subsequent `akm index --enrich` runs can
|
|
191
|
+
// skip the LLM call when the body hasn't changed. The cache is keyed by
|
|
192
|
+
// a stable asset_ref string (e.g. the absolute file path for graph/memory
|
|
193
|
+
// passes, or `entryKey:passId` for the metadata-enhance pass).
|
|
194
|
+
// Entries are cleaned up when assets are removed or --re-enrich is used.
|
|
195
|
+
db.exec(`
|
|
196
|
+
CREATE TABLE IF NOT EXISTS llm_enrichment_cache (
|
|
197
|
+
asset_ref TEXT PRIMARY KEY,
|
|
198
|
+
body_hash TEXT NOT NULL,
|
|
199
|
+
result_json TEXT NOT NULL,
|
|
200
|
+
updated_at INTEGER NOT NULL
|
|
201
|
+
);
|
|
202
|
+
|
|
203
|
+
CREATE INDEX IF NOT EXISTS idx_llm_cache_updated
|
|
204
|
+
ON llm_enrichment_cache(updated_at);
|
|
205
|
+
`);
|
|
206
|
+
db.exec(`
|
|
207
|
+
CREATE TABLE IF NOT EXISTS graph_meta (
|
|
208
|
+
stash_root TEXT PRIMARY KEY,
|
|
209
|
+
schema_version INTEGER NOT NULL,
|
|
210
|
+
generated_at TEXT NOT NULL,
|
|
211
|
+
considered_files INTEGER NOT NULL DEFAULT 0,
|
|
212
|
+
extracted_files INTEGER NOT NULL DEFAULT 0,
|
|
213
|
+
entity_count INTEGER NOT NULL DEFAULT 0,
|
|
214
|
+
relation_count INTEGER NOT NULL DEFAULT 0,
|
|
215
|
+
extraction_coverage REAL NOT NULL DEFAULT 0,
|
|
216
|
+
density REAL NOT NULL DEFAULT 0
|
|
217
|
+
);
|
|
218
|
+
|
|
219
|
+
CREATE TABLE IF NOT EXISTS graph_files (
|
|
220
|
+
stash_root TEXT NOT NULL,
|
|
221
|
+
file_path TEXT NOT NULL,
|
|
222
|
+
file_order INTEGER NOT NULL,
|
|
223
|
+
file_type TEXT NOT NULL,
|
|
224
|
+
body_hash TEXT,
|
|
225
|
+
confidence REAL,
|
|
226
|
+
PRIMARY KEY (stash_root, file_path),
|
|
227
|
+
FOREIGN KEY (stash_root) REFERENCES graph_meta(stash_root) ON DELETE CASCADE
|
|
228
|
+
);
|
|
229
|
+
|
|
230
|
+
CREATE INDEX IF NOT EXISTS idx_graph_files_stash_order
|
|
231
|
+
ON graph_files(stash_root, file_order);
|
|
232
|
+
|
|
233
|
+
CREATE TABLE IF NOT EXISTS graph_file_entities (
|
|
234
|
+
stash_root TEXT NOT NULL,
|
|
235
|
+
file_path TEXT NOT NULL,
|
|
236
|
+
entity_order INTEGER NOT NULL,
|
|
237
|
+
entity TEXT NOT NULL,
|
|
238
|
+
PRIMARY KEY (stash_root, file_path, entity_order),
|
|
239
|
+
FOREIGN KEY (stash_root, file_path)
|
|
240
|
+
REFERENCES graph_files(stash_root, file_path)
|
|
241
|
+
ON DELETE CASCADE
|
|
242
|
+
);
|
|
243
|
+
|
|
244
|
+
CREATE INDEX IF NOT EXISTS idx_graph_file_entities_lookup
|
|
245
|
+
ON graph_file_entities(stash_root, file_path, entity_order);
|
|
246
|
+
|
|
247
|
+
CREATE TABLE IF NOT EXISTS graph_file_relations (
|
|
248
|
+
stash_root TEXT NOT NULL,
|
|
249
|
+
file_path TEXT NOT NULL,
|
|
250
|
+
relation_order INTEGER NOT NULL,
|
|
251
|
+
from_entity TEXT NOT NULL,
|
|
252
|
+
to_entity TEXT NOT NULL,
|
|
253
|
+
relation_type TEXT,
|
|
254
|
+
confidence REAL,
|
|
255
|
+
PRIMARY KEY (stash_root, file_path, relation_order),
|
|
256
|
+
FOREIGN KEY (stash_root, file_path)
|
|
257
|
+
REFERENCES graph_files(stash_root, file_path)
|
|
258
|
+
ON DELETE CASCADE
|
|
259
|
+
);
|
|
260
|
+
|
|
261
|
+
CREATE INDEX IF NOT EXISTS idx_graph_file_relations_lookup
|
|
262
|
+
ON graph_file_relations(stash_root, file_path, relation_order);
|
|
186
263
|
`);
|
|
187
264
|
// FTS-dirty queue. Created here (not lazily on first upsert) so the
|
|
188
265
|
// per-entry write path doesn't issue a CREATE TABLE IF NOT EXISTS on
|
|
@@ -247,6 +324,10 @@ function ensureSchema(db, embeddingDim) {
|
|
|
247
324
|
}
|
|
248
325
|
// Usage telemetry table
|
|
249
326
|
ensureUsageEventsSchema(db);
|
|
327
|
+
// Registry index cache table — caches remote registry index documents so
|
|
328
|
+
// `akm search` does not hit the network on every invocation. The DDL is
|
|
329
|
+
// defined in state-db.ts and shared here to avoid duplication.
|
|
330
|
+
db.exec(REGISTRY_INDEX_CACHE_DDL);
|
|
250
331
|
// Restore usage_events backed up by the version-upgrade path above.
|
|
251
332
|
restoreUsageEventsBackup(db, usageBackup);
|
|
252
333
|
}
|
|
@@ -282,6 +363,16 @@ function handleVersionUpgrade(db) {
|
|
|
282
363
|
db.exec("DROP TABLE IF EXISTS entries_vec");
|
|
283
364
|
db.exec("DROP TABLE IF EXISTS entries_fts");
|
|
284
365
|
db.exec("DROP TABLE IF EXISTS index_dir_state");
|
|
366
|
+
db.exec("DROP TABLE IF EXISTS llm_enrichment_cache");
|
|
367
|
+
db.exec("DROP INDEX IF EXISTS idx_llm_cache_updated");
|
|
368
|
+
db.exec("DROP TABLE IF EXISTS graph_file_relations");
|
|
369
|
+
db.exec("DROP TABLE IF EXISTS graph_file_entities");
|
|
370
|
+
db.exec("DROP TABLE IF EXISTS graph_files");
|
|
371
|
+
db.exec("DROP TABLE IF EXISTS graph_meta");
|
|
372
|
+
db.exec("DROP TABLE IF EXISTS graph_relations");
|
|
373
|
+
db.exec("DROP TABLE IF EXISTS graph_entities");
|
|
374
|
+
db.exec("DROP TABLE IF EXISTS graph_nodes");
|
|
375
|
+
db.exec("DROP TABLE IF EXISTS graph_stashes");
|
|
285
376
|
db.exec("DROP INDEX IF EXISTS idx_entries_dir");
|
|
286
377
|
db.exec("DROP INDEX IF EXISTS idx_entries_type");
|
|
287
378
|
db.exec("DROP TABLE IF EXISTS entries");
|
|
@@ -953,3 +1044,289 @@ export function upsertUtilityScore(db, entryId, data) {
|
|
|
953
1044
|
updated_at = datetime('now')
|
|
954
1045
|
`).run(entryId, data.utility, data.showCount, data.searchCount, data.selectRate, data.lastUsedAt ?? null);
|
|
955
1046
|
}
|
|
1047
|
+
/**
|
|
1048
|
+
* Look up a cached LLM result for the given asset_ref.
|
|
1049
|
+
*
|
|
1050
|
+
* Returns `undefined` when no entry exists OR when the stored body_hash
|
|
1051
|
+
* doesn't match `currentBodyHash` (body has changed since the result was
|
|
1052
|
+
* cached). In both cases the caller should invoke the LLM and write a new
|
|
1053
|
+
* cache entry.
|
|
1054
|
+
*/
|
|
1055
|
+
export function getLlmCacheEntry(db, assetRef, currentBodyHash) {
|
|
1056
|
+
const row = db
|
|
1057
|
+
.prepare("SELECT asset_ref, body_hash, result_json, updated_at FROM llm_enrichment_cache WHERE asset_ref = ?")
|
|
1058
|
+
.get(assetRef);
|
|
1059
|
+
if (!row)
|
|
1060
|
+
return undefined;
|
|
1061
|
+
// Hash mismatch → body changed, treat as cache miss.
|
|
1062
|
+
if (row.body_hash !== currentBodyHash)
|
|
1063
|
+
return undefined;
|
|
1064
|
+
return {
|
|
1065
|
+
assetRef: row.asset_ref,
|
|
1066
|
+
bodyHash: row.body_hash,
|
|
1067
|
+
resultJson: row.result_json,
|
|
1068
|
+
updatedAt: row.updated_at,
|
|
1069
|
+
};
|
|
1070
|
+
}
|
|
1071
|
+
/**
|
|
1072
|
+
* Insert or update a cached LLM result for the given asset_ref.
|
|
1073
|
+
*/
|
|
1074
|
+
export function upsertLlmCacheEntry(db, assetRef, bodyHash, resultJson) {
|
|
1075
|
+
db.prepare(`INSERT INTO llm_enrichment_cache (asset_ref, body_hash, result_json, updated_at)
|
|
1076
|
+
VALUES (?, ?, ?, ?)
|
|
1077
|
+
ON CONFLICT(asset_ref) DO UPDATE SET
|
|
1078
|
+
body_hash = excluded.body_hash,
|
|
1079
|
+
result_json = excluded.result_json,
|
|
1080
|
+
updated_at = excluded.updated_at`).run(assetRef, bodyHash, resultJson, Date.now());
|
|
1081
|
+
}
|
|
1082
|
+
/**
|
|
1083
|
+
* Delete LLM cache entries whose asset_ref is no longer present in the
|
|
1084
|
+
* `entries` table. Should be called during the cleanup phase of each index
|
|
1085
|
+
* run to prevent the cache from growing unboundedly as assets are removed.
|
|
1086
|
+
*
|
|
1087
|
+
* The delete uses exact `NOT IN` comparisons against the entries `file_path` and `entry_key` columns because
|
|
1088
|
+
* graph/memory cache refs are absolute file paths, while enrichment cache
|
|
1089
|
+
* refs are entry_key strings — we preserve any entry that still has a
|
|
1090
|
+
* corresponding row in either the entries table (by entry_key) or that
|
|
1091
|
+
* matches a live file_path.
|
|
1092
|
+
*/
|
|
1093
|
+
export function clearStaleCacheEntries(db) {
|
|
1094
|
+
try {
|
|
1095
|
+
db.exec(`
|
|
1096
|
+
DELETE FROM llm_enrichment_cache
|
|
1097
|
+
WHERE asset_ref NOT IN (SELECT file_path FROM entries)
|
|
1098
|
+
AND asset_ref NOT IN (SELECT entry_key FROM entries)
|
|
1099
|
+
`);
|
|
1100
|
+
}
|
|
1101
|
+
catch {
|
|
1102
|
+
/* ignore — table may not exist in very old DBs opened without ensureSchema */
|
|
1103
|
+
}
|
|
1104
|
+
}
|
|
1105
|
+
/**
|
|
1106
|
+
* Compute a stable SHA-256 hex digest of a UTF-8 string using Bun's native
|
|
1107
|
+
* hashing. Used as the body_hash key in `llm_enrichment_cache`.
|
|
1108
|
+
*
|
|
1109
|
+
* Bun.CryptoHasher is synchronous, unlike the promise-based Web Crypto digest API,
|
|
1110
|
+
* making it suitable for use inside tight per-asset loops.
|
|
1111
|
+
*/
|
|
1112
|
+
export function computeBodyHash(body) {
|
|
1113
|
+
const hasher = new Bun.CryptoHasher("sha256");
|
|
1114
|
+
hasher.update(body);
|
|
1115
|
+
return hasher.digest("hex");
|
|
1116
|
+
}
|
|
1117
|
+
/**
|
|
1118
|
+
* Count search and show events for the given entry refs.
|
|
1119
|
+
* Returns a Map<ref, count> with only refs that have at least one event.
|
|
1120
|
+
* Used by the improve loop to find high-retrieval assets without feedback.
|
|
1121
|
+
*/
|
|
1122
|
+
export function getRetrievalCounts(db, refs) {
|
|
1123
|
+
if (refs.length === 0)
|
|
1124
|
+
return new Map();
|
|
1125
|
+
const result = new Map();
|
|
1126
|
+
// Chunk to stay within SQLITE_MAX_VARIABLE_NUMBER (same pattern as getUtilityScoresByIds).
|
|
1127
|
+
for (let i = 0; i < refs.length; i += SQLITE_CHUNK_SIZE) {
|
|
1128
|
+
const chunk = refs.slice(i, i + SQLITE_CHUNK_SIZE);
|
|
1129
|
+
const placeholders = chunk.map(() => "?").join(", ");
|
|
1130
|
+
const rows = db
|
|
1131
|
+
.prepare(`SELECT entry_ref, COUNT(*) AS cnt FROM usage_events
|
|
1132
|
+
WHERE event_type IN ('search','show') AND entry_ref IN (${placeholders})
|
|
1133
|
+
GROUP BY entry_ref`)
|
|
1134
|
+
.all(...chunk);
|
|
1135
|
+
for (const r of rows)
|
|
1136
|
+
result.set(r.entry_ref, r.cnt);
|
|
1137
|
+
}
|
|
1138
|
+
return result;
|
|
1139
|
+
}
|
|
1140
|
+
/**
|
|
1141
|
+
* Apply a MemRL reward signal to a batch of entries via exponential moving
|
|
1142
|
+
* average (EMA): next = clamp(current + lr * (reward - current), 0, 1).
|
|
1143
|
+
*
|
|
1144
|
+
* Wrapped in a single transaction so all bumps succeed or fail together.
|
|
1145
|
+
* The indexer (`akm index`) will overwrite these values at next reindex run;
|
|
1146
|
+
* bumps are intentionally temporary hints between index runs, not permanent
|
|
1147
|
+
* overrides.
|
|
1148
|
+
*/
|
|
1149
|
+
export function bumpUtilityScoresBatch(db, entryIds, reward, lr = 0.1) {
|
|
1150
|
+
if (entryIds.length === 0)
|
|
1151
|
+
return;
|
|
1152
|
+
db.transaction(() => {
|
|
1153
|
+
const scoreMap = getUtilityScoresByIds(db, entryIds);
|
|
1154
|
+
const now = new Date().toISOString();
|
|
1155
|
+
const stmt = db.prepare(`INSERT INTO utility_scores (entry_id, utility, show_count, search_count, select_rate, last_used_at, updated_at)
|
|
1156
|
+
VALUES (?, ?, 0, 0, 0, ?, ?)
|
|
1157
|
+
ON CONFLICT(entry_id) DO UPDATE SET
|
|
1158
|
+
utility = excluded.utility,
|
|
1159
|
+
updated_at = excluded.updated_at`);
|
|
1160
|
+
for (const entryId of entryIds) {
|
|
1161
|
+
const existing = scoreMap.get(entryId);
|
|
1162
|
+
const current = existing?.utility ?? 0;
|
|
1163
|
+
const next = Math.max(0, Math.min(1, current + lr * (reward - current)));
|
|
1164
|
+
stmt.run(entryId, next, now, now);
|
|
1165
|
+
}
|
|
1166
|
+
})();
|
|
1167
|
+
}
|
|
1168
|
+
// ── Indexer-phase helpers (moved from indexer.ts) ────────────────────────────
|
|
1169
|
+
/**
|
|
1170
|
+
* Return all entries that do not yet have an embedding row.
|
|
1171
|
+
* Used by the embedding phase to determine which entries need vectors generated.
|
|
1172
|
+
*/
|
|
1173
|
+
export function getAllEntriesForEmbedding(db) {
|
|
1174
|
+
return db
|
|
1175
|
+
.prepare(`
|
|
1176
|
+
SELECT e.id, e.search_text AS searchText, e.entry_key AS entryKey, e.file_path AS filePath FROM entries e
|
|
1177
|
+
WHERE NOT EXISTS (SELECT 1 FROM embeddings b WHERE b.id = e.id)
|
|
1178
|
+
AND e.entry_type != 'vault'
|
|
1179
|
+
`)
|
|
1180
|
+
.all();
|
|
1181
|
+
}
|
|
1182
|
+
/**
|
|
1183
|
+
* Upsert a workflow document record for an indexed entry.
|
|
1184
|
+
* Persists the parsed workflow AST as JSON alongside an FNV-1a hash of the
|
|
1185
|
+
* source content for future incremental fast-paths.
|
|
1186
|
+
*/
|
|
1187
|
+
export function upsertWorkflowDocument(db, entryId, doc, content) {
|
|
1188
|
+
const sourceHash = computeSourceHash(content);
|
|
1189
|
+
db.prepare(`INSERT INTO workflow_documents (entry_id, schema_version, document_json, source_path, source_hash, updated_at)
|
|
1190
|
+
VALUES (?, ?, ?, ?, ?, ?)
|
|
1191
|
+
ON CONFLICT(entry_id) DO UPDATE SET
|
|
1192
|
+
schema_version = excluded.schema_version,
|
|
1193
|
+
document_json = excluded.document_json,
|
|
1194
|
+
source_path = excluded.source_path,
|
|
1195
|
+
source_hash = excluded.source_hash,
|
|
1196
|
+
updated_at = excluded.updated_at`).run(entryId, doc.schemaVersion, JSON.stringify(doc), doc.source.path, sourceHash, new Date().toISOString());
|
|
1197
|
+
}
|
|
1198
|
+
/**
|
|
1199
|
+
* Compute a cheap FNV-1a hash of a buffer for source-identity tracking.
|
|
1200
|
+
* Not security-sensitive; used as an incremental fast-path skip key.
|
|
1201
|
+
*/
|
|
1202
|
+
export function computeSourceHash(content) {
|
|
1203
|
+
let hash = 0x811c9dc5;
|
|
1204
|
+
for (let i = 0; i < content.length; i++) {
|
|
1205
|
+
hash ^= content[i];
|
|
1206
|
+
hash = Math.imul(hash, 0x01000193);
|
|
1207
|
+
}
|
|
1208
|
+
return (hash >>> 0).toString(16);
|
|
1209
|
+
}
|
|
1210
|
+
/**
|
|
1211
|
+
* Return distinct zero-result search queries from the `usage_events` table
|
|
1212
|
+
* within the given lookback window (at most 20 rows, ordered by `created_at` descending).
|
|
1213
|
+
*
|
|
1214
|
+
* Reads from `usage_events` (event_type = 'search') where the metadata JSON
|
|
1215
|
+
* blob contains `resultCount = 0`. The `search_events` table never existed;
|
|
1216
|
+
* all errors are caught and an empty array is returned so callers never need
|
|
1217
|
+
* to guard against DB schema differences.
|
|
1218
|
+
*/
|
|
1219
|
+
export function getZeroResultSearches(db, sinceDays = 30) {
|
|
1220
|
+
const since = new Date(Date.now() - sinceDays * 24 * 60 * 60 * 1000).toISOString();
|
|
1221
|
+
try {
|
|
1222
|
+
const rows = db
|
|
1223
|
+
.prepare(`SELECT DISTINCT json_extract(metadata, '$.query') AS query
|
|
1224
|
+
FROM usage_events
|
|
1225
|
+
WHERE event_type = 'search'
|
|
1226
|
+
AND created_at >= ?
|
|
1227
|
+
AND json_extract(metadata, '$.resultCount') = 0
|
|
1228
|
+
ORDER BY created_at DESC LIMIT 20`)
|
|
1229
|
+
.all(since);
|
|
1230
|
+
return rows.map((r) => r.query).filter((q) => q !== null);
|
|
1231
|
+
}
|
|
1232
|
+
catch {
|
|
1233
|
+
return []; // table may not exist in older DBs
|
|
1234
|
+
}
|
|
1235
|
+
}
|
|
1236
|
+
/**
|
|
1237
|
+
* Look up an entry's numeric id by asset type and name (matches an `entry_key` ending in `type:name`).
|
|
1238
|
+
* Returns null when no matching row is found.
|
|
1239
|
+
*/
|
|
1240
|
+
export function getEntryByRef(db, type, name) {
|
|
1241
|
+
return db
|
|
1242
|
+
.prepare("SELECT id FROM entries WHERE entry_type = ? AND entry_key LIKE ?")
|
|
1243
|
+
.get(type, `%${type}:${name}`);
|
|
1244
|
+
}
|
|
1245
|
+
/**
|
|
1246
|
+
* Upsert a utility score adjustment derived from accumulated feedback events.
|
|
1247
|
+
*
|
|
1248
|
+
* - positiveDelta: +0.05 per positive event
|
|
1249
|
+
* - negativeDelta: -0.03 per negative event
|
|
1250
|
+
* - Score is clamped to [0.0, 1.0]
|
|
1251
|
+
* - A new row starts at 0.5 + delta so the first positive feedback immediately
|
|
1252
|
+
* lifts the entry above the neutral midpoint.
|
|
1253
|
+
*/
|
|
1254
|
+
export function applyFeedbackToUtilityScore(db, entryId, positiveCount, negativeCount) {
|
|
1255
|
+
if (positiveCount === 0 && negativeCount === 0)
|
|
1256
|
+
return;
|
|
1257
|
+
const delta = positiveCount * 0.05 - negativeCount * 0.03;
|
|
1258
|
+
const now = new Date().toISOString();
|
|
1259
|
+
db.prepare(`
|
|
1260
|
+
INSERT INTO utility_scores (entry_id, utility, show_count, search_count, select_rate, last_used_at, updated_at)
|
|
1261
|
+
VALUES (?, MAX(0.0, MIN(1.0, 0.5 + ?)), 0, 0, 0, ?, ?)
|
|
1262
|
+
ON CONFLICT(entry_id) DO UPDATE SET
|
|
1263
|
+
utility = MAX(0.0, MIN(1.0, utility + ?)),
|
|
1264
|
+
updated_at = ?
|
|
1265
|
+
`).run(entryId, delta, now, now, delta, now);
|
|
1266
|
+
}
|
|
1267
|
+
/**
|
|
1268
|
+
* Re-link detached usage_events to their current entry_ids via entry_ref.
|
|
1269
|
+
*
|
|
1270
|
+
* After a full rebuild, entry IDs change. This query matches events to their
|
|
1271
|
+
* new entry rows using the stable `entry_ref` ("type:name") column so usage
|
|
1272
|
+
* history survives a full reindex.
|
|
1273
|
+
*/
|
|
1274
|
+
export function relinkUsageEvents(db) {
|
|
1275
|
+
try {
|
|
1276
|
+
db.exec(`
|
|
1277
|
+
UPDATE usage_events SET entry_id = (
|
|
1278
|
+
SELECT e.id FROM entries e
|
|
1279
|
+
WHERE substr(e.entry_key, length(e.entry_key) - length(usage_events.entry_ref)) = ':' || usage_events.entry_ref
|
|
1280
|
+
LIMIT 1
|
|
1281
|
+
)
|
|
1282
|
+
WHERE entry_id IS NULL AND entry_ref IS NOT NULL
|
|
1283
|
+
`);
|
|
1284
|
+
}
|
|
1285
|
+
catch {
|
|
1286
|
+
/* ignore if table doesn't exist yet */
|
|
1287
|
+
}
|
|
1288
|
+
}
|
|
1289
|
+
// ── registry_index_cache helpers ─────────────────────────────────────────────
|
|
1290
|
+
/**
|
|
1291
|
+
* Upsert a registry index cache entry in index.db.
|
|
1292
|
+
*
|
|
1293
|
+
* @param db - Open index.db connection (from openDatabase / openExistingDatabase).
|
|
1294
|
+
* @param registryUrl - Canonical URL of the registry (used as primary key).
|
|
1295
|
+
* @param indexJson - Serialised registry index document (JSON string).
|
|
1296
|
+
* @param opts.etag - HTTP ETag from the response (optional).
|
|
1297
|
+
* @param opts.lastModified - HTTP Last-Modified from the response (optional).
|
|
1298
|
+
*/
|
|
1299
|
+
export function upsertRegistryIndexCache(db, registryUrl, indexJson, opts) {
|
|
1300
|
+
db.prepare(`
|
|
1301
|
+
INSERT INTO registry_index_cache (registry_url, fetched_at, etag, last_modified, index_json)
|
|
1302
|
+
VALUES (?, ?, ?, ?, ?)
|
|
1303
|
+
ON CONFLICT(registry_url) DO UPDATE SET
|
|
1304
|
+
fetched_at = excluded.fetched_at,
|
|
1305
|
+
etag = excluded.etag,
|
|
1306
|
+
last_modified = excluded.last_modified,
|
|
1307
|
+
index_json = excluded.index_json
|
|
1308
|
+
`).run(registryUrl, new Date().toISOString(), opts?.etag ?? null, opts?.lastModified ?? null, indexJson);
|
|
1309
|
+
}
|
|
1310
|
+
/**
|
|
1311
|
+
* Look up a cached registry index entry from index.db.
|
|
1312
|
+
* Returns undefined when not found, when `fetched_at` cannot be parsed, or when the entry is older than `maxAgeMs`.
|
|
1313
|
+
*
|
|
1314
|
+
* TTL check: if `Date.now() - new Date(fetched_at).getTime() > maxAgeMs` the
|
|
1315
|
+
* entry is considered a cache miss and undefined is returned.
|
|
1316
|
+
*
|
|
1317
|
+
* @param db - Open index.db connection.
|
|
1318
|
+
* @param registryUrl - Canonical URL of the registry (primary key).
|
|
1319
|
+
* @param maxAgeMs - Maximum age in milliseconds before the entry is stale (default: 1 hour).
|
|
1320
|
+
*/
|
|
1321
|
+
export function getRegistryIndexCache(db, registryUrl, maxAgeMs = 3_600_000 /* 1 hour */) {
|
|
1322
|
+
const row = db
|
|
1323
|
+
.prepare(`SELECT fetched_at, etag, last_modified, index_json
|
|
1324
|
+
FROM registry_index_cache WHERE registry_url = ?`)
|
|
1325
|
+
.get(registryUrl);
|
|
1326
|
+
if (!row)
|
|
1327
|
+
return undefined;
|
|
1328
|
+
const fetchedAt = Date.parse(row.fetched_at);
|
|
1329
|
+
if (Number.isNaN(fetchedAt) || Date.now() - fetchedAt > maxAgeMs)
|
|
1330
|
+
return undefined;
|
|
1331
|
+
return { indexJson: row.index_json, etag: row.etag, lastModified: row.last_modified };
|
|
1332
|
+
}
|
|
@@ -9,9 +9,67 @@
|
|
|
9
9
|
* behind a single entry point.
|
|
10
10
|
*/
|
|
11
11
|
import fs from "node:fs";
|
|
12
|
+
import path from "node:path";
|
|
13
|
+
import { ASSET_SPECS, TYPE_DIRS } from "../core/asset-spec";
|
|
12
14
|
import { getDbPath } from "../core/paths";
|
|
13
15
|
import { warn } from "../core/warn";
|
|
14
16
|
import { closeDatabase, getEntryCount, getMeta, openExistingDatabase } from "./db";
|
|
17
|
+
function getIndexableFiles(root, spec) {
|
|
18
|
+
if (!fs.existsSync(root))
|
|
19
|
+
return [];
|
|
20
|
+
const files = [];
|
|
21
|
+
const stack = [root];
|
|
22
|
+
while (stack.length > 0) {
|
|
23
|
+
const current = stack.pop();
|
|
24
|
+
if (!current)
|
|
25
|
+
continue;
|
|
26
|
+
let entries;
|
|
27
|
+
try {
|
|
28
|
+
entries = fs.readdirSync(current, { withFileTypes: true });
|
|
29
|
+
}
|
|
30
|
+
catch {
|
|
31
|
+
continue;
|
|
32
|
+
}
|
|
33
|
+
for (const entry of entries) {
|
|
34
|
+
if (entry.name === ".stash.json")
|
|
35
|
+
continue;
|
|
36
|
+
const fullPath = path.join(current, entry.name);
|
|
37
|
+
if (entry.isSymbolicLink())
|
|
38
|
+
continue;
|
|
39
|
+
if (entry.isDirectory()) {
|
|
40
|
+
if (entry.name.startsWith("."))
|
|
41
|
+
continue;
|
|
42
|
+
stack.push(fullPath);
|
|
43
|
+
continue;
|
|
44
|
+
}
|
|
45
|
+
if (entry.isFile() && spec.isRelevantFile(entry.name)) {
|
|
46
|
+
files.push(fullPath);
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
return files;
|
|
51
|
+
}
|
|
52
|
+
function hasNewerIndexableFiles(stashDir, builtAt) {
|
|
53
|
+
if (!builtAt)
|
|
54
|
+
return true;
|
|
55
|
+
const builtAtMs = new Date(builtAt).getTime();
|
|
56
|
+
if (!Number.isFinite(builtAtMs))
|
|
57
|
+
return true;
|
|
58
|
+
for (const [type, spec] of Object.entries(ASSET_SPECS)) {
|
|
59
|
+
const typeRoot = path.join(stashDir, TYPE_DIRS[type] ?? spec.stashDir);
|
|
60
|
+
const files = getIndexableFiles(typeRoot, spec);
|
|
61
|
+
for (const file of files) {
|
|
62
|
+
try {
|
|
63
|
+
if (fs.statSync(file).mtimeMs > builtAtMs)
|
|
64
|
+
return true;
|
|
65
|
+
}
|
|
66
|
+
catch {
|
|
67
|
+
return true;
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
return false;
|
|
72
|
+
}
|
|
15
73
|
/**
|
|
16
74
|
* Check whether the local index is stale relative to the given stash directory.
|
|
17
75
|
* Returns `true` when the index is missing, empty, or was built against a
|
|
@@ -27,6 +85,9 @@ export function isIndexStale(stashDir) {
|
|
|
27
85
|
const entryCount = getEntryCount(db);
|
|
28
86
|
if (entryCount === 0)
|
|
29
87
|
return true;
|
|
88
|
+
const builtAt = getMeta(db, "builtAt");
|
|
89
|
+
if (hasNewerIndexableFiles(stashDir, builtAt))
|
|
90
|
+
return true;
|
|
30
91
|
const storedStashDir = getMeta(db, "stashDir");
|
|
31
92
|
if (storedStashDir !== stashDir) {
|
|
32
93
|
// Check if the incoming stashDir appears in the stored stashDirs array
|