akm-cli 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/indexer.js CHANGED
@@ -1,9 +1,11 @@
1
1
  import fs from "node:fs";
2
2
  import path from "node:path";
3
3
  import { resolveStashDir } from "./common";
4
- import { closeDatabase, DB_VERSION, deleteEntriesByDir, deleteEntriesByStashDir, getEntriesByDir, getEntryCount, getMeta, isVecAvailable, openDatabase, rebuildFts, setMeta, upsertEmbedding, upsertEntry, warnIfVecMissing, } from "./db";
4
+ import { closeDatabase, deleteEntriesByDir, deleteEntriesByStashDir, getEntriesByDir, getEntryCount, getMeta, isVecAvailable, openDatabase, rebuildFts, setMeta, upsertEmbedding, upsertEntry, upsertUtilityScore, warnIfVecMissing, } from "./db";
5
5
  import { generateMetadataFlat, loadStashFile } from "./metadata";
6
6
  import { getDbPath } from "./paths";
7
+ import { buildSearchText } from "./search-fields";
8
+ import { ensureUsageEventsSchema, purgeOldUsageEvents } from "./usage-events";
7
9
  import { walkStashFlat } from "./walker";
8
10
  import { warn } from "./warn";
9
11
  // ── Indexer ──────────────────────────────────────────────────────────────────
@@ -12,7 +14,10 @@ export async function akmIndex(options) {
12
14
  // Load config and resolve all stash sources
13
15
  const { loadConfig } = await import("./config.js");
14
16
  const config = loadConfig();
15
- const { resolveAllStashDirs } = await import("./search-source.js");
17
+ // Ensure git stash caches are extracted before resolving stash dirs,
18
+ // so their content directories exist on disk for the walker to discover.
19
+ const { ensureGitCaches, resolveAllStashDirs } = await import("./search-source.js");
20
+ await ensureGitCaches(config);
16
21
  const allStashDirs = resolveAllStashDirs(stashDir);
17
22
  const t0 = Date.now();
18
23
  // Open database — pass embedding dimension from config if available
@@ -26,7 +31,7 @@ export async function akmIndex(options) {
26
31
  const isIncremental = !options?.full && prevStashDir === stashDir && !!prevBuiltAt;
27
32
  const builtAtMs = isIncremental && prevBuiltAt ? new Date(prevBuiltAt).getTime() : 0;
28
33
  if (options?.full || !isIncremental) {
29
- // HI-5: the delete is now merged into the insert transaction inside
34
+ // The delete is now merged into the insert transaction inside
30
35
  // indexEntries() so that a reader never sees an empty database between
31
36
  // the wipe and the re-inserts. The doFullDelete flag signals this path.
32
37
  }
@@ -59,9 +64,9 @@ export async function akmIndex(options) {
59
64
  const tWalkStart = Date.now();
60
65
  // Walk stash dirs and index entries.
61
66
  // doFullDelete=true merges the wipe into the same transaction as the
62
- // inserts (HI-5) so readers never see an empty database mid-rebuild.
67
+ // inserts so readers never see an empty database mid-rebuild.
63
68
  const doFullDelete = options?.full || !isIncremental;
64
- const { scannedDirs, skippedDirs, generatedCount, dirsNeedingLlm } = await indexEntries(db, allStashDirs, stashDir, isIncremental, builtAtMs, doFullDelete);
69
+ const { scannedDirs, skippedDirs, generatedCount, dirsNeedingLlm } = await indexEntries(db, allStashDirs, isIncremental, builtAtMs, doFullDelete);
65
70
  const tWalkEnd = Date.now();
66
71
  // Enhance entries with LLM if configured
67
72
  await enhanceDirsWithLlm(db, config, dirsNeedingLlm);
@@ -69,11 +74,12 @@ export async function akmIndex(options) {
69
74
  // Rebuild FTS after all inserts
70
75
  rebuildFts(db);
71
76
  const tFtsEnd = Date.now();
77
+ // Recompute utility scores from usage_events after FTS rebuild
78
+ recomputeUtilityScores(db);
72
79
  // Generate embeddings if semantic search is enabled
73
80
  const hasEmbeddings = await generateEmbeddingsForDb(db, config);
74
81
  const tEmbedEnd = Date.now();
75
82
  // Update metadata
76
- setMeta(db, "version", String(DB_VERSION));
77
83
  setMeta(db, "builtAt", new Date().toISOString());
78
84
  setMeta(db, "stashDir", stashDir);
79
85
  setMeta(db, "stashDirs", JSON.stringify(allStashDirs));
@@ -104,7 +110,7 @@ export async function akmIndex(options) {
104
110
  }
105
111
  }
106
112
  // ── Extracted helpers for indexing ────────────────────────────────────────────
107
- async function indexEntries(db, allStashDirs, _stashDir, isIncremental, builtAtMs, doFullDelete = false) {
113
+ async function indexEntries(db, allStashDirs, isIncremental, builtAtMs, doFullDelete = false) {
108
114
  let scannedDirs = 0;
109
115
  let skippedDirs = 0;
110
116
  let generatedCount = 0;
@@ -170,7 +176,7 @@ async function indexEntries(db, allStashDirs, _stashDir, isIncremental, builtAtM
170
176
  // occurrence wins.
171
177
  const indexedAssetIdentities = new Set();
172
178
  const insertTransaction = db.transaction(() => {
173
- // HI-5: Perform the full-rebuild wipe as the FIRST step of the insert
179
+ // Perform the full-rebuild wipe as the FIRST step of the insert
174
180
  // transaction so delete and re-insert are atomic — a concurrent reader
175
181
  // never observes an empty database between the two operations.
176
182
  if (doFullDelete) {
@@ -189,6 +195,8 @@ async function indexEntries(db, allStashDirs, _stashDir, isIncremental, builtAtM
189
195
  }
190
196
  }
191
197
  db.exec("DELETE FROM entries_fts");
198
+ db.exec("DELETE FROM utility_scores");
199
+ db.exec("DELETE FROM usage_events");
192
200
  db.exec("DELETE FROM entries");
193
201
  }
194
202
  for (const { dirPath, currentStashDir, files, stash, skip } of dirRecords) {
@@ -235,8 +243,8 @@ async function enhanceDirsWithLlm(db, config, dirsNeedingLlm) {
235
243
  if (generatedEntries.length === 0)
236
244
  continue;
237
245
  const generatedStash = { entries: generatedEntries };
238
- const enhanced = await enhanceStashWithLlm(config.llm, generatedStash, dirPath, files);
239
- // HI-2: Re-upsert the enhanced entries in a single transaction so a crash
246
+ const enhanced = await enhanceStashWithLlm(config.llm, generatedStash, files);
247
+ // Re-upsert the enhanced entries in a single transaction so a crash
240
248
  // cannot leave half the entries updated and the rest stale.
241
249
  db.transaction(() => {
242
250
  for (const entry of enhanced.entries) {
@@ -258,7 +266,7 @@ async function generateEmbeddingsForDb(db, config) {
258
266
  return true;
259
267
  const texts = allEntries.map((e) => e.searchText);
260
268
  const embeddings = await embedBatch(texts, config.embedding);
261
- // HI-3: Wrap all embedding upserts in a single transaction so partial
269
+ // Wrap all embedding upserts in a single transaction so partial
262
270
  // state is rolled back on failure rather than leaving the table half-filled.
263
271
  db.transaction(() => {
264
272
  for (let i = 0; i < allEntries.length; i++) {
@@ -289,7 +297,6 @@ function attachFileSize(entry, entryPath) {
289
297
  return entry;
290
298
  }
291
299
  }
292
- /** Set of all known type directory names */
293
300
  function isDirStale(dirPath, currentFiles, previousEntries, builtAtMs) {
294
301
  // Check if file set changed (additions or deletions)
295
302
  const prevFileNames = new Set(previousEntries.map((ie) => ie.entry.filename).filter((e) => !!e));
@@ -321,7 +328,7 @@ function isDirStale(dirPath, currentFiles, previousEntries, builtAtMs) {
321
328
  }
322
329
  return false;
323
330
  }
324
- async function enhanceStashWithLlm(llmConfig, stash, _dirPath, files) {
331
+ async function enhanceStashWithLlm(llmConfig, stash, files) {
325
332
  const { enhanceMetadata } = await import("./llm.js");
326
333
  const enhanced = [];
327
334
  for (const entry of stash.entries) {
@@ -391,30 +398,74 @@ export function matchEntryToFile(entryName, fileMap, files) {
391
398
  // Fallback to first file, or null if no files are available
392
399
  return files[0] || null;
393
400
  }
394
- export function buildSearchText(entry) {
395
- const parts = [entry.name.replace(/[-_]/g, " ")];
396
- if (entry.description)
397
- parts.push(entry.description);
398
- if (entry.tags)
399
- parts.push(entry.tags.join(" "));
400
- if (entry.examples)
401
- parts.push(entry.examples.join(" "));
402
- if (entry.aliases)
403
- parts.push(entry.aliases.join(" "));
404
- if (entry.searchHints)
405
- parts.push(entry.searchHints.join(" "));
406
- if (entry.usage)
407
- parts.push(entry.usage.join(" "));
408
- if (entry.intent) {
409
- if (entry.intent.when)
410
- parts.push(entry.intent.when);
411
- if (entry.intent.input)
412
- parts.push(entry.intent.input);
413
- if (entry.intent.output)
414
- parts.push(entry.intent.output);
401
+ export { buildSearchFields, buildSearchText } from "./search-fields";
402
+ // ── Utility score recomputation ──────────────────────────────────────────────
/** Retention window for usage events: events older than this are purged. */
const USAGE_EVENT_RETENTION_DAYS = 90;
/**
 * Recompute utility scores for every entry that has rows in usage_events.
 *
 * For each such entry:
 *   - searchCount = number of events with event_type = 'search'
 *   - showCount   = number of events with event_type = 'show'
 *   - selectRate  = showCount / searchCount, capped at 1 (0 when searchCount is 0)
 *   - utility     = previousUtility * decay + selectRate * (1 - decay),
 *                   where decay = 0.7 ** elapsedDays and elapsedDays is the
 *                   time since the previous recomputation, never less than 1.
 *                   Entries without a stored score start from 0.
 *
 * Before querying, makes sure the usage_events table exists and purges events
 * older than USAGE_EVENT_RETENTION_DAYS. The time of this run is always stored
 * in the "last_utility_computed_at" meta key, even when there was nothing to
 * score.
 *
 * Called during `akm index` after FTS rebuild.
 *
 * @param db - Open database handle (must support `prepare(sql).all()`)
 */
export function recomputeUtilityScores(db) {
    const EMA_DECAY = 0.7;
    const MS_PER_DAY = 1000 * 60 * 60 * 24;
    // Ensure usage_events table exists before querying
    ensureUsageEventsSchema(db);
    // Purge stale usage events
    purgeOldUsageEvents(db, USAGE_EVENT_RETENTION_DAYS);
    // Time-proportional decay: the previous score is decayed by
    // EMA_DECAY ** elapsedDays. The floor of one day means a run always
    // applies at least one full round of decay.
    const lastComputedAt = getMeta(db, "last_utility_computed_at");
    let elapsedDays = 1; // default for first run
    if (lastComputedAt) {
        const elapsedMs = Date.now() - new Date(lastComputedAt).getTime();
        // An unparseable timestamp yields NaN, and Math.max(1, NaN) is NaN,
        // which would turn every stored utility into NaN. Keep the one-day
        // default in that case instead of poisoning the table.
        if (Number.isFinite(elapsedMs)) {
            elapsedDays = Math.max(1, elapsedMs / MS_PER_DAY);
        }
    }
    const emaDecay = EMA_DECAY ** elapsedDays;
    const emaNew = 1 - emaDecay; // complement so weights still sum to 1
    // Single aggregate query instead of N+1 per-entry queries.
    // Only processes entries that actually have usage events.
    const usageRows = db
        .prepare(`
      SELECT entry_id,
        SUM(CASE WHEN event_type = 'search' THEN 1 ELSE 0 END) AS search_count,
        SUM(CASE WHEN event_type = 'show' THEN 1 ELSE 0 END) AS show_count,
        MAX(created_at) AS last_used_at
      FROM usage_events
      WHERE entry_id IS NOT NULL
      GROUP BY entry_id
    `)
        .all();
    if (usageRows.length > 0) {
        // Batch-load existing utility scores
        const existingScores = new Map();
        const scoreRows = db.prepare("SELECT entry_id, utility FROM utility_scores").all();
        for (const row of scoreRows) {
            existingScores.set(row.entry_id, row.utility);
        }
        for (const row of usageRows) {
            const selectRate = row.search_count > 0 ? Math.min(1, row.show_count / row.search_count) : 0;
            const prevUtility = existingScores.get(row.entry_id) ?? 0;
            const utility = prevUtility * emaDecay + selectRate * emaNew;
            upsertUtilityScore(db, row.entry_id, {
                utility,
                showCount: row.show_count,
                searchCount: row.search_count,
                selectRate,
                lastUsedAt: row.last_used_at ?? undefined,
            });
        }
    }
    // Record the run time whether or not any scores were updated.
    setMeta(db, "last_utility_computed_at", new Date().toISOString());
}
package/dist/info.js ADDED
@@ -0,0 +1,92 @@
1
+ import fs from "node:fs";
2
+ import { getAssetTypes } from "./asset-spec";
3
+ import { loadConfig } from "./config";
4
+ import { closeDatabase, getEntryCount, getMeta, isVecAvailable, openDatabase } from "./db";
5
+ import { getDbPath } from "./paths";
6
+ import { pkgVersion } from "./version";
7
+ /**
8
+ * Assemble system info describing the current capabilities, configuration,
9
+ * and index state. Used by `akm info`.
10
+ *
11
+ * @param options.dbPath - Override the database path (useful for testing)
12
+ */
13
+ export function assembleInfo(options) {
14
+ const config = loadConfig();
15
+ // Asset types
16
+ const assetTypes = getAssetTypes();
17
+ // Search modes
18
+ const searchModes = ["fts"];
19
+ if (config.semanticSearch) {
20
+ searchModes.push("semantic", "hybrid");
21
+ }
22
+ // Registries (strip sensitive fields like apiKey from options)
23
+ const registries = (config.registries ?? []).map((r) => ({
24
+ url: r.url,
25
+ ...(r.name ? { name: r.name } : {}),
26
+ ...(r.provider ? { provider: r.provider } : {}),
27
+ ...(r.enabled !== undefined ? { enabled: r.enabled } : {}),
28
+ }));
29
+ // Stash providers
30
+ const stashProviders = (config.stashes ?? []).map((s) => ({
31
+ type: s.type,
32
+ ...(s.name ? { name: s.name } : {}),
33
+ ...(s.path ? { path: s.path } : {}),
34
+ ...(s.url ? { url: s.url } : {}),
35
+ ...(s.enabled !== undefined ? { enabled: s.enabled } : {}),
36
+ }));
37
+ // Index stats
38
+ const indexStats = readIndexStats(options?.dbPath);
39
+ return {
40
+ schemaVersion: 1,
41
+ version: pkgVersion,
42
+ assetTypes,
43
+ searchModes,
44
+ registries,
45
+ stashProviders,
46
+ indexStats,
47
+ };
48
+ }
/**
 * Read summary statistics from the index database.
 *
 * Returns all-zero/empty statistics when the database file does not exist or
 * when opening or reading it throws; this function itself never throws for
 * those cases. The database handle, if one was opened, is always closed.
 *
 * @param dbPath - Database file to inspect; defaults to getDbPath() when nullish
 */
function readIndexStats(dbPath) {
    // Fresh object per call so callers never share a mutable result.
    const emptyStats = () => ({
        entryCount: 0,
        lastBuiltAt: null,
        hasEmbeddings: false,
        vecAvailable: false,
    });
    const indexFile = dbPath ?? getDbPath();
    if (!fs.existsSync(indexFile)) {
        return emptyStats();
    }
    let handle;
    try {
        handle = openDatabase(indexFile);
        const total = getEntryCount(handle);
        const builtAt = getMeta(handle, "builtAt") ?? null;
        const vecLoaded = isVecAvailable(handle);
        const embeddingsFlag = getMeta(handle, "hasEmbeddings") === "1";
        return {
            entryCount: total,
            lastBuiltAt: builtAt,
            hasEmbeddings: embeddingsFlag,
            vecAvailable: vecLoaded,
        };
    }
    catch {
        // Best effort: an unreadable index is reported the same as a missing one.
        return emptyStats();
    }
    finally {
        if (handle) {
            try {
                closeDatabase(handle);
            }
            catch {
                /* ignore */
            }
        }
    }
}