akm-cli 0.1.3 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/indexer.js CHANGED
@@ -1,18 +1,24 @@
1
1
  import fs from "node:fs";
2
2
  import path from "node:path";
3
- import { resolveStashDir } from "./common";
4
- import { closeDatabase, DB_VERSION, deleteEntriesByDir, deleteEntriesByStashDir, getEntriesByDir, getEntryCount, getMeta, isVecAvailable, openDatabase, rebuildFts, setMeta, upsertEmbedding, upsertEntry, warnIfVecMissing, } from "./db";
3
+ import { isHttpUrl, resolveStashDir } from "./common";
4
+ import { closeDatabase, deleteEntriesByDir, deleteEntriesByStashDir, getEmbeddingCount, getEntriesByDir, getEntryCount, getMeta, isVecAvailable, openDatabase, rebuildFts, setMeta, upsertEmbedding, upsertEntry, upsertUtilityScore, warnIfVecMissing, } from "./db";
5
5
  import { generateMetadataFlat, loadStashFile } from "./metadata";
6
6
  import { getDbPath } from "./paths";
7
+ import { buildSearchText } from "./search-fields";
8
+ import { ensureUsageEventsSchema, purgeOldUsageEvents } from "./usage-events";
7
9
  import { walkStashFlat } from "./walker";
8
10
  import { warn } from "./warn";
9
11
  // ── Indexer ──────────────────────────────────────────────────────────────────
10
12
  export async function akmIndex(options) {
11
13
  const stashDir = options?.stashDir || resolveStashDir();
14
+ const onProgress = options?.onProgress ?? (() => { });
12
15
  // Load config and resolve all stash sources
13
16
  const { loadConfig } = await import("./config.js");
14
17
  const config = loadConfig();
15
- const { resolveAllStashDirs } = await import("./search-source.js");
18
+ // Ensure git stash caches are extracted before resolving stash dirs,
19
+ // so their content directories exist on disk for the walker to discover.
20
+ const { ensureGitCaches, resolveAllStashDirs } = await import("./search-source.js");
21
+ await ensureGitCaches(config);
16
22
  const allStashDirs = resolveAllStashDirs(stashDir);
17
23
  const t0 = Date.now();
18
24
  // Open database — pass embedding dimension from config if available
@@ -25,8 +31,19 @@ export async function akmIndex(options) {
25
31
  const prevBuiltAt = getMeta(db, "builtAt");
26
32
  const isIncremental = !options?.full && prevStashDir === stashDir && !!prevBuiltAt;
27
33
  const builtAtMs = isIncremental && prevBuiltAt ? new Date(prevBuiltAt).getTime() : 0;
34
+ onProgress({
35
+ phase: "summary",
36
+ message: buildIndexSummaryMessage({
37
+ mode: isIncremental ? "incremental" : "full",
38
+ stashSources: allStashDirs.length,
39
+ semanticSearch: config.semanticSearch,
40
+ embeddingProvider: getEmbeddingProvider(config.embedding),
41
+ llmEnabled: !!config.llm,
42
+ vecAvailable: isVecAvailable(db),
43
+ }),
44
+ });
28
45
  if (options?.full || !isIncremental) {
29
- // HI-5: the delete is now merged into the insert transaction inside
46
+ // The delete is now merged into the insert transaction inside
30
47
  // indexEntries() so that a reader never sees an empty database between
31
48
  // the wipe and the re-inserts. The doFullDelete flag signals this path.
32
49
  }
@@ -59,21 +76,33 @@ export async function akmIndex(options) {
59
76
  const tWalkStart = Date.now();
60
77
  // Walk stash dirs and index entries.
61
78
  // doFullDelete=true merges the wipe into the same transaction as the
62
- // inserts (HI-5) so readers never see an empty database mid-rebuild.
79
+ // inserts so readers never see an empty database mid-rebuild.
63
80
  const doFullDelete = options?.full || !isIncremental;
64
- const { scannedDirs, skippedDirs, generatedCount, dirsNeedingLlm } = await indexEntries(db, allStashDirs, stashDir, isIncremental, builtAtMs, doFullDelete);
81
+ const { scannedDirs, skippedDirs, generatedCount, dirsNeedingLlm } = await indexEntries(db, allStashDirs, isIncremental, builtAtMs, doFullDelete);
82
+ onProgress({
83
+ phase: "scan",
84
+ message: `Scanned ${scannedDirs} ${scannedDirs === 1 ? "directory" : "directories"} and skipped ${skippedDirs}.`,
85
+ });
65
86
  const tWalkEnd = Date.now();
66
87
  // Enhance entries with LLM if configured
67
88
  await enhanceDirsWithLlm(db, config, dirsNeedingLlm);
89
+ onProgress({
90
+ phase: "llm",
91
+ message: config.llm
92
+ ? `LLM enhancement reviewed ${dirsNeedingLlm.length} ${dirsNeedingLlm.length === 1 ? "directory" : "directories"}.`
93
+ : "LLM enhancement disabled.",
94
+ });
68
95
  const tLlmEnd = Date.now();
69
96
  // Rebuild FTS after all inserts
70
97
  rebuildFts(db);
98
+ onProgress({ phase: "fts", message: "Rebuilt full-text search index." });
71
99
  const tFtsEnd = Date.now();
100
+ // Recompute utility scores from usage_events after FTS rebuild
101
+ recomputeUtilityScores(db);
72
102
  // Generate embeddings if semantic search is enabled
73
- const hasEmbeddings = await generateEmbeddingsForDb(db, config);
103
+ const hasEmbeddings = await generateEmbeddingsForDb(db, config, onProgress);
74
104
  const tEmbedEnd = Date.now();
75
105
  // Update metadata
76
- setMeta(db, "version", String(DB_VERSION));
77
106
  setMeta(db, "builtAt", new Date().toISOString());
78
107
  setMeta(db, "stashDir", stashDir);
79
108
  setMeta(db, "stashDirs", JSON.stringify(allStashDirs));
@@ -82,6 +111,8 @@ export async function akmIndex(options) {
82
111
  // Warn on every index run if using JS fallback with many entries
83
112
  warnIfVecMissing(db);
84
113
  const tEnd = Date.now();
114
+ const verification = verifyIndexState(db, config, totalEntries);
115
+ onProgress({ phase: "verify", message: verification.message });
85
116
  return {
86
117
  stashDir,
87
118
  totalEntries,
@@ -90,6 +121,7 @@ export async function akmIndex(options) {
90
121
  mode: isIncremental ? "incremental" : "full",
91
122
  directoriesScanned: scannedDirs,
92
123
  directoriesSkipped: skippedDirs,
124
+ verification,
93
125
  timing: {
94
126
  totalMs: tEnd - t0,
95
127
  walkMs: tWalkEnd - tWalkStart,
@@ -104,7 +136,7 @@ export async function akmIndex(options) {
104
136
  }
105
137
  }
106
138
  // ── Extracted helpers for indexing ────────────────────────────────────────────
107
- async function indexEntries(db, allStashDirs, _stashDir, isIncremental, builtAtMs, doFullDelete = false) {
139
+ async function indexEntries(db, allStashDirs, isIncremental, builtAtMs, doFullDelete = false) {
108
140
  let scannedDirs = 0;
109
141
  let skippedDirs = 0;
110
142
  let generatedCount = 0;
@@ -170,7 +202,7 @@ async function indexEntries(db, allStashDirs, _stashDir, isIncremental, builtAtM
170
202
  // occurrence wins.
171
203
  const indexedAssetIdentities = new Set();
172
204
  const insertTransaction = db.transaction(() => {
173
- // HI-5: Perform the full-rebuild wipe as the FIRST step of the insert
205
+ // Perform the full-rebuild wipe as the FIRST step of the insert
174
206
  // transaction so delete and re-insert are atomic — a concurrent reader
175
207
  // never observes an empty database between the two operations.
176
208
  if (doFullDelete) {
@@ -189,6 +221,8 @@ async function indexEntries(db, allStashDirs, _stashDir, isIncremental, builtAtM
189
221
  }
190
222
  }
191
223
  db.exec("DELETE FROM entries_fts");
224
+ db.exec("DELETE FROM utility_scores");
225
+ db.exec("DELETE FROM usage_events");
192
226
  db.exec("DELETE FROM entries");
193
227
  }
194
228
  for (const { dirPath, currentStashDir, files, stash, skip } of dirRecords) {
@@ -235,8 +269,8 @@ async function enhanceDirsWithLlm(db, config, dirsNeedingLlm) {
235
269
  if (generatedEntries.length === 0)
236
270
  continue;
237
271
  const generatedStash = { entries: generatedEntries };
238
- const enhanced = await enhanceStashWithLlm(config.llm, generatedStash, dirPath, files);
239
- // HI-2: Re-upsert the enhanced entries in a single transaction so a crash
272
+ const enhanced = await enhanceStashWithLlm(config.llm, generatedStash, files);
273
+ // Re-upsert the enhanced entries in a single transaction so a crash
240
274
  // cannot leave half the entries updated and the rest stale.
241
275
  db.transaction(() => {
242
276
  for (const entry of enhanced.entries) {
@@ -248,27 +282,43 @@ async function enhanceDirsWithLlm(db, config, dirsNeedingLlm) {
248
282
  })();
249
283
  }
250
284
  }
251
/**
 * Generate embeddings for the entries returned by `getAllEntriesForEmbedding`
 * and store them, reporting each step through `onProgress`.
 *
 * Every progress event has `phase: "embeddings"` and a human-readable `message`.
 * Failures are never propagated: they are logged through `warn`, reported as a
 * progress event, and turned into a `false` return value so indexing can
 * continue without semantic search.
 *
 * @param db - Database handle passed to the db helpers; must expose `transaction()`.
 * @param config - Reads `config.semanticSearch` (feature switch) and
 *   `config.embedding` (forwarded untouched to `embedBatch`).
 * @param onProgress - Optional progress callback. Defaults to a no-op so the
 *   earlier two-argument call form keeps working.
 * @returns {Promise<boolean>} `false` when semantic search is disabled or
 *   generation failed; `true` when there was nothing to embed or all
 *   embeddings were stored.
 */
async function generateEmbeddingsForDb(db, config, onProgress = () => { }) {
    if (!config.semanticSearch) {
        onProgress({ phase: "embeddings", message: "Semantic search disabled; skipping embeddings." });
        return false;
    }
    try {
        const { embedBatch } = await import("./embedder.js");
        const allEntries = getAllEntriesForEmbedding(db);
        if (allEntries.length === 0) {
            onProgress({ phase: "embeddings", message: "Embeddings already up to date." });
            return true;
        }
        onProgress({
            phase: "embeddings",
            message: `Generating embeddings for ${allEntries.length} entr${allEntries.length === 1 ? "y" : "ies"}.`,
        });
        const texts = allEntries.map((e) => e.searchText);
        const embeddings = await embedBatch(texts, config.embedding);
        // Entries and vectors are paired by index below; a short or long result
        // would store `undefined` or mis-assigned vectors, so reject it up front.
        if (embeddings.length !== allEntries.length) {
            throw new Error(`Embedding provider returned ${embeddings.length} vectors for ${allEntries.length} entries.`);
        }
        // Wrap all embedding upserts in a single transaction so partial
        // state is rolled back on failure rather than leaving the table half-filled.
        db.transaction(() => {
            for (let i = 0; i < allEntries.length; i++) {
                upsertEmbedding(db, allEntries[i].id, embeddings[i]);
            }
        })();
        onProgress({
            phase: "embeddings",
            message: `Stored ${embeddings.length} embedding${embeddings.length === 1 ? "" : "s"}.`,
        });
        return true;
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        warn("Embedding generation failed, continuing without:", reason);
        onProgress({
            phase: "embeddings",
            message: `Embedding generation failed: ${reason}`,
        });
        return false;
    }
}
@@ -289,7 +339,69 @@ function attachFileSize(entry, entryPath) {
289
339
  return entry;
290
340
  }
291
341
  }
292
- /** Set of all known type directory names */
342
/**
 * Build the one-line message announcing the start of an index run.
 *
 * @param options - Summary inputs:
 *   `mode` (interpolated verbatim, e.g. "incremental" or "full"),
 *   `stashSources` (count; pluralises the label),
 *   `semanticSearch`, `embeddingProvider`, `vecAvailable` (forwarded to
 *   `getSemanticSearchLabel`), and `llmEnabled` (truthy → "enabled").
 * @returns {string} The formatted summary sentence.
 */
function buildIndexSummaryMessage(options) {
    const { mode, stashSources, semanticSearch, embeddingProvider, vecAvailable, llmEnabled } = options;
    const stashSourceLabel = stashSources === 1 ? "stash source" : "stash sources";
    const semanticDetail = getSemanticSearchLabel(semanticSearch, embeddingProvider, vecAvailable);
    const llmDetail = llmEnabled ? "enabled" : "disabled";
    return `Starting ${mode} index (${stashSources} ${stashSourceLabel}, semantic search: ${semanticDetail}, LLM: ${llmDetail}).`;
}
347
/**
 * Classify the embedding backend from its configuration.
 *
 * @param embedding - Embedding config; may be undefined. Only `endpoint` is read.
 * @returns {"remote" | "local"} "remote" when `isHttpUrl` accepts the endpoint,
 *   otherwise "local" (including when no config or endpoint is present).
 */
function getEmbeddingProvider(embedding) {
    const endpoint = embedding?.endpoint;
    return isHttpUrl(endpoint) ? "remote" : "local";
}
350
/**
 * Describe the semantic-search setup for progress messages.
 *
 * @param semanticSearch - Falsy means semantic search is off.
 * @param embeddingProvider - Provider name, interpolated verbatim.
 * @param vecAvailable - Truthy selects "sqlite-vec", otherwise "JS fallback".
 * @returns {string} "disabled", or "<provider> embeddings, <vector backend>".
 */
function getSemanticSearchLabel(semanticSearch, embeddingProvider, vecAvailable) {
    if (!semanticSearch) {
        return "disabled";
    }
    const vectorBackend = vecAvailable ? "sqlite-vec" : "JS fallback";
    return `${embeddingProvider} embeddings, ${vectorBackend}`;
}
355
/**
 * Summarise whether the freshly built index is usable.
 *
 * Outcomes, checked in this order:
 *  1. no entries                      → ok, "Index ready. No assets were found yet."
 *  2. semantic search disabled        → ok, keyword-only message
 *  3. embeddingCount >= totalEntries  → ok, semantic search ready
 *  4. otherwise                       → not ok, with provider-specific `guidance`
 *
 * @param db - Database handle passed to `getEmbeddingCount` and `isVecAvailable`.
 * @param config - Reads `config.semanticSearch` and `config.embedding`.
 * @param totalEntries - Number of indexed entries.
 * @returns An object with `ok`, `message`, `semanticSearchEnabled` (always a
 *   boolean), `embeddingProvider`, `entryCount`, `embeddingCount`,
 *   `vecAvailable`, plus `guidance` only in the failing case.
 */
function verifyIndexState(db, config, totalEntries) {
    const embeddingCount = getEmbeddingCount(db);
    const vecAvailable = isVecAvailable(db);
    const embeddingProvider = getEmbeddingProvider(config.embedding);
    // Normalise once so every branch reports a real boolean, even when the
    // config key is missing.
    const semanticSearchEnabled = Boolean(config.semanticSearch);
    // All outcomes share the same stats; only ok/message/guidance differ.
    const report = (ok, message, guidance) => ({
        ok,
        message,
        ...(guidance === undefined ? {} : { guidance }),
        semanticSearchEnabled,
        embeddingProvider,
        entryCount: totalEntries,
        embeddingCount,
        vecAvailable,
    });
    if (totalEntries === 0) {
        return report(true, "Index ready. No assets were found yet.");
    }
    if (!semanticSearchEnabled) {
        return report(true, "Keyword index ready. Semantic search is disabled.");
    }
    if (embeddingCount >= totalEntries) {
        return report(true, `Semantic search ready (${embeddingCount}/${totalEntries} embeddings, ${vecAvailable ? "sqlite-vec active" : "JS fallback active"}).`);
    }
    const guidance = embeddingProvider === "remote"
        ? "Check your embedding endpoint and credentials, then retry `akm index --full --verbose`."
        : "Retry `akm index --full --verbose`. If it still fails, confirm local model downloads are permitted and see docs/configuration.md for local embedding dependency setup.";
    return report(false, `Semantic search verification failed (${embeddingCount}/${totalEntries} embeddings available).`, guidance);
}
293
405
  function isDirStale(dirPath, currentFiles, previousEntries, builtAtMs) {
294
406
  // Check if file set changed (additions or deletions)
295
407
  const prevFileNames = new Set(previousEntries.map((ie) => ie.entry.filename).filter((e) => !!e));
@@ -321,7 +433,7 @@ function isDirStale(dirPath, currentFiles, previousEntries, builtAtMs) {
321
433
  }
322
434
  return false;
323
435
  }
324
- async function enhanceStashWithLlm(llmConfig, stash, _dirPath, files) {
436
+ async function enhanceStashWithLlm(llmConfig, stash, files) {
325
437
  const { enhanceMetadata } = await import("./llm.js");
326
438
  const enhanced = [];
327
439
  for (const entry of stash.entries) {
@@ -391,30 +503,74 @@ export function matchEntryToFile(entryName, fileMap, files) {
391
503
  // Fallback to first file, or null if no files are available
392
504
  return files[0] || null;
393
505
  }
394
- export function buildSearchText(entry) {
395
- const parts = [entry.name.replace(/[-_]/g, " ")];
396
- if (entry.description)
397
- parts.push(entry.description);
398
- if (entry.tags)
399
- parts.push(entry.tags.join(" "));
400
- if (entry.examples)
401
- parts.push(entry.examples.join(" "));
402
- if (entry.aliases)
403
- parts.push(entry.aliases.join(" "));
404
- if (entry.searchHints)
405
- parts.push(entry.searchHints.join(" "));
406
- if (entry.usage)
407
- parts.push(entry.usage.join(" "));
408
- if (entry.intent) {
409
- if (entry.intent.when)
410
- parts.push(entry.intent.when);
411
- if (entry.intent.input)
412
- parts.push(entry.intent.input);
413
- if (entry.intent.output)
414
- parts.push(entry.intent.output);
506
+ export { buildSearchFields, buildSearchText } from "./search-fields";
507
+ // ── Utility score recomputation ──────────────────────────────────────────────
508
/** Retention window for usage events: events older than this are purged. */
const USAGE_EVENT_RETENTION_DAYS = 90;
/**
 * Recompute utility scores for every entry that has rows in `usage_events`.
 *
 * Steps:
 *  1. Ensure the usage-events schema exists and ask `purgeOldUsageEvents` to
 *     apply the retention window (`USAGE_EVENT_RETENTION_DAYS`).
 *  2. Aggregate, per `entry_id`, the number of 'search' and 'show' events and
 *     the latest `created_at`.
 *  3. selectRate = showCount / searchCount, capped at 1 (0 when never searched).
 *  4. Blend with the stored utility using a time-proportional EMA:
 *       decay   = 0.7 ** elapsedDays
 *       utility = previousUtility * decay + selectRate * (1 - decay)
 *     where elapsedDays is the time since `last_utility_computed_at`, never
 *     less than one day, so indexing frequency does not change decay speed.
 *  5. Record the new `last_utility_computed_at` timestamp.
 *
 * Entries without usage events are not touched.
 *
 * Called during `akm index` after FTS rebuild.
 *
 * @param db - Database handle; must expose `prepare(sql).all()`.
 */
export function recomputeUtilityScores(db) {
    const EMA_DECAY = 0.7;
    const MS_PER_DAY = 1000 * 60 * 60 * 24;
    // Ensure usage_events table exists before querying
    ensureUsageEventsSchema(db);
    // Purge stale usage events
    purgeOldUsageEvents(db, USAGE_EVENT_RETENTION_DAYS);
    // Time-proportional decay: apply one round of EMA per elapsed day so
    // indexing frequency doesn't affect how fast scores decay.
    const lastComputedAt = getMeta(db, "last_utility_computed_at");
    let elapsedDays = 1; // default for first run
    if (lastComputedAt) {
        const ms = Date.now() - new Date(lastComputedAt).getTime();
        // An unparseable timestamp yields NaN, and Math.max(1, NaN) is NaN,
        // which would write NaN utilities for every entry. Fall back to the
        // first-run default instead.
        if (Number.isFinite(ms)) {
            elapsedDays = Math.max(1, ms / MS_PER_DAY);
        }
    }
    const emaDecay = EMA_DECAY ** elapsedDays;
    const emaNew = 1 - emaDecay; // complement so weights still sum to 1
    // Single aggregate query instead of N+1 per-entry queries.
    // Only processes entries that actually have usage events.
    const usageRows = db
        .prepare(`
      SELECT entry_id,
             SUM(CASE WHEN event_type = 'search' THEN 1 ELSE 0 END) AS search_count,
             SUM(CASE WHEN event_type = 'show' THEN 1 ELSE 0 END) AS show_count,
             MAX(created_at) AS last_used_at
      FROM usage_events
      WHERE entry_id IS NOT NULL
      GROUP BY entry_id
    `)
        .all();
    if (usageRows.length === 0) {
        setMeta(db, "last_utility_computed_at", new Date().toISOString());
        return;
    }
    // Batch-load existing utility scores
    const existingScores = new Map();
    const scoreRows = db.prepare("SELECT entry_id, utility FROM utility_scores").all();
    for (const row of scoreRows) {
        existingScores.set(row.entry_id, row.utility);
    }
    for (const row of usageRows) {
        const selectRate = row.search_count > 0 ? Math.min(1, row.show_count / row.search_count) : 0;
        const prevUtility = existingScores.get(row.entry_id) ?? 0;
        const utility = prevUtility * emaDecay + selectRate * emaNew;
        upsertUtilityScore(db, row.entry_id, {
            utility,
            showCount: row.show_count,
            searchCount: row.search_count,
            selectRate,
            lastUsedAt: row.last_used_at ?? undefined,
        });
    }
    setMeta(db, "last_utility_computed_at", new Date().toISOString());
}
package/dist/info.js ADDED
@@ -0,0 +1,92 @@
1
+ import fs from "node:fs";
2
+ import { getAssetTypes } from "./asset-spec";
3
+ import { loadConfig } from "./config";
4
+ import { closeDatabase, getEntryCount, getMeta, isVecAvailable, openDatabase } from "./db";
5
+ import { getDbPath } from "./paths";
6
+ import { pkgVersion } from "./version";
7
/**
 * Assemble system info describing the current capabilities, configuration,
 * and index state. Used by `akm info`.
 *
 * Registry and stash entries are rebuilt from an explicit list of fields, so
 * anything else present on the config objects (such as credentials) is not
 * copied into the result.
 *
 * @param options.dbPath - Override the database path (useful for testing)
 * @returns An object with `schemaVersion` (1), `version`, `assetTypes`,
 *   `searchModes`, `registries`, `stashProviders` and `indexStats`.
 */
export function assembleInfo(options) {
    const config = loadConfig();
    const assetTypes = getAssetTypes();
    // Full-text search is always offered; the vector-based modes depend on config.
    const searchModes = ["fts"];
    if (config.semanticSearch) {
        searchModes.push("semantic", "hybrid");
    }
    // Registries: url is always included, the rest only when set.
    const registries = (config.registries ?? []).map((r) => ({
        url: r.url,
        ...(r.name ? { name: r.name } : {}),
        ...(r.provider ? { provider: r.provider } : {}),
        ...(r.enabled !== undefined ? { enabled: r.enabled } : {}),
    }));
    // Stash providers: type is always included, the rest only when set.
    const stashProviders = (config.stashes ?? []).map((s) => ({
        type: s.type,
        ...(s.name ? { name: s.name } : {}),
        ...(s.path ? { path: s.path } : {}),
        ...(s.url ? { url: s.url } : {}),
        ...(s.enabled !== undefined ? { enabled: s.enabled } : {}),
    }));
    const indexStats = readIndexStats(options?.dbPath);
    return {
        schemaVersion: 1,
        version: pkgVersion,
        assetTypes,
        searchModes,
        registries,
        stashProviders,
        indexStats,
    };
}
49
/**
 * Read entry count, build timestamp and embedding/vector flags from the index
 * database.
 *
 * Best-effort by design: a missing database file, or any error while opening
 * or querying it, yields zeroed stats instead of throwing. The database is
 * closed again in all cases once it has been opened.
 *
 * @param dbPath - Optional database path; defaults to `getDbPath()`.
 * @returns `{ entryCount, lastBuiltAt, hasEmbeddings, vecAvailable }`, where
 *   `lastBuiltAt` is the stored "builtAt" meta value or `null`, and
 *   `hasEmbeddings` is true only when the "hasEmbeddings" meta value is "1".
 */
function readIndexStats(dbPath) {
    // Fresh object per call so callers can never share or mutate a common default.
    const emptyStats = () => ({
        entryCount: 0,
        lastBuiltAt: null,
        hasEmbeddings: false,
        vecAvailable: false,
    });
    const resolvedPath = dbPath ?? getDbPath();
    // If no index file exists, return zeros
    if (!fs.existsSync(resolvedPath)) {
        return emptyStats();
    }
    let db;
    try {
        db = openDatabase(resolvedPath);
        const entryCount = getEntryCount(db);
        const lastBuiltAt = getMeta(db, "builtAt") ?? null;
        const vecAvailable = isVecAvailable(db);
        const hasEmbeddings = getMeta(db, "hasEmbeddings") === "1";
        return {
            entryCount,
            lastBuiltAt,
            hasEmbeddings,
            vecAvailable,
        };
    }
    catch {
        // Unreadable or incompatible index: report it as empty rather than failing.
        return emptyStats();
    }
    finally {
        if (db) {
            try {
                closeDatabase(db);
            }
            catch {
                /* ignore: nothing useful can be done if close fails */
            }
        }
    }
}