akm-cli 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/asset-registry.js +48 -0
- package/dist/asset-spec.js +11 -32
- package/dist/cli.js +161 -57
- package/dist/completions.js +4 -2
- package/dist/config.js +34 -6
- package/dist/db.js +178 -22
- package/dist/embedder.js +94 -13
- package/dist/file-context.js +3 -0
- package/dist/indexer.js +88 -37
- package/dist/info.js +92 -0
- package/dist/local-search.js +190 -90
- package/dist/manifest.js +172 -0
- package/dist/metadata.js +165 -2
- package/dist/providers/skills-sh.js +21 -12
- package/dist/providers/static-index.js +3 -1
- package/dist/registry-build-index.js +12 -1
- package/dist/registry-resolve.js +10 -7
- package/dist/search-fields.js +69 -0
- package/dist/search-source.js +42 -0
- package/dist/stash-clone.js +3 -1
- package/dist/stash-provider-factory.js +0 -2
- package/dist/stash-providers/filesystem.js +4 -5
- package/dist/stash-providers/git.js +140 -0
- package/dist/stash-providers/index.js +1 -1
- package/dist/stash-providers/openviking.js +36 -25
- package/dist/stash-providers/provider-utils.js +11 -0
- package/dist/stash-search.js +106 -90
- package/dist/stash-show.js +125 -9
- package/dist/usage-events.js +73 -0
- package/dist/version.js +20 -0
- package/dist/walker.js +1 -2
- package/package.json +3 -2
- package/dist/stash-providers/context-hub.js +0 -390
package/dist/indexer.js
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
import fs from "node:fs";
|
|
2
2
|
import path from "node:path";
|
|
3
3
|
import { resolveStashDir } from "./common";
|
|
4
|
-
import { closeDatabase,
|
|
4
|
+
import { closeDatabase, deleteEntriesByDir, deleteEntriesByStashDir, getEntriesByDir, getEntryCount, getMeta, isVecAvailable, openDatabase, rebuildFts, setMeta, upsertEmbedding, upsertEntry, upsertUtilityScore, warnIfVecMissing, } from "./db";
|
|
5
5
|
import { generateMetadataFlat, loadStashFile } from "./metadata";
|
|
6
6
|
import { getDbPath } from "./paths";
|
|
7
|
+
import { buildSearchText } from "./search-fields";
|
|
8
|
+
import { ensureUsageEventsSchema, purgeOldUsageEvents } from "./usage-events";
|
|
7
9
|
import { walkStashFlat } from "./walker";
|
|
8
10
|
import { warn } from "./warn";
|
|
9
11
|
// ── Indexer ──────────────────────────────────────────────────────────────────
|
|
@@ -12,7 +14,10 @@ export async function akmIndex(options) {
|
|
|
12
14
|
// Load config and resolve all stash sources
|
|
13
15
|
const { loadConfig } = await import("./config.js");
|
|
14
16
|
const config = loadConfig();
|
|
15
|
-
|
|
17
|
+
// Ensure git stash caches are extracted before resolving stash dirs,
|
|
18
|
+
// so their content directories exist on disk for the walker to discover.
|
|
19
|
+
const { ensureGitCaches, resolveAllStashDirs } = await import("./search-source.js");
|
|
20
|
+
await ensureGitCaches(config);
|
|
16
21
|
const allStashDirs = resolveAllStashDirs(stashDir);
|
|
17
22
|
const t0 = Date.now();
|
|
18
23
|
// Open database — pass embedding dimension from config if available
|
|
@@ -26,7 +31,7 @@ export async function akmIndex(options) {
|
|
|
26
31
|
const isIncremental = !options?.full && prevStashDir === stashDir && !!prevBuiltAt;
|
|
27
32
|
const builtAtMs = isIncremental && prevBuiltAt ? new Date(prevBuiltAt).getTime() : 0;
|
|
28
33
|
if (options?.full || !isIncremental) {
|
|
29
|
-
//
|
|
34
|
+
// The delete is now merged into the insert transaction inside
|
|
30
35
|
// indexEntries() so that a reader never sees an empty database between
|
|
31
36
|
// the wipe and the re-inserts. The doFullDelete flag signals this path.
|
|
32
37
|
}
|
|
@@ -59,9 +64,9 @@ export async function akmIndex(options) {
|
|
|
59
64
|
const tWalkStart = Date.now();
|
|
60
65
|
// Walk stash dirs and index entries.
|
|
61
66
|
// doFullDelete=true merges the wipe into the same transaction as the
|
|
62
|
-
// inserts
|
|
67
|
+
// inserts so readers never see an empty database mid-rebuild.
|
|
63
68
|
const doFullDelete = options?.full || !isIncremental;
|
|
64
|
-
const { scannedDirs, skippedDirs, generatedCount, dirsNeedingLlm } = await indexEntries(db, allStashDirs,
|
|
69
|
+
const { scannedDirs, skippedDirs, generatedCount, dirsNeedingLlm } = await indexEntries(db, allStashDirs, isIncremental, builtAtMs, doFullDelete);
|
|
65
70
|
const tWalkEnd = Date.now();
|
|
66
71
|
// Enhance entries with LLM if configured
|
|
67
72
|
await enhanceDirsWithLlm(db, config, dirsNeedingLlm);
|
|
@@ -69,11 +74,12 @@ export async function akmIndex(options) {
|
|
|
69
74
|
// Rebuild FTS after all inserts
|
|
70
75
|
rebuildFts(db);
|
|
71
76
|
const tFtsEnd = Date.now();
|
|
77
|
+
// Recompute utility scores from usage_events after FTS rebuild
|
|
78
|
+
recomputeUtilityScores(db);
|
|
72
79
|
// Generate embeddings if semantic search is enabled
|
|
73
80
|
const hasEmbeddings = await generateEmbeddingsForDb(db, config);
|
|
74
81
|
const tEmbedEnd = Date.now();
|
|
75
82
|
// Update metadata
|
|
76
|
-
setMeta(db, "version", String(DB_VERSION));
|
|
77
83
|
setMeta(db, "builtAt", new Date().toISOString());
|
|
78
84
|
setMeta(db, "stashDir", stashDir);
|
|
79
85
|
setMeta(db, "stashDirs", JSON.stringify(allStashDirs));
|
|
@@ -104,7 +110,7 @@ export async function akmIndex(options) {
|
|
|
104
110
|
}
|
|
105
111
|
}
|
|
106
112
|
// ── Extracted helpers for indexing ────────────────────────────────────────────
|
|
107
|
-
async function indexEntries(db, allStashDirs,
|
|
113
|
+
async function indexEntries(db, allStashDirs, isIncremental, builtAtMs, doFullDelete = false) {
|
|
108
114
|
let scannedDirs = 0;
|
|
109
115
|
let skippedDirs = 0;
|
|
110
116
|
let generatedCount = 0;
|
|
@@ -170,7 +176,7 @@ async function indexEntries(db, allStashDirs, _stashDir, isIncremental, builtAtM
|
|
|
170
176
|
// occurrence wins.
|
|
171
177
|
const indexedAssetIdentities = new Set();
|
|
172
178
|
const insertTransaction = db.transaction(() => {
|
|
173
|
-
//
|
|
179
|
+
// Perform the full-rebuild wipe as the FIRST step of the insert
|
|
174
180
|
// transaction so delete and re-insert are atomic — a concurrent reader
|
|
175
181
|
// never observes an empty database between the two operations.
|
|
176
182
|
if (doFullDelete) {
|
|
@@ -189,6 +195,8 @@ async function indexEntries(db, allStashDirs, _stashDir, isIncremental, builtAtM
|
|
|
189
195
|
}
|
|
190
196
|
}
|
|
191
197
|
db.exec("DELETE FROM entries_fts");
|
|
198
|
+
db.exec("DELETE FROM utility_scores");
|
|
199
|
+
db.exec("DELETE FROM usage_events");
|
|
192
200
|
db.exec("DELETE FROM entries");
|
|
193
201
|
}
|
|
194
202
|
for (const { dirPath, currentStashDir, files, stash, skip } of dirRecords) {
|
|
@@ -235,8 +243,8 @@ async function enhanceDirsWithLlm(db, config, dirsNeedingLlm) {
|
|
|
235
243
|
if (generatedEntries.length === 0)
|
|
236
244
|
continue;
|
|
237
245
|
const generatedStash = { entries: generatedEntries };
|
|
238
|
-
const enhanced = await enhanceStashWithLlm(config.llm, generatedStash,
|
|
239
|
-
//
|
|
246
|
+
const enhanced = await enhanceStashWithLlm(config.llm, generatedStash, files);
|
|
247
|
+
// Re-upsert the enhanced entries in a single transaction so a crash
|
|
240
248
|
// cannot leave half the entries updated and the rest stale.
|
|
241
249
|
db.transaction(() => {
|
|
242
250
|
for (const entry of enhanced.entries) {
|
|
@@ -258,7 +266,7 @@ async function generateEmbeddingsForDb(db, config) {
|
|
|
258
266
|
return true;
|
|
259
267
|
const texts = allEntries.map((e) => e.searchText);
|
|
260
268
|
const embeddings = await embedBatch(texts, config.embedding);
|
|
261
|
-
//
|
|
269
|
+
// Wrap all embedding upserts in a single transaction so partial
|
|
262
270
|
// state is rolled back on failure rather than leaving the table half-filled.
|
|
263
271
|
db.transaction(() => {
|
|
264
272
|
for (let i = 0; i < allEntries.length; i++) {
|
|
@@ -289,7 +297,6 @@ function attachFileSize(entry, entryPath) {
|
|
|
289
297
|
return entry;
|
|
290
298
|
}
|
|
291
299
|
}
|
|
292
|
-
/** Set of all known type directory names */
|
|
293
300
|
function isDirStale(dirPath, currentFiles, previousEntries, builtAtMs) {
|
|
294
301
|
// Check if file set changed (additions or deletions)
|
|
295
302
|
const prevFileNames = new Set(previousEntries.map((ie) => ie.entry.filename).filter((e) => !!e));
|
|
@@ -321,7 +328,7 @@ function isDirStale(dirPath, currentFiles, previousEntries, builtAtMs) {
|
|
|
321
328
|
}
|
|
322
329
|
return false;
|
|
323
330
|
}
|
|
324
|
-
async function enhanceStashWithLlm(llmConfig, stash,
|
|
331
|
+
async function enhanceStashWithLlm(llmConfig, stash, files) {
|
|
325
332
|
const { enhanceMetadata } = await import("./llm.js");
|
|
326
333
|
const enhanced = [];
|
|
327
334
|
for (const entry of stash.entries) {
|
|
@@ -391,30 +398,74 @@ export function matchEntryToFile(entryName, fileMap, files) {
|
|
|
391
398
|
// Fallback to first file, or null if no files are available
|
|
392
399
|
return files[0] || null;
|
|
393
400
|
}
|
|
394
|
-
export
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
401
|
+
export { buildSearchFields, buildSearchText } from "./search-fields";
|
|
402
|
+
// ── Utility score recomputation ──────────────────────────────────────────────
|
|
403
|
+
/** Retention window for usage events: events older than this are purged. */
|
|
404
|
+
const USAGE_EVENT_RETENTION_DAYS = 90;
|
|
405
|
+
/**
 * Recompute utility scores from the rows currently stored in usage_events.
 *
 * Steps, in order:
 *  1. Ensure the usage_events schema exists, then purge events older than
 *     USAGE_EVENT_RETENTION_DAYS.
 *  2. Derive the EMA weights from the time elapsed since the
 *     "last_utility_computed_at" meta value:
 *       decay = 0.7 ** elapsedDays, newWeight = 1 - decay
 *     elapsedDays is never below 1 and defaults to 1 when the meta value is
 *     missing or cannot be parsed as a date.
 *  3. Aggregate usage_events per entry_id in one query: number of 'search'
 *     events, number of 'show' events, and the latest created_at.
 *  4. For each aggregated entry:
 *       selectRate = min(1, showCount / searchCount)  (0 when searchCount is 0)
 *       utility    = previousUtility * decay + selectRate * newWeight
 *     where previousUtility is the stored utility, or 0 when none exists.
 *     The result is written with upsertUtilityScore().
 *  5. Store the current time under "last_utility_computed_at".
 *
 * Entries that have no usage events are not touched by this function.
 *
 * Called during `akm index` after FTS rebuild.
 *
 * @param db - Open database handle; it is queried directly via prepare()
 *             and passed through to the db/usage-events helpers.
 */
export function recomputeUtilityScores(db) {
    const EMA_DECAY = 0.7;
    const MS_PER_DAY = 1000 * 60 * 60 * 24;
    // Ensure usage_events table exists before querying
    ensureUsageEventsSchema(db);
    // Purge stale usage events (retention window in days)
    purgeOldUsageEvents(db, USAGE_EVENT_RETENTION_DAYS);
    // Time-proportional decay: apply one round of EMA per elapsed day so
    // indexing frequency doesn't affect how fast scores decay.
    const lastComputedAt = getMeta(db, "last_utility_computed_at");
    let elapsedDays = 1; // default for first run
    if (lastComputedAt) {
        const elapsedMs = Date.now() - new Date(lastComputedAt).getTime();
        // An unparseable timestamp yields NaN, and Math.max(1, NaN) is NaN,
        // which would turn every stored utility into NaN. Keep the first-run
        // default in that case.
        if (Number.isFinite(elapsedMs)) {
            elapsedDays = Math.max(1, elapsedMs / MS_PER_DAY);
        }
    }
    const emaDecay = EMA_DECAY ** elapsedDays;
    const emaNew = 1 - emaDecay; // complement so weights still sum to 1
    // Single aggregate query instead of N+1 per-entry queries.
    // Only processes entries that actually have usage events.
    const usageRows = db
        .prepare(`
      SELECT entry_id,
        SUM(CASE WHEN event_type = 'search' THEN 1 ELSE 0 END) AS search_count,
        SUM(CASE WHEN event_type = 'show' THEN 1 ELSE 0 END) AS show_count,
        MAX(created_at) AS last_used_at
      FROM usage_events
      WHERE entry_id IS NOT NULL
      GROUP BY entry_id
    `)
        .all();
    if (usageRows.length === 0) {
        // Nothing to score, but still record that a computation pass ran.
        setMeta(db, "last_utility_computed_at", new Date().toISOString());
        return;
    }
    // Batch-load existing utility scores
    const existingScores = new Map();
    const scoreRows = db.prepare("SELECT entry_id, utility FROM utility_scores").all();
    for (const row of scoreRows) {
        existingScores.set(row.entry_id, row.utility);
    }
    for (const row of usageRows) {
        // Clamp to 1 so more shows than searches cannot push the rate above 100%.
        const selectRate = row.search_count > 0 ? Math.min(1, row.show_count / row.search_count) : 0;
        const prevUtility = existingScores.get(row.entry_id) ?? 0;
        const utility = prevUtility * emaDecay + selectRate * emaNew;
        upsertUtilityScore(db, row.entry_id, {
            utility,
            showCount: row.show_count,
            searchCount: row.search_count,
            selectRate,
            lastUsedAt: row.last_used_at ?? undefined,
        });
    }
    setMeta(db, "last_utility_computed_at", new Date().toISOString());
}
|
package/dist/info.js
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import { getAssetTypes } from "./asset-spec";
|
|
3
|
+
import { loadConfig } from "./config";
|
|
4
|
+
import { closeDatabase, getEntryCount, getMeta, isVecAvailable, openDatabase } from "./db";
|
|
5
|
+
import { getDbPath } from "./paths";
|
|
6
|
+
import { pkgVersion } from "./version";
|
|
7
|
+
/**
 * Build the system-info payload used by `akm info`: package version,
 * supported asset types, available search modes, configured registries and
 * stash providers, and statistics read from the index database.
 *
 * Registries and stashes are summarised field by field, so any property of
 * a config entry that is not explicitly copied below (for example
 * credentials kept in a registry's options) never reaches the output.
 *
 * @param options.dbPath - Override the database path (useful for testing)
 * @returns Object with schemaVersion, version, assetTypes, searchModes,
 *          registries, stashProviders and indexStats.
 */
export function assembleInfo(options) {
    const config = loadConfig();
    const assetTypes = getAssetTypes();
    // Full-text search is always listed; the vector-backed modes are listed
    // only when semantic search is switched on in the config.
    const searchModes = config.semanticSearch ? ["fts", "semantic", "hybrid"] : ["fts"];
    const registries = (config.registries ?? []).map((registry) => {
        const summary = { url: registry.url };
        if (registry.name) {
            summary.name = registry.name;
        }
        if (registry.provider) {
            summary.provider = registry.provider;
        }
        if (registry.enabled !== undefined) {
            summary.enabled = registry.enabled;
        }
        return summary;
    });
    const stashProviders = (config.stashes ?? []).map((stash) => {
        const summary = { type: stash.type };
        if (stash.name) {
            summary.name = stash.name;
        }
        if (stash.path) {
            summary.path = stash.path;
        }
        if (stash.url) {
            summary.url = stash.url;
        }
        if (stash.enabled !== undefined) {
            summary.enabled = stash.enabled;
        }
        return summary;
    });
    const indexStats = readIndexStats(options?.dbPath);
    return {
        schemaVersion: 1,
        version: pkgVersion,
        assetTypes,
        searchModes,
        registries,
        stashProviders,
        indexStats,
    };
}
|
|
49
|
+
/**
 * Read summary statistics from the index database.
 *
 * Returns an all-zero result when the database file does not exist or when
 * anything throws while opening or querying it; this function itself never
 * throws for those cases. The database handle, if one was opened, is always
 * closed before returning, and errors raised while closing are ignored.
 *
 * @param dbPath - Optional database path; falls back to getDbPath().
 * @returns Object with entryCount, lastBuiltAt (null when unset),
 *          hasEmbeddings (true only when the "hasEmbeddings" meta value is
 *          "1") and vecAvailable.
 */
function readIndexStats(dbPath) {
    // Fresh object per call so callers can never share a mutated fallback.
    const emptyStats = () => ({
        entryCount: 0,
        lastBuiltAt: null,
        hasEmbeddings: false,
        vecAvailable: false,
    });
    const resolvedPath = dbPath ?? getDbPath();
    // No index file on disk: nothing has been built yet.
    if (!fs.existsSync(resolvedPath)) {
        return emptyStats();
    }
    let db;
    try {
        db = openDatabase(resolvedPath);
        const entryCount = getEntryCount(db);
        const builtAt = getMeta(db, "builtAt");
        const vecAvailable = isVecAvailable(db);
        const embeddingsFlag = getMeta(db, "hasEmbeddings");
        return {
            entryCount,
            lastBuiltAt: builtAt ?? null,
            hasEmbeddings: embeddingsFlag === "1",
            vecAvailable,
        };
    }
    catch {
        // Best-effort: an unreadable index is reported the same as a missing one.
        return emptyStats();
    }
    finally {
        if (db) {
            try {
                closeDatabase(db);
            }
            catch {
                /* ignore */
            }
        }
    }
}
|