npm - akm-cli - Versions diffs - 0.1.3 → 0.2.0 - Mend

akm-cli 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

package/dist/asset-registry.js +48 -0
package/dist/asset-spec.js +11 -32
package/dist/cli.js +161 -57
package/dist/completions.js +4 -2
package/dist/config.js +34 -6
package/dist/db.js +178 -22
package/dist/embedder.js +94 -13
package/dist/file-context.js +3 -0
package/dist/indexer.js +88 -37
package/dist/info.js +92 -0
package/dist/local-search.js +190 -90
package/dist/manifest.js +172 -0
package/dist/metadata.js +165 -2
package/dist/providers/skills-sh.js +21 -12
package/dist/providers/static-index.js +3 -1
package/dist/registry-build-index.js +12 -1
package/dist/registry-resolve.js +10 -7
package/dist/search-fields.js +69 -0
package/dist/search-source.js +42 -0
package/dist/stash-clone.js +3 -1
package/dist/stash-provider-factory.js +0 -2
package/dist/stash-providers/filesystem.js +4 -5
package/dist/stash-providers/git.js +140 -0
package/dist/stash-providers/index.js +1 -1
package/dist/stash-providers/openviking.js +36 -25
package/dist/stash-providers/provider-utils.js +11 -0
package/dist/stash-search.js +106 -90
package/dist/stash-show.js +125 -9
package/dist/usage-events.js +73 -0
package/dist/version.js +20 -0
package/dist/walker.js +1 -2
package/package.json +3 -2
package/dist/stash-providers/context-hub.js +0 -390

package/dist/db.js CHANGED Viewed

@@ -4,9 +4,11 @@ import { createRequire } from "node:module";
 import path from "node:path";
 import { cosineSimilarity } from "./embedder";
 import { getDbPath } from "./paths";
+import { buildSearchFields } from "./search-fields";
+import { ensureUsageEventsSchema } from "./usage-events";
 import { warn } from "./warn";
 // ── Constants ───────────────────────────────────────────────────────────────
-export const DB_VERSION = 6;
+export const DB_VERSION = 8;
 export const EMBEDDING_DIM = 384;
 // ── Database lifecycle ──────────────────────────────────────────────────────
 export function openDatabase(dbPath, options) {
@@ -83,6 +85,8 @@ function ensureSchema(db, embeddingDim) {
     // Check stored version — if it differs from DB_VERSION, drop and recreate all tables
     const storedVersion = getMeta(db, "version");
     if (storedVersion && storedVersion !== String(DB_VERSION)) {
+        db.exec("DROP TABLE IF EXISTS utility_scores");
+        db.exec("DROP TABLE IF EXISTS usage_events");
         db.exec("DROP TABLE IF EXISTS embeddings");
         db.exec("DROP TABLE IF EXISTS entries_vec");
         db.exec("DROP TABLE IF EXISTS entries_fts");
@@ -120,17 +124,35 @@ function ensureSchema(db, embeddingDim) {
       FOREIGN KEY (id) REFERENCES entries(id)
     );
   `);
-    // FTS5 table — standalone with explicit entry_id for joining
+    // FTS5 table — multi-column with per-field weighting via bm25()
     const ftsExists = db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='entries_fts'").get();
     if (!ftsExists) {
         db.exec(`
       CREATE VIRTUAL TABLE entries_fts USING fts5(
         entry_id UNINDEXED,
-        search_text,
+        name,
+        description,
+        tags,
+        hints,
+        content,
         tokenize='porter unicode61'
       );
     `);
     }
+    // Usage events table — created by ensureUsageEventsSchema() at runtime.
+    // Utility scores table (aggregated per-entry utility metrics)
+    db.exec(`
+    CREATE TABLE IF NOT EXISTS utility_scores (
+      entry_id     INTEGER PRIMARY KEY,
+      utility      REAL NOT NULL DEFAULT 0,
+      show_count   INTEGER NOT NULL DEFAULT 0,
+      search_count INTEGER NOT NULL DEFAULT 0,
+      select_rate  REAL NOT NULL DEFAULT 0,
+      last_used_at TEXT,
+      updated_at   TEXT NOT NULL DEFAULT (datetime('now')),
+      FOREIGN KEY (entry_id) REFERENCES entries(id) ON DELETE CASCADE
+    );
+  `);
     // sqlite-vec table
     if (isVecAvailable(db)) {
         // Check if stored embedding dimension differs from configured one
@@ -142,7 +164,7 @@ function ensureSchema(db, embeddingDim) {
             catch {
                 /* ignore */
             }
-            // CR-2: Delete stale BLOB embeddings so they don't produce silently wrong
+            // Delete stale BLOB embeddings so they don't produce silently wrong
             // similarity scores against the new-dimension vec table.
             try {
                 db.exec("DELETE FROM embeddings");
@@ -165,6 +187,8 @@ function ensureSchema(db, embeddingDim) {
         }
         setMeta(db, "embeddingDim", String(embeddingDim));
     }
+    // Usage telemetry table
+    ensureUsageEventsSchema(db);
 }
 // ── Meta helpers ────────────────────────────────────────────────────────────
 export function getMeta(db, key) {
@@ -231,7 +255,7 @@ function deleteRelatedRows(db, ids) {
         catch {
             /* ignore */
         }
-        // HI-1: Also delete from FTS table so orphaned FTS rows don't remain
+        // Also delete from FTS table so orphaned FTS rows don't remain
         try {
             db.prepare(`DELETE FROM entries_fts WHERE entry_id IN (${placeholders})`).run(...chunk);
         }
@@ -246,16 +270,48 @@ function deleteRelatedRows(db, ids) {
                 /* ignore */
             }
         }
+        // Clean up utility scores before deleting entries
+        try {
+            db.prepare(`DELETE FROM utility_scores WHERE entry_id IN (${placeholders})`).run(...chunk);
+        }
+        catch {
+            /* ignore */
+        }
+        // Clean up usage events before deleting entries
+        try {
+            db.prepare(`DELETE FROM usage_events WHERE entry_id IN (${placeholders})`).run(...chunk);
+        }
+        catch {
+            /* ignore */
+        }
     }
 }
 export function rebuildFts(db) {
-    // CR-1: Wrap DELETE + INSERT in a single transaction so the FTS table is
+    // Rebuild entries_fts from scratch. Wrap DELETE + INSERT in a single transaction so the FTS table is
     // never left empty between the two statements if a crash occurs.
-    // HI-14: Store the integer id directly (FTS5 stores all content as text
+    // Store the integer id directly (FTS5 stores all content as text
     // internally; the join in searchFts compares numerically without CAST).
+    // (In this version the JOIN itself is issued by runFtsQuery, which searchFts calls.)
+    // Insert into separate FTS5 columns by extracting per-field text from
+    // the entry_json using buildSearchFields(). The entries.search_text column
+    // is kept as a concatenated fallback for embedding generation.
     db.transaction(() => {
         db.exec("DELETE FROM entries_fts");
-        db.exec("INSERT INTO entries_fts (entry_id, search_text) SELECT id, search_text FROM entries");
+        const rows = db.prepare("SELECT id, entry_json FROM entries").all(); // id + raw JSON text of every row in entries
+        const insertStmt = db.prepare("INSERT INTO entries_fts (entry_id, name, description, tags, hints, content) VALUES (?, ?, ?, ?, ?, ?)"); // prepared once, reused for each row
+        for (const row of rows) {
+            let entry;
+            let fields;
+            try {
+                entry = JSON.parse(row.entry_json);
+                fields = buildSearchFields(entry);
+            }
+            catch { // reached on a JSON.parse failure and also if buildSearchFields() throws
+                warn(`[db] rebuildFts: skipping entry id=${row.id} — invalid entry_json`);
+                continue; // the row stays in entries but gets no entries_fts row
+            }
+            insertStmt.run(row.id, fields.name, fields.description, fields.tags, fields.hints, fields.content);
+        }
     })();
 }
 // ── Vector operations ───────────────────────────────────────────────────────
@@ -284,8 +340,8 @@ export function searchVec(db, queryEmbedding, k) {
                 .all(buf, k);
         }
         catch (err) {
-            // MD-5: Log the failure so it's visible in diagnostics
-            console.warn("[db] searchVec (sqlite-vec path) failed:", err instanceof Error ? err.message : String(err));
+            // Log the failure so it's visible in diagnostics
+            warn("[db] searchVec (sqlite-vec path) failed:", err instanceof Error ? err.message : String(err));
             return [];
         }
     }
@@ -321,7 +377,7 @@ function searchBlobVec(db, queryEmbedding, k) {
     }
     catch (err) {
         // MD-5: Log the failure so it's visible in diagnostics
-        console.warn("[db] searchBlobVec (JS fallback) failed:", err instanceof Error ? err.message : String(err));
+        warn("[db] searchBlobVec (JS fallback) failed:", err instanceof Error ? err.message : String(err));
         return [];
     }
 }
@@ -330,13 +386,49 @@ export function searchFts(db, query, limit, entryType) {
     const ftsQuery = sanitizeFtsQuery(query);
     if (!ftsQuery)
         return [];
+    // Try the exact AND query first
+    const exactResults = runFtsQuery(db, ftsQuery, limit, entryType);
+    if (exactResults.length > 0)
+        return exactResults;
+    // Exact match returned zero results — try prefix fallback.
+    // Append FTS5 `*` suffix to each token that is >= 3 characters long.
+    // Short tokens (1-2 chars) are excluded from prefix expansion because
+    // they produce too many false positives.
+    const prefixQuery = buildPrefixQuery(ftsQuery);
+    if (!prefixQuery)
+        return [];
+    return runFtsQuery(db, prefixQuery, limit, entryType);
+}
+/**
+ * Build a prefix query from an FTS5 query string by appending `*` to each
+ * whitespace-separated token that is 3+ characters long. Shorter tokens are
+ * kept as-is (no prefix expansion) to avoid overly broad matches.
+ * @param {string} ftsQuery - query text; it is only split on whitespace here, so any quotes or operators count as token characters.
+ * @returns {string|null} the tokens re-joined with single spaces, or null if no token qualified for prefix expansion.
+ */
+function buildPrefixQuery(ftsQuery) {
+    const tokens = ftsQuery.split(/\s+/).filter(Boolean); // filter(Boolean) drops empty strings from leading/trailing whitespace
+    let hasPrefix = false;
+    const prefixTokens = tokens.map((t) => {
+        if (t.length >= 3) { // length of the raw token as split above
+            hasPrefix = true;
+            return `${t}*`; // NOTE(review): a token already ending in `*` would get a second one — confirm sanitizeFtsQuery never emits that
+        }
+        return t;
+    });
+    if (!hasPrefix)
+        return null; // nothing was expanded, so the result would equal the input tokens
+    return prefixTokens.join(" ");
+}
+function runFtsQuery(db, ftsQuery, limit, entryType) {
     let sql;
     let params;
-    // HI-14: Join on integer entry_id directly (no CAST needed; we store integer)
+    // Join on integer entry_id directly (no CAST needed; we store integer)
+    // Use bm25() with per-column weights: entry_id(0), name(10), description(5), tags(3), hints(2), content(1)
     if (entryType && entryType !== "any") {
         sql = `
       SELECT e.id, e.file_path AS filePath, e.entry_json, e.search_text AS searchText,
-             bm25(entries_fts) AS bm25Score
+             bm25(entries_fts, 0, 10.0, 5.0, 3.0, 2.0, 1.0) AS bm25Score
       FROM entries_fts f
       JOIN entries e ON e.id = f.entry_id
       WHERE entries_fts MATCH ?
@@ -349,7 +441,7 @@ export function searchFts(db, query, limit, entryType) {
     else {
         sql = `
       SELECT e.id, e.file_path AS filePath, e.entry_json, e.search_text AS searchText,
-             bm25(entries_fts) AS bm25Score
+             bm25(entries_fts, 0, 10.0, 5.0, 3.0, 2.0, 1.0) AS bm25Score
       FROM entries_fts f
       JOIN entries e ON e.id = f.entry_id
       WHERE entries_fts MATCH ?
@@ -360,7 +452,7 @@ export function searchFts(db, query, limit, entryType) {
     }
     try {
         const rows = db.prepare(sql).all(...params);
-        // CR-6: Guard against corrupt JSON — skip the row rather than crashing
+        // Guard against corrupt JSON — skip the row rather than crashing
         const results = [];
         for (const row of rows) {
             let entry;
@@ -368,7 +460,7 @@ export function searchFts(db, query, limit, entryType) {
                 entry = JSON.parse(row.entry_json);
             }
             catch {
-                console.warn(`[db] searchFts: skipping entry id=${row.id} — corrupt entry_json`);
+                warn(`[db] searchFts: skipping entry id=${row.id} — corrupt entry_json`);
                 continue;
             }
             results.push({
@@ -416,7 +508,7 @@ export function getAllEntries(db, entryType) {
         params = [];
     }
     const rows = db.prepare(sql).all(...params);
-    // CR-6: Guard against corrupt JSON — skip the row rather than crashing
+    // Guard against corrupt JSON — skip the row rather than crashing
     const entries = [];
     for (const row of rows) {
         let entry;
@@ -424,7 +516,7 @@ export function getAllEntries(db, entryType) {
             entry = JSON.parse(row.entry_json);
         }
         catch {
-            console.warn(`[db] getAllEntries: skipping entry id=${row.id} — corrupt entry_json`);
+            warn(`[db] getAllEntries: skipping entry id=${row.id} — corrupt entry_json`);
             continue;
         }
         entries.push({
@@ -447,13 +539,13 @@ export function getEntryById(db, id) {
     const row = db.prepare("SELECT file_path, entry_json FROM entries WHERE id = ?").get(id);
     if (!row)
         return undefined;
-    // CR-6: Guard against corrupt JSON
+    // Guard against corrupt JSON
     let entry;
     try {
         entry = JSON.parse(row.entry_json);
     }
     catch {
-        console.warn(`[db] getEntryById: skipping entry id=${id} — corrupt entry_json`);
+        warn(`[db] getEntryById: skipping entry id=${id} — corrupt entry_json`);
         return undefined;
     }
     return { filePath: row.file_path, entry };
@@ -462,7 +554,7 @@ export function getEntriesByDir(db, dirPath) {
     const rows = db
         .prepare("SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text FROM entries WHERE dir_path = ?")
         .all(dirPath);
-    // CR-6: Guard against corrupt JSON — skip the row rather than crashing
+    // Guard against corrupt JSON — skip the row rather than crashing
     const entries = [];
     for (const row of rows) {
         let entry;
@@ -470,7 +562,7 @@ export function getEntriesByDir(db, dirPath) {
             entry = JSON.parse(row.entry_json);
         }
         catch {
-            console.warn(`[db] getEntriesByDir: skipping entry id=${row.id} — corrupt entry_json`);
+            warn(`[db] getEntriesByDir: skipping entry id=${row.id} — corrupt entry_json`);
             continue;
         }
         entries.push({
@@ -485,3 +577,67 @@ export function getEntriesByDir(db, dirPath) {
     }
     return entries;
 }
+/**
+ * Fetch the utility_scores row for `entryId` and return it with camelCase keys, or undefined if no row exists.
+ */
+export function getUtilityScore(db, entryId) {
+    const row = db
+        .prepare("SELECT entry_id, utility, show_count, search_count, select_rate, last_used_at, updated_at FROM utility_scores WHERE entry_id = ?")
+        .get(entryId); // a falsy result is treated as "no row" just below
+    if (!row)
+        return undefined;
+    return {
+        entryId: row.entry_id,
+        utility: row.utility,
+        showCount: row.show_count,
+        searchCount: row.search_count,
+        selectRate: row.select_rate,
+        lastUsedAt: row.last_used_at ?? undefined, // a null/absent last_used_at is surfaced as undefined
+        updatedAt: row.updated_at,
+    };
+}
+/**
+ * Batch-load utility scores for multiple entry IDs, issuing one SELECT per chunk of up to SQLITE_CHUNK_SIZE ids.
+ * Returns a Map keyed by entry_id for O(1) lookup; ids with no utility_scores row are simply absent from the Map.
+ */
+export function getUtilityScoresByIds(db, ids) {
+    if (ids.length === 0)
+        return new Map(); // no ids: return an empty Map without touching the database
+    const result = new Map();
+    // Process in chunks to stay within SQLITE_MAX_VARIABLE_NUMBER
+    for (let i = 0; i < ids.length; i += SQLITE_CHUNK_SIZE) { // SQLITE_CHUNK_SIZE is declared outside this excerpt
+        const chunk = ids.slice(i, i + SQLITE_CHUNK_SIZE);
+        const placeholders = chunk.map(() => "?").join(","); // one "?" per id; the ids themselves are bound as parameters
+        const rows = db
+            .prepare(`SELECT entry_id, utility, show_count, search_count, select_rate, last_used_at, updated_at FROM utility_scores WHERE entry_id IN (${placeholders})`)
+            .all(...chunk);
+        for (const row of rows) {
+            result.set(row.entry_id, {
+                entryId: row.entry_id,
+                utility: row.utility,
+                showCount: row.show_count,
+                searchCount: row.search_count,
+                selectRate: row.select_rate,
+                lastUsedAt: row.last_used_at ?? undefined, // a null/absent last_used_at is surfaced as undefined
+                updatedAt: row.updated_at,
+            });
+        }
+    }
+    return result;
+}
+/**
+ * Insert the utility_scores row for `entryId`, or on an entry_id conflict overwrite every metric column with the values in `data`; updated_at is set to datetime('now') either way.
+ */
+export function upsertUtilityScore(db, entryId, data) {
+    db.prepare(`
+    INSERT INTO utility_scores (entry_id, utility, show_count, search_count, select_rate, last_used_at, updated_at)
+    VALUES (?, ?, ?, ?, ?, ?, datetime('now'))
+    ON CONFLICT(entry_id) DO UPDATE SET
+      utility = excluded.utility,
+      show_count = excluded.show_count,
+      search_count = excluded.search_count,
+      select_rate = excluded.select_rate,
+      last_used_at = excluded.last_used_at,
+      updated_at = datetime('now')
+  `).run(entryId, data.utility, data.showCount, data.searchCount, data.selectRate, data.lastUsedAt ?? null); // a missing lastUsedAt is bound as null
+}

package/dist/embedder.js CHANGED Viewed

@@ -1,10 +1,34 @@
 import { fetchWithTimeout } from "./common";
 import { warn } from "./warn";
+// ── Default local model ─────────────────────────────────────────────────────
+/**
+ * Default local transformer model for embeddings.
+ * `bge-small-en-v1.5` scores higher on MTEB benchmarks than the previous
+ * `all-MiniLM-L6-v2` at the same 384-dimension footprint.
+ */
+export const DEFAULT_LOCAL_MODEL = "Xenova/bge-small-en-v1.5"; // returned by getLocalModelName() when no override model is given
+/**
+ * Return the local model name that will be used for embedding.
+ * When `overrideModel` is a non-empty string it takes precedence; otherwise
+ * (undefined, null or "") `DEFAULT_LOCAL_MODEL` is returned.
+ */
+function getLocalModelName(overrideModel) {
+    return overrideModel || DEFAULT_LOCAL_MODEL; // `||`, not `??`: any falsy override falls back to the default
+}
 // Cache the promise itself (not the resolved result) so concurrent calls share
 // the same initialisation work and never download the model twice.
+// The cache is keyed by model name so switching models gets a fresh pipeline.
 let localEmbedderPromise;
-async function getLocalEmbedder() {
+let localEmbedderModelName;
+async function getLocalEmbedder(modelName) {
+    const resolvedModel = getLocalModelName(modelName);
+    // If the cached pipeline was created for a different model, discard it.
+    if (localEmbedderPromise && localEmbedderModelName !== resolvedModel) {
+        localEmbedderPromise = undefined;
+        localEmbedderModelName = undefined;
+    }
     if (!localEmbedderPromise) {
+        localEmbedderModelName = resolvedModel;
         localEmbedderPromise = (async () => {
             let pipeline;
             try {
@@ -15,18 +39,23 @@ async function getLocalEmbedder() {
                 throw new Error("Semantic search requires @xenova/transformers. Install it with: npm install @xenova/transformers");
             }
             const pipelineFn = pipeline;
-            return pipelineFn("feature-extraction", "Xenova/all-MiniLM-L6-v2");
+            return pipelineFn("feature-extraction", resolvedModel);
         })();
         // HI-13: Clear the cached promise on failure so the next call retries
         // instead of permanently rejecting every subsequent call with the same error.
         localEmbedderPromise.catch(() => {
             localEmbedderPromise = undefined;
+            localEmbedderModelName = undefined;
         });
     }
     return localEmbedderPromise;
 }
-async function embedLocal(text) {
-    const model = await getLocalEmbedder();
+export function resetLocalEmbedder() { // Forget the cached local pipeline so the next getLocalEmbedder() call creates a new one.
+    localEmbedderPromise = undefined;
+    localEmbedderModelName = undefined; // cleared together with the promise so the two never disagree
+}
+async function embedLocal(text, modelName) { // Embed one text with the local pipeline; modelName may be undefined (default model).
+    const model = await getLocalEmbedder(modelName); // resolves to the (possibly cached) feature-extraction pipeline
     const result = await model(text, { pooling: "mean", normalize: true });
     return Array.from(result.data);
 }
@@ -71,17 +100,68 @@ async function embedRemote(text, config) {
     }
     return l2Normalize(json.data[0].embedding);
 }
+// ── Helpers ──────────────────────────────────────────────────────────────────
+/** Check whether an EmbeddingConnectionConfig has a remote endpoint, i.e. a non-empty `endpoint` starting with "http://" or "https://" (case-sensitive prefix test only, not full URL validation). */
+function hasRemoteEndpoint(config) {
+    return !!config.endpoint && (config.endpoint.startsWith("http://") || config.endpoint.startsWith("https://")); // `config` itself must be non-null; callers guard that
+}
+// ── LRU embedding cache ─────────────────────────────────────────────────────
+// Caches query embeddings to avoid redundant computation for repeated queries.
+// Uses a simple Map with LRU eviction (delete + re-insert to move to end), relying on Map's insertion-order iteration.
+const EMBED_CACHE_MAX = 100; // embed() evicts the oldest key once the Map holds this many entries
+const embedCache = new Map(); // key: string from embedCacheKey(); value: the embedding returned by embed()
+/**
+ * Build a cache key from query text and optional config: `local::<text>` when no config is given, otherwise `<endpoint>:<model>:<text>`.
+ * Different endpoints/models should not share cached embeddings.
+ * apiKey deliberately excluded: same endpoint+model produce identical embeddings regardless of auth
+ */
+function embedCacheKey(text, config) {
+    if (!config)
+        return `local::${text}`;
+    const endpoint = config.endpoint || ""; // the raw endpoint string is used, whether or not it passes hasRemoteEndpoint()
+    const model = config.model || config.localModel || ""; // NOTE(review): embed() hands config.localModel to the local path, but config.model wins here when both are set — confirm this cannot alias two local models
+    return `${endpoint}:${model}:${text}`;
+}
+/**
+ * Clear the embedding cache. Call when the embedding model changes
+ * or when you want to force fresh embeddings.
+ */
+export function clearEmbeddingCache() {
+    embedCache.clear(); // only the cached vectors are dropped; the cached local pipeline is reset separately by resetLocalEmbedder()
+}
 // ── Public API ──────────────────────────────────────────────────────────────
 /**
  * Generate an embedding for the given text.
- * If embeddingConfig is provided, uses the configured OpenAI-compatible endpoint.
- * Otherwise falls back to local @xenova/transformers.
+ * If embeddingConfig has a remote endpoint, uses the configured OpenAI-compatible endpoint.
+ * Otherwise falls back to local @xenova/transformers using the model from
+ * `embeddingConfig.localModel` or `DEFAULT_LOCAL_MODEL`.
+ * ("Remote endpoint" means `endpoint` starts with "http://" or "https://", as tested by hasRemoteEndpoint.)
+ * Results are cached in an LRU cache (at most EMBED_CACHE_MAX = 100 entries) keyed by query text
+ * and embedding config. Repeated identical queries return the same cached array instance, not a copy.
  */
 export async function embed(text, embeddingConfig) {
-    if (embeddingConfig) {
-        return embedRemote(text, embeddingConfig);
+    const key = embedCacheKey(text, embeddingConfig);
+    // Check cache first
+    const cached = embedCache.get(key);
+    if (cached) {
+        // Move to end (most recently used) for LRU ordering
+        embedCache.delete(key);
+        embedCache.set(key, cached);
+        return cached;
+    }
+    // Compute the embedding (remote only when the config has an http(s) endpoint)
+    const result = embeddingConfig && hasRemoteEndpoint(embeddingConfig)
+        ? await embedRemote(text, embeddingConfig)
+        : await embedLocal(text, embeddingConfig?.localModel);
+    // Evict oldest entry if at capacity
+    if (embedCache.size >= EMBED_CACHE_MAX) {
+        const oldest = embedCache.keys().next().value; // first key in insertion order = least recently used
+        if (oldest !== undefined) {
+            embedCache.delete(oldest);
+        }
     }
-    return embedLocal(text);
+    embedCache.set(key, result); // only reached when the embedding call resolved; a thrown error caches nothing
+    return result;
 }
 // ── Batch embedding ─────────────────────────────────────────────────────────
 /**
@@ -92,13 +172,14 @@ export async function embed(text, embeddingConfig) {
 export async function embedBatch(texts, embeddingConfig) {
     if (texts.length === 0)
         return [];
-    if (embeddingConfig) {
+    if (embeddingConfig && hasRemoteEndpoint(embeddingConfig)) { // a config without an http(s) endpoint now takes the local path below
         return embedRemoteBatch(texts, embeddingConfig);
     }
     // Local transformer: process sequentially (pipeline handles one at a time)
+    const localModel = embeddingConfig?.localModel; // undefined lets embedLocal fall back to the default model
     const results = [];
     for (const text of texts) {
-        results.push(await embedLocal(text));
+        results.push(await embedLocal(text, localModel)); // calls embedLocal directly, so the embed() cache is neither read nor filled here
     }
     return results;
 }
@@ -164,7 +245,7 @@ export function cosineSimilarity(a, b) {
 }
 // ── Availability check ──────────────────────────────────────────────────────
 export async function isEmbeddingAvailable(embeddingConfig) {
-    if (embeddingConfig) {
+    if (embeddingConfig && hasRemoteEndpoint(embeddingConfig)) {
         try {
             await embedRemote("test", embeddingConfig);
             return true;
@@ -174,7 +255,7 @@ export async function isEmbeddingAvailable(embeddingConfig) {
         }
     }
     try {
-        await getLocalEmbedder();
+        await getLocalEmbedder(embeddingConfig?.localModel);
         return true;
     }
     catch {

package/dist/file-context.js CHANGED Viewed

@@ -69,6 +69,9 @@ const matchers = [];
 /** Renderer lookup by name. */
 const renderers = new Map();
 let builtinsPromise;
+export function resetBuiltinsCache() { // Clears the cached builtinsPromise; presumably makes the next lazy "ensure built-ins" call start over — its consumer is outside this excerpt, TODO confirm.
+    builtinsPromise = undefined;
+}
 /**
  * Ensure that built-in matchers and renderers are registered.
  * Called lazily on first use of runMatchers/getRenderer.