npm - akm-cli - Versions diffs - 0.7.5 → 0.8.0-rc2 - Mend

akm-cli 0.7.5 → 0.8.0-rc2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (152) hide show

package/.github/CHANGELOG.md +1 -1
package/dist/cli/parse-args.js +43 -0
package/dist/cli.js +853 -479
package/dist/commands/agent-dispatch.js +102 -0
package/dist/commands/agent-support.js +62 -0
package/dist/commands/config-cli.js +68 -84
package/dist/commands/consolidate.js +823 -0
package/dist/commands/distill-promotion-policy.js +658 -0
package/dist/commands/distill.js +244 -52
package/dist/commands/eval-cases.js +40 -0
package/dist/commands/events.js +2 -23
package/dist/commands/graph.js +222 -0
package/dist/commands/health.js +376 -0
package/dist/commands/help/help-accept.md +9 -0
package/dist/commands/help/help-improve.md +53 -0
package/dist/commands/help/help-proposals.md +15 -0
package/dist/commands/help/help-propose.md +17 -0
package/dist/commands/help/help-reject.md +8 -0
package/dist/commands/history.js +3 -30
package/dist/commands/improve.js +1170 -0
package/dist/commands/info.js +2 -2
package/dist/commands/init.js +2 -2
package/dist/commands/install-audit.js +5 -1
package/dist/commands/installed-stashes.js +118 -138
package/dist/commands/knowledge.js +133 -0
package/dist/commands/lint/agent-linter.js +46 -0
package/dist/commands/lint/base-linter.js +285 -0
package/dist/commands/lint/command-linter.js +46 -0
package/dist/commands/lint/default-linter.js +13 -0
package/dist/commands/lint/index.js +107 -0
package/dist/commands/lint/knowledge-linter.js +13 -0
package/dist/commands/lint/memory-linter.js +58 -0
package/dist/commands/lint/registry.js +33 -0
package/dist/commands/lint/skill-linter.js +42 -0
package/dist/commands/lint/task-linter.js +47 -0
package/dist/commands/lint/types.js +1 -0
package/dist/commands/lint/workflow-linter.js +53 -0
package/dist/commands/lint.js +1 -0
package/dist/commands/proposal.js +8 -7
package/dist/commands/propose.js +78 -28
package/dist/commands/reflect.js +143 -35
package/dist/commands/registry-search.js +2 -2
package/dist/commands/remember.js +54 -0
package/dist/commands/schema-repair.js +130 -0
package/dist/commands/search.js +21 -5
package/dist/commands/show.js +121 -17
package/dist/commands/source-add.js +10 -10
package/dist/commands/source-manage.js +11 -19
package/dist/commands/tasks.js +385 -0
package/dist/commands/url-checker.js +39 -0
package/dist/commands/vault.js +8 -26
package/dist/core/action-contributors.js +25 -0
package/dist/core/asset-ref.js +4 -0
package/dist/core/asset-registry.js +4 -16
package/dist/core/asset-spec.js +10 -0
package/dist/core/common.js +94 -0
package/dist/core/concurrent.js +22 -0
package/dist/core/config.js +222 -128
package/dist/core/events.js +73 -126
package/dist/core/frontmatter.js +3 -1
package/dist/core/markdown.js +17 -0
package/dist/core/memory-improve.js +678 -0
package/dist/core/parse.js +155 -0
package/dist/core/paths.js +101 -3
package/dist/core/proposal-validators.js +61 -0
package/dist/core/proposals.js +49 -38
package/dist/core/state-db.js +775 -0
package/dist/core/time.js +51 -0
package/dist/core/warn.js +59 -1
package/dist/indexer/db-search.js +52 -238
package/dist/indexer/db.js +378 -1
package/dist/indexer/ensure-index.js +61 -0
package/dist/indexer/graph-boost.js +247 -94
package/dist/indexer/graph-db.js +201 -0
package/dist/indexer/graph-dedup.js +99 -0
package/dist/indexer/graph-extraction.js +409 -76
package/dist/indexer/index-context.js +10 -0
package/dist/indexer/indexer.js +442 -290
package/dist/indexer/llm-cache.js +47 -0
package/dist/indexer/match-contributors.js +141 -0
package/dist/indexer/matchers.js +24 -190
package/dist/indexer/memory-inference.js +63 -29
package/dist/indexer/metadata-contributors.js +26 -0
package/dist/indexer/metadata.js +194 -175
package/dist/indexer/path-resolver.js +89 -0
package/dist/indexer/ranking-contributors.js +204 -0
package/dist/indexer/ranking.js +74 -0
package/dist/indexer/search-hit-enrichers.js +22 -0
package/dist/indexer/search-source.js +24 -9
package/dist/indexer/semantic-status.js +2 -16
package/dist/indexer/walker.js +25 -0
package/dist/integrations/agent/config.js +175 -3
package/dist/integrations/agent/index.js +3 -1
package/dist/integrations/agent/pipeline.js +39 -0
package/dist/integrations/agent/profiles.js +67 -5
package/dist/integrations/agent/prompts.js +77 -72
package/dist/integrations/agent/runners.js +31 -0
package/dist/integrations/agent/sdk-runner.js +120 -0
package/dist/integrations/agent/spawn.js +71 -16
package/dist/integrations/lockfile.js +10 -18
package/dist/integrations/session-logs/index.js +65 -0
package/dist/integrations/session-logs/providers/claude-code.js +56 -0
package/dist/integrations/session-logs/providers/opencode.js +52 -0
package/dist/integrations/session-logs/types.js +1 -0
package/dist/llm/call-ai.js +74 -0
package/dist/llm/client.js +61 -122
package/dist/llm/feature-gate.js +27 -16
package/dist/llm/graph-extract.js +297 -62
package/dist/llm/memory-infer.js +49 -71
package/dist/llm/metadata-enhance.js +39 -22
package/dist/llm/prompts/graph-extract-user-prompt.md +12 -0
package/dist/output/cli-hints-full.md +277 -0
package/dist/output/cli-hints-short.md +65 -0
package/dist/output/cli-hints.js +2 -318
package/dist/output/renderers.js +190 -123
package/dist/output/shapes.js +33 -0
package/dist/output/text.js +239 -2
package/dist/registry/providers/skills-sh.js +61 -49
package/dist/registry/providers/static-index.js +44 -48
package/dist/setup/setup.js +510 -11
package/dist/sources/provider-factory.js +2 -1
package/dist/sources/providers/git.js +2 -2
package/dist/sources/website-ingest.js +4 -0
package/dist/tasks/backends/cron.js +200 -0
package/dist/tasks/backends/exec-utils.js +25 -0
package/dist/tasks/backends/index.js +32 -0
package/dist/tasks/backends/launchd-template.xml +19 -0
package/dist/tasks/backends/launchd.js +184 -0
package/dist/tasks/backends/schtasks-template.xml +29 -0
package/dist/tasks/backends/schtasks.js +212 -0
package/dist/tasks/parser.js +198 -0
package/dist/tasks/resolveAkmBin.js +84 -0
package/dist/tasks/runner.js +432 -0
package/dist/tasks/schedule.js +208 -0
package/dist/tasks/schema.js +13 -0
package/dist/tasks/validator.js +59 -0
package/dist/wiki/index-template.md +12 -0
package/dist/wiki/ingest-workflow-template.md +54 -0
package/dist/wiki/log-template.md +8 -0
package/dist/wiki/schema-template.md +61 -0
package/dist/wiki/wiki-templates.js +12 -0
package/dist/wiki/wiki.js +10 -61
package/dist/workflows/authoring.js +5 -25
package/dist/workflows/renderer.js +8 -3
package/dist/workflows/runs.js +59 -91
package/dist/workflows/validator.js +1 -1
package/dist/workflows/workflow-template.md +24 -0
package/docs/README.md +3 -0
package/docs/migration/release-notes/0.7.0.md +1 -1
package/docs/migration/release-notes/0.8.0.md +43 -0
package/package.json +3 -2
package/dist/templates/wiki-templates.js +0 -100

package/dist/indexer/db.js CHANGED Viewed

@@ -4,13 +4,15 @@ import { createRequire } from "node:module";
 import path from "node:path";
 import { parseAssetRef } from "../core/asset-ref";
 import { getDbPath } from "../core/paths";
+import { REGISTRY_INDEX_CACHE_DDL } from "../core/state-db";
 import { warn } from "../core/warn";
 import { cosineSimilarity } from "../llm/embedders/types";
 import { buildSearchFields } from "./search-fields";
 import { ensureUsageEventsSchema } from "./usage-events";
 // ── Constants ───────────────────────────────────────────────────────────────
-export const DB_VERSION = 10;
+export const DB_VERSION = 12;
 export const EMBEDDING_DIM = 384;
+export const GRAPH_SCHEMA_VERSION = 1;
 // ── Database lifecycle ──────────────────────────────────────────────────────
 export function openDatabase(dbPath, options) {
     const resolvedPath = dbPath ?? getDbPath();
@@ -183,6 +185,81 @@ function ensureSchema(db, embeddingDim) {
       reason            TEXT NOT NULL,
       updated_at        TEXT NOT NULL
     );
+  `);
+    // LLM enrichment result cache. Stores a SHA-256 body hash and the JSON
+    // result for each asset so that subsequent `akm index --enrich` runs can
+    // skip the LLM call when the body hasn't changed. The cache is keyed by
+    // a stable asset_ref string (e.g. the absolute file path for graph/memory
+    // passes, or `entryKey:passId` for the metadata-enhance pass).
+    // Entries are cleaned up when assets are removed or --re-enrich is used.
+    db.exec(`
+    CREATE TABLE IF NOT EXISTS llm_enrichment_cache (
+      asset_ref   TEXT PRIMARY KEY,
+      body_hash   TEXT NOT NULL,
+      result_json TEXT NOT NULL,
+      updated_at  INTEGER NOT NULL
+    );
+     CREATE INDEX IF NOT EXISTS idx_llm_cache_updated
+       ON llm_enrichment_cache(updated_at);
+  `);
+    db.exec(`
+    CREATE TABLE IF NOT EXISTS graph_meta (
+      stash_root          TEXT PRIMARY KEY,
+      schema_version      INTEGER NOT NULL,
+      generated_at        TEXT NOT NULL,
+      considered_files    INTEGER NOT NULL DEFAULT 0,
+      extracted_files     INTEGER NOT NULL DEFAULT 0,
+      entity_count        INTEGER NOT NULL DEFAULT 0,
+      relation_count      INTEGER NOT NULL DEFAULT 0,
+      extraction_coverage REAL NOT NULL DEFAULT 0,
+      density             REAL NOT NULL DEFAULT 0
+    );
+    CREATE TABLE IF NOT EXISTS graph_files (
+      stash_root  TEXT NOT NULL,
+      file_path   TEXT NOT NULL,
+      file_order  INTEGER NOT NULL,
+      file_type   TEXT NOT NULL,
+      body_hash   TEXT,
+      confidence  REAL,
+      PRIMARY KEY (stash_root, file_path),
+      FOREIGN KEY (stash_root) REFERENCES graph_meta(stash_root) ON DELETE CASCADE
+    );
+    CREATE INDEX IF NOT EXISTS idx_graph_files_stash_order
+      ON graph_files(stash_root, file_order);
+    CREATE TABLE IF NOT EXISTS graph_file_entities (
+      stash_root   TEXT NOT NULL,
+      file_path    TEXT NOT NULL,
+      entity_order INTEGER NOT NULL,
+      entity       TEXT NOT NULL,
+      PRIMARY KEY (stash_root, file_path, entity_order),
+      FOREIGN KEY (stash_root, file_path)
+        REFERENCES graph_files(stash_root, file_path)
+        ON DELETE CASCADE
+    );
+    CREATE INDEX IF NOT EXISTS idx_graph_file_entities_lookup
+      ON graph_file_entities(stash_root, file_path, entity_order);
+    CREATE TABLE IF NOT EXISTS graph_file_relations (
+      stash_root      TEXT NOT NULL,
+      file_path       TEXT NOT NULL,
+      relation_order  INTEGER NOT NULL,
+      from_entity     TEXT NOT NULL,
+      to_entity       TEXT NOT NULL,
+      relation_type   TEXT,
+      confidence      REAL,
+      PRIMARY KEY (stash_root, file_path, relation_order),
+      FOREIGN KEY (stash_root, file_path)
+        REFERENCES graph_files(stash_root, file_path)
+        ON DELETE CASCADE
+    );
+    CREATE INDEX IF NOT EXISTS idx_graph_file_relations_lookup
+      ON graph_file_relations(stash_root, file_path, relation_order);
   `);
     // FTS-dirty queue. Created here (not lazily on first upsert) so the
     // per-entry write path doesn't issue a CREATE TABLE IF NOT EXISTS on
@@ -247,6 +324,10 @@ function ensureSchema(db, embeddingDim) {
     }
     // Usage telemetry table
     ensureUsageEventsSchema(db);
+    // Registry index cache table — caches remote registry index documents so
+    // `akm search` does not hit the network on every invocation. The DDL is
+    // defined in state-db.ts and shared here to avoid duplication.
+    db.exec(REGISTRY_INDEX_CACHE_DDL);
     // Restore usage_events backed up by the version-upgrade path above.
     restoreUsageEventsBackup(db, usageBackup);
 }
@@ -282,6 +363,16 @@ function handleVersionUpgrade(db) {
     db.exec("DROP TABLE IF EXISTS entries_vec");
     db.exec("DROP TABLE IF EXISTS entries_fts");
     db.exec("DROP TABLE IF EXISTS index_dir_state");
+    db.exec("DROP TABLE IF EXISTS llm_enrichment_cache");
+    db.exec("DROP INDEX IF EXISTS idx_llm_cache_updated");
+    db.exec("DROP TABLE IF EXISTS graph_file_relations");
+    db.exec("DROP TABLE IF EXISTS graph_file_entities");
+    db.exec("DROP TABLE IF EXISTS graph_files");
+    db.exec("DROP TABLE IF EXISTS graph_meta");
+    db.exec("DROP TABLE IF EXISTS graph_relations");
+    db.exec("DROP TABLE IF EXISTS graph_entities");
+    db.exec("DROP TABLE IF EXISTS graph_nodes");
+    db.exec("DROP TABLE IF EXISTS graph_stashes");
     db.exec("DROP INDEX IF EXISTS idx_entries_dir");
     db.exec("DROP INDEX IF EXISTS idx_entries_type");
     db.exec("DROP TABLE IF EXISTS entries");
@@ -953,3 +1044,289 @@ export function upsertUtilityScore(db, entryId, data) {
       updated_at = datetime('now')
   `).run(entryId, data.utility, data.showCount, data.searchCount, data.selectRate, data.lastUsedAt ?? null);
 }
+/**
+ * Look up a cached LLM result for the given asset_ref.
+ *
+ * Returns `undefined` when no entry exists OR when the stored body_hash
+ * doesn't match `currentBodyHash` (body has changed since the result was
+ * cached). In both cases the caller should invoke the LLM and write a new
+ * cache entry.
+ */
+export function getLlmCacheEntry(db, assetRef, currentBodyHash) {
+    const row = db
+        .prepare("SELECT asset_ref, body_hash, result_json, updated_at FROM llm_enrichment_cache WHERE asset_ref = ?")
+        .get(assetRef);
+    if (!row)
+        return undefined;
+    // Hash mismatch → body changed, treat as cache miss.
+    if (row.body_hash !== currentBodyHash)
+        return undefined;
+    return {
+        assetRef: row.asset_ref,
+        bodyHash: row.body_hash,
+        resultJson: row.result_json,
+        updatedAt: row.updated_at,
+    };
+}
+/**
+ * Insert or update a cached LLM result for the given asset_ref.
+ */
+export function upsertLlmCacheEntry(db, assetRef, bodyHash, resultJson) {
+    db.prepare(`INSERT INTO llm_enrichment_cache (asset_ref, body_hash, result_json, updated_at)
+     VALUES (?, ?, ?, ?)
+     ON CONFLICT(asset_ref) DO UPDATE SET
+       body_hash   = excluded.body_hash,
+       result_json = excluded.result_json,
+       updated_at  = excluded.updated_at`).run(assetRef, bodyHash, resultJson, Date.now());
+}
+/**
+ * Delete LLM cache entries whose asset_ref is no longer present in the
+ * `entries` table. Should be called during the cleanup phase of each index
+ * run to prevent the cache from growing unboundedly as assets are removed.
+ *
+ * The filter uses two exact `NOT IN` matches against the `entries` table
+ * because graph/memory cache refs are absolute file paths, while enrichment
+ * cache refs are entry_key strings — a cache row is kept only when its
+ * asset_ref equals a live `file_path` or a live `entry_key`, and is deleted
+ * otherwise.
+ * NOTE(review): refs of the form `entryKey:passId` would not equal any
+ * `entry_key` exactly and so would be purged here — confirm this is intended.
+ */
+export function clearStaleCacheEntries(db) {
+    try {
+        db.exec(`
+      DELETE FROM llm_enrichment_cache
+      WHERE asset_ref NOT IN (SELECT file_path FROM entries)
+        AND asset_ref NOT IN (SELECT entry_key FROM entries)
+    `);
+    }
+    catch {
+        /* ignore — table may not exist in very old DBs opened without ensureSchema */
+    }
+}
+/**
+ * Compute a stable SHA-256 hex digest of a UTF-8 string using Bun's native
+ * hashing. Used as the body_hash key in `llm_enrichment_cache`.
+ *
+ * Bun.CryptoHasher is synchronous and allocation-free compared to Web Crypto,
+ * making it suitable for use inside tight per-asset loops.
+ */
+export function computeBodyHash(body) {
+    const hasher = new Bun.CryptoHasher("sha256");
+    hasher.update(body);
+    return hasher.digest("hex");
+}
+/**
+ * Count search and show events for the given entry refs.
+ * Returns a Map<ref, count> with only refs that have at least one event.
+ * Used by the improve loop to find high-retrieval assets without feedback.
+ */
+export function getRetrievalCounts(db, refs) {
+    if (refs.length === 0)
+        return new Map();
+    const result = new Map();
+    // Chunk to stay within SQLITE_MAX_VARIABLE_NUMBER (same pattern as getUtilityScoresByIds).
+    for (let i = 0; i < refs.length; i += SQLITE_CHUNK_SIZE) {
+        const chunk = refs.slice(i, i + SQLITE_CHUNK_SIZE);
+        const placeholders = chunk.map(() => "?").join(", ");
+        const rows = db
+            .prepare(`SELECT entry_ref, COUNT(*) AS cnt FROM usage_events
+         WHERE event_type IN ('search','show') AND entry_ref IN (${placeholders})
+         GROUP BY entry_ref`)
+            .all(...chunk);
+        for (const r of rows)
+            result.set(r.entry_ref, r.cnt);
+    }
+    return result;
+}
+/**
+ * Apply a MemRL reward signal to a batch of entries via exponential moving
+ * average (EMA): next = clamp(current + lr * (reward - current), 0, 1).
+ *
+ * Wrapped in a single transaction so all bumps succeed or fail together.
+ * The indexer (`akm index`) will overwrite these values at next reindex run;
+ * bumps are intentionally temporary hints between index runs, not permanent
+ * overrides.
+ */
+export function bumpUtilityScoresBatch(db, entryIds, reward, lr = 0.1) {
+    if (entryIds.length === 0)
+        return;
+    db.transaction(() => {
+        const scoreMap = getUtilityScoresByIds(db, entryIds);
+        const now = new Date().toISOString();
+        const stmt = db.prepare(`INSERT INTO utility_scores (entry_id, utility, show_count, search_count, select_rate, last_used_at, updated_at)
+       VALUES (?, ?, 0, 0, 0, ?, ?)
+       ON CONFLICT(entry_id) DO UPDATE SET
+         utility = excluded.utility,
+         updated_at = excluded.updated_at`);
+        for (const entryId of entryIds) {
+            const existing = scoreMap.get(entryId);
+            const current = existing?.utility ?? 0;
+            const next = Math.max(0, Math.min(1, current + lr * (reward - current)));
+            stmt.run(entryId, next, now, now);
+        }
+    })();
+}
+// ── Indexer-phase helpers (moved from indexer.ts) ────────────────────────────
+/**
+ * Return all entries that do not yet have an embedding row.
+ * Used by the embedding phase to determine which entries need vectors generated.
+ */
+export function getAllEntriesForEmbedding(db) {
+    return db
+        .prepare(`
+      SELECT e.id, e.search_text AS searchText, e.entry_key AS entryKey, e.file_path AS filePath FROM entries e
+      WHERE NOT EXISTS (SELECT 1 FROM embeddings b WHERE b.id = e.id)
+        AND e.entry_type != 'vault'
+    `)
+        .all();
+}
+/**
+ * Upsert a workflow document record for an indexed entry.
+ * Persists the parsed workflow AST as JSON alongside a FNV-1a hash of the
+ * source content for future incremental fast-paths.
+ */
+export function upsertWorkflowDocument(db, entryId, doc, content) {
+    const sourceHash = computeSourceHash(content);
+    db.prepare(`INSERT INTO workflow_documents (entry_id, schema_version, document_json, source_path, source_hash, updated_at)
+     VALUES (?, ?, ?, ?, ?, ?)
+     ON CONFLICT(entry_id) DO UPDATE SET
+       schema_version = excluded.schema_version,
+       document_json = excluded.document_json,
+       source_path = excluded.source_path,
+       source_hash = excluded.source_hash,
+       updated_at = excluded.updated_at`).run(entryId, doc.schemaVersion, JSON.stringify(doc), doc.source.path, sourceHash, new Date().toISOString());
+}
+/**
+ * Compute a cheap FNV-1a hash of a buffer for source-identity tracking.
+ * Not security-sensitive; used as an incremental fast-path skip key.
+ */
+export function computeSourceHash(content) {
+    let hash = 0x811c9dc5;
+    for (let i = 0; i < content.length; i++) {
+        hash ^= content[i];
+        hash = Math.imul(hash, 0x01000193);
+    }
+    return (hash >>> 0).toString(16);
+}
+/**
+ * Return up to 20 distinct zero-result search queries from the
+ * `usage_events` table within the given lookback window (`sinceDays`,
+ * default 30).
+ *
+ * Reads `usage_events` rows with event_type = 'search' whose metadata JSON
+ * blob has `resultCount = 0`; there is no separate `search_events` table.
+ * Any query error is caught and an empty array is returned, so callers never
+ * need to guard against DB schema differences. Rows whose extracted query is
+ * null are dropped from the result.
+ */
+export function getZeroResultSearches(db, sinceDays = 30) {
+    const since = new Date(Date.now() - sinceDays * 24 * 60 * 60 * 1000).toISOString();
+    try {
+        const rows = db
+            .prepare(`SELECT DISTINCT json_extract(metadata, '$.query') AS query
+         FROM usage_events
+         WHERE event_type = 'search'
+           AND created_at >= ?
+           AND json_extract(metadata, '$.resultCount') = 0
+         ORDER BY created_at DESC LIMIT 20`)
+            .all(since);
+        return rows.map((r) => r.query).filter((q) => q !== null);
+    }
+    catch {
+        return []; // table may not exist in older DBs
+    }
+}
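+/**
+ * Look up an entry's numeric id by asset type and name.
+ * Matches rows whose `entry_type` equals `type` and whose `entry_key` ends
+ * with `type:name`. The match is a SQL `LIKE` suffix pattern, so any `%` or
+ * `_` inside `type` or `name` acts as a wildcard.
+ * Returns null when no matching row is found.
+ */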
+export function getEntryByRef(db, type, name) {
+    return db
+        .prepare("SELECT id FROM entries WHERE entry_type = ? AND entry_key LIKE ?")
+        .get(type, `%${type}:${name}`);
+}
+/**
+ * Upsert a utility score adjustment derived from accumulated feedback events.
+ *
+ * - positiveDelta: +0.05 per positive event
+ * - negativeDelta: -0.03 per negative event
+ * - Score is clamped to [0.0, 1.0]
+ * - A new row starts at 0.5 + delta so the first positive feedback immediately
+ *   lifts the entry above the neutral midpoint.
+ */
+export function applyFeedbackToUtilityScore(db, entryId, positiveCount, negativeCount) {
+    if (positiveCount === 0 && negativeCount === 0)
+        return;
+    const delta = positiveCount * 0.05 - negativeCount * 0.03;
+    const now = new Date().toISOString();
+    db.prepare(`
+    INSERT INTO utility_scores (entry_id, utility, show_count, search_count, select_rate, last_used_at, updated_at)
+    VALUES (?, MAX(0.0, MIN(1.0, 0.5 + ?)), 0, 0, 0, ?, ?)
+    ON CONFLICT(entry_id) DO UPDATE SET
+      utility    = MAX(0.0, MIN(1.0, utility + ?)),
+      updated_at = ?
+  `).run(entryId, delta, now, now, delta, now);
+}
+/**
+ * Re-link detached usage_events to their current entry_ids via entry_ref.
+ *
+ * After a full rebuild, entry IDs change. This query matches events to their
+ * new entry rows using the stable `entry_ref` ("type:name") column so usage
+ * history survives a full reindex.
+ */
+export function relinkUsageEvents(db) {
+    try {
+        db.exec(`
+      UPDATE usage_events SET entry_id = (
+        SELECT e.id FROM entries e
+        WHERE substr(e.entry_key, length(e.entry_key) - length(usage_events.entry_ref)) = ':' || usage_events.entry_ref
+        LIMIT 1
+      )
+      WHERE entry_id IS NULL AND entry_ref IS NOT NULL
+    `);
+    }
+    catch {
+        /* ignore if table doesn't exist yet */
+    }
+}
+// ── registry_index_cache helpers ─────────────────────────────────────────────
+/**
+ * Upsert a registry index cache entry in index.db.
+ *
+ * @param db          - Open index.db connection (from openDatabase / openExistingDatabase).
+ * @param registryUrl - Canonical URL of the registry (used as primary key).
+ * @param indexJson   - Serialised registry index document (JSON string).
+ * @param opts.etag        - HTTP ETag from the response (optional).
+ * @param opts.lastModified - HTTP Last-Modified from the response (optional).
+ */
+export function upsertRegistryIndexCache(db, registryUrl, indexJson, opts) {
+    db.prepare(`
+    INSERT INTO registry_index_cache (registry_url, fetched_at, etag, last_modified, index_json)
+    VALUES (?, ?, ?, ?, ?)
+    ON CONFLICT(registry_url) DO UPDATE SET
+      fetched_at    = excluded.fetched_at,
+      etag          = excluded.etag,
+      last_modified = excluded.last_modified,
+      index_json    = excluded.index_json
+  `).run(registryUrl, new Date().toISOString(), opts?.etag ?? null, opts?.lastModified ?? null, indexJson);
+}
+/**
+ * Look up a cached registry index entry from index.db.
+ * Returns undefined when not found or when the entry is older than `maxAgeMs`.
+ *
+ * TTL check: if `Date.now() - new Date(fetched_at).getTime() > maxAgeMs` the
+ * entry is considered a cache miss and undefined is returned.
+ *
+ * @param db          - Open index.db connection.
+ * @param registryUrl - Canonical URL of the registry (primary key).
+ * @param maxAgeMs    - Maximum age in milliseconds before the entry is stale (default: 1 hour).
+ */
+export function getRegistryIndexCache(db, registryUrl, maxAgeMs = 3_600_000 /* 1 hour */) {
+    const row = db
+        .prepare(`SELECT fetched_at, etag, last_modified, index_json
+       FROM registry_index_cache WHERE registry_url = ?`)
+        .get(registryUrl);
+    if (!row)
+        return undefined;
+    const fetchedAt = Date.parse(row.fetched_at);
+    if (Number.isNaN(fetchedAt) || Date.now() - fetchedAt > maxAgeMs)
+        return undefined;
+    return { indexJson: row.index_json, etag: row.etag, lastModified: row.last_modified };
+}

package/dist/indexer/ensure-index.js CHANGED Viewed

@@ -9,9 +9,67 @@
  * behind a single entry point.
  */
 import fs from "node:fs";
+import path from "node:path";
+import { ASSET_SPECS, TYPE_DIRS } from "../core/asset-spec";
 import { getDbPath } from "../core/paths";
 import { warn } from "../core/warn";
 import { closeDatabase, getEntryCount, getMeta, openExistingDatabase } from "./db";
+function getIndexableFiles(root, spec) {
+    if (!fs.existsSync(root))
+        return [];
+    const files = [];
+    const stack = [root];
+    while (stack.length > 0) {
+        const current = stack.pop();
+        if (!current)
+            continue;
+        let entries;
+        try {
+            entries = fs.readdirSync(current, { withFileTypes: true });
+        }
+        catch {
+            continue;
+        }
+        for (const entry of entries) {
+            if (entry.name === ".stash.json")
+                continue;
+            const fullPath = path.join(current, entry.name);
+            if (entry.isSymbolicLink())
+                continue;
+            if (entry.isDirectory()) {
+                if (entry.name.startsWith("."))
+                    continue;
+                stack.push(fullPath);
+                continue;
+            }
+            if (entry.isFile() && spec.isRelevantFile(entry.name)) {
+                files.push(fullPath);
+            }
+        }
+    }
+    return files;
+}
+function hasNewerIndexableFiles(stashDir, builtAt) {
+    if (!builtAt)
+        return true;
+    const builtAtMs = new Date(builtAt).getTime();
+    if (!Number.isFinite(builtAtMs))
+        return true;
+    for (const [type, spec] of Object.entries(ASSET_SPECS)) {
+        const typeRoot = path.join(stashDir, TYPE_DIRS[type] ?? spec.stashDir);
+        const files = getIndexableFiles(typeRoot, spec);
+        for (const file of files) {
+            try {
+                if (fs.statSync(file).mtimeMs > builtAtMs)
+                    return true;
+            }
+            catch {
+                return true;
+            }
+        }
+    }
+    return false;
+}
 /**
  * Check whether the local index is stale relative to the given stash directory.
  * Returns `true` when the index is missing, empty, or was built against a
@@ -27,6 +85,9 @@ export function isIndexStale(stashDir) {
         const entryCount = getEntryCount(db);
         if (entryCount === 0)
             return true;
+        const builtAt = getMeta(db, "builtAt");
+        if (hasNewerIndexableFiles(stashDir, builtAt))
+            return true;
         const storedStashDir = getMeta(db, "stashDir");
         if (storedStashDir !== stashDir) {
             // Check if the incoming stashDir appears in the stored stashDirs array