akm-cli 0.6.0-rc1 → 0.6.0-rc2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/CHANGELOG.md +21 -0
  2. package/README.md +9 -9
  3. package/dist/cli.js +181 -111
  4. package/dist/{completions.js → commands/completions.js} +1 -1
  5. package/dist/{config-cli.js → commands/config-cli.js} +109 -11
  6. package/dist/{curate.js → commands/curate.js} +8 -3
  7. package/dist/{info.js → commands/info.js} +15 -9
  8. package/dist/{init.js → commands/init.js} +4 -4
  9. package/dist/{install-audit.js → commands/install-audit.js} +4 -7
  10. package/dist/{installed-stashes.js → commands/installed-stashes.js} +77 -31
  11. package/dist/{migration-help.js → commands/migration-help.js} +2 -2
  12. package/dist/{registry-search.js → commands/registry-search.js} +8 -6
  13. package/dist/{remember.js → commands/remember.js} +55 -49
  14. package/dist/{stash-search.js → commands/search.js} +28 -69
  15. package/dist/{self-update.js → commands/self-update.js} +3 -3
  16. package/dist/{stash-show.js → commands/show.js} +103 -78
  17. package/dist/{stash-add.js → commands/source-add.js} +42 -32
  18. package/dist/{stash-clone.js → commands/source-clone.js} +12 -10
  19. package/dist/{stash-source-manage.js → commands/source-manage.js} +24 -24
  20. package/dist/{vault.js → commands/vault.js} +43 -0
  21. package/dist/{stash-ref.js → core/asset-ref.js} +4 -4
  22. package/dist/{asset-registry.js → core/asset-registry.js} +1 -1
  23. package/dist/{asset-spec.js → core/asset-spec.js} +1 -1
  24. package/dist/{config.js → core/config.js} +79 -31
  25. package/dist/core/errors.js +90 -0
  26. package/dist/{frontmatter.js → core/frontmatter.js} +5 -3
  27. package/dist/core/write-source.js +280 -0
  28. package/dist/{db-search.js → indexer/db-search.js} +25 -19
  29. package/dist/{db.js → indexer/db.js} +70 -47
  30. package/dist/{file-context.js → indexer/file-context.js} +3 -3
  31. package/dist/{indexer.js → indexer/indexer.js} +123 -31
  32. package/dist/{manifest.js → indexer/manifest.js} +10 -10
  33. package/dist/{matchers.js → indexer/matchers.js} +3 -6
  34. package/dist/{metadata.js → indexer/metadata.js} +9 -5
  35. package/dist/{search-source.js → indexer/search-source.js} +52 -41
  36. package/dist/{semantic-status.js → indexer/semantic-status.js} +2 -2
  37. package/dist/{walker.js → indexer/walker.js} +1 -1
  38. package/dist/{lockfile.js → integrations/lockfile.js} +1 -1
  39. package/dist/{llm-client.js → llm/client.js} +1 -1
  40. package/dist/{embedders → llm/embedders}/local.js +2 -2
  41. package/dist/{embedders → llm/embedders}/remote.js +1 -1
  42. package/dist/{embedders → llm/embedders}/types.js +1 -1
  43. package/dist/{metadata-enhance.js → llm/metadata-enhance.js} +2 -2
  44. package/dist/{cli-hints.js → output/cli-hints.js} +1 -0
  45. package/dist/{output-context.js → output/context.js} +21 -3
  46. package/dist/{renderers.js → output/renderers.js} +9 -65
  47. package/dist/{output-shapes.js → output/shapes.js} +18 -4
  48. package/dist/{output-text.js → output/text.js} +1 -1
  49. package/dist/{registry-build-index.js → registry/build-index.js} +16 -7
  50. package/dist/{create-provider-registry.js → registry/create-provider-registry.js} +6 -2
  51. package/dist/registry/factory.js +33 -0
  52. package/dist/{origin-resolve.js → registry/origin-resolve.js} +1 -1
  53. package/dist/{providers → registry/providers}/index.js +1 -1
  54. package/dist/{providers → registry/providers}/skills-sh.js +59 -3
  55. package/dist/{providers → registry/providers}/static-index.js +80 -12
  56. package/dist/registry/providers/types.js +25 -0
  57. package/dist/{registry-resolve.js → registry/resolve.js} +3 -3
  58. package/dist/{detect.js → setup/detect.js} +0 -27
  59. package/dist/{ripgrep-install.js → setup/ripgrep-install.js} +1 -1
  60. package/dist/{ripgrep-resolve.js → setup/ripgrep-resolve.js} +2 -2
  61. package/dist/{setup.js → setup/setup.js} +16 -56
  62. package/dist/{stash-include.js → sources/include.js} +1 -1
  63. package/dist/sources/provider-factory.js +36 -0
  64. package/dist/sources/provider.js +21 -0
  65. package/dist/sources/providers/filesystem.js +35 -0
  66. package/dist/{stash-providers → sources/providers}/git.js +53 -64
  67. package/dist/{stash-providers → sources/providers}/index.js +3 -4
  68. package/dist/sources/providers/install-types.js +14 -0
  69. package/dist/{stash-providers → sources/providers}/npm.js +42 -41
  70. package/dist/{stash-providers → sources/providers}/provider-utils.js +3 -3
  71. package/dist/{stash-providers → sources/providers}/sync-from-ref.js +2 -2
  72. package/dist/{stash-providers → sources/providers}/tar-utils.js +11 -8
  73. package/dist/{stash-providers → sources/providers}/website.js +29 -65
  74. package/dist/{stash-resolve.js → sources/resolve.js} +8 -7
  75. package/dist/{wiki.js → wiki/wiki.js} +12 -11
  76. package/dist/{workflow-authoring.js → workflows/authoring.js} +37 -14
  77. package/dist/{workflow-cli.js → workflows/cli.js} +2 -1
  78. package/dist/{workflow-db.js → workflows/db.js} +1 -1
  79. package/dist/workflows/document-cache.js +20 -0
  80. package/dist/workflows/parser.js +379 -0
  81. package/dist/workflows/renderer.js +78 -0
  82. package/dist/{workflow-runs.js → workflows/runs.js} +72 -28
  83. package/dist/workflows/schema.js +11 -0
  84. package/dist/workflows/validator.js +48 -0
  85. package/docs/migration/release-notes/0.6.0.md +69 -23
  86. package/package.json +1 -1
  87. package/dist/errors.js +0 -45
  88. package/dist/llm.js +0 -16
  89. package/dist/registry-factory.js +0 -19
  90. package/dist/ripgrep.js +0 -2
  91. package/dist/stash-provider-factory.js +0 -35
  92. package/dist/stash-provider.js +0 -3
  93. package/dist/stash-providers/filesystem.js +0 -71
  94. package/dist/stash-providers/openviking.js +0 -348
  95. package/dist/stash-types.js +0 -1
  96. package/dist/workflow-markdown.js +0 -260
  97. /package/dist/{common.js → core/common.js} +0 -0
  98. /package/dist/{markdown.js → core/markdown.js} +0 -0
  99. /package/dist/{paths.js → core/paths.js} +0 -0
  100. /package/dist/{warn.js → core/warn.js} +0 -0
  101. /package/dist/{search-fields.js → indexer/search-fields.js} +0 -0
  102. /package/dist/{usage-events.js → indexer/usage-events.js} +0 -0
  103. /package/dist/{github.js → integrations/github.js} +0 -0
  104. /package/dist/{embedder.js → llm/embedder.js} +0 -0
  105. /package/dist/{embedders → llm/embedders}/cache.js +0 -0
  106. /package/dist/{registry-provider.js → registry/types.js} +0 -0
  107. /package/dist/{setup-steps.js → setup/steps.js} +0 -0
  108. /package/dist/{registry-types.js → sources/types.js} +0 -0
@@ -1,29 +1,29 @@
1
1
  /**
2
- * Database-backed (SQLite + FTS5/vector) stash search implementation.
2
+ * Database-backed (SQLite + FTS5/vector) source search implementation.
3
3
  *
4
- * Extracted from stash-search.ts to break the circular import:
5
- * stash-search.ts → stash-providers/filesystem.ts → db-search.ts (no cycle)
4
+ * Extracted from source-search.ts to break the circular import:
5
+ * source-search.ts → sources/providers/filesystem.ts → db-search.ts (no cycle)
6
6
  *
7
- * stash-search.ts imports this module for the `searchLocal` export.
8
- * stash-providers/filesystem.ts also imports `searchLocal` from here.
7
+ * source-search.ts imports this module for the `searchLocal` export.
8
+ * sources/providers/filesystem.ts also imports `searchLocal` from here.
9
9
  *
10
10
  * Renamed from `local-search.ts` to signal that this is the DB-layer search
11
11
  * implementation, not a "local vs. remote" distinction.
12
12
  */
13
13
  import fs from "node:fs";
14
14
  import path from "node:path";
15
- import { defaultRendererRegistry } from "./asset-registry";
16
- import { deriveCanonicalAssetNameFromStashRoot } from "./asset-spec";
15
+ import { makeAssetRef } from "../core/asset-ref";
16
+ import { defaultRendererRegistry } from "../core/asset-registry";
17
+ import { deriveCanonicalAssetNameFromStashRoot } from "../core/asset-spec";
18
+ import { getDbPath } from "../core/paths";
19
+ import { warn } from "../core/warn";
17
20
  import { closeDatabase, getAllEntries, getEntryById, getEntryCount, getMeta, getUtilityScoresByIds, openDatabase, searchFts, searchVec, } from "./db";
18
21
  import { getRenderer } from "./file-context";
19
22
  import { generateMetadataFlat, loadStashFile, shouldIndexStashFile } from "./metadata";
20
- import { getDbPath } from "./paths";
21
23
  import { buildSearchText } from "./search-fields";
22
24
  import { buildEditHint, findSourceForPath, isEditable } from "./search-source";
23
25
  import { deriveSemanticProviderFingerprint, getEffectiveSemanticStatus, isSemanticRuntimeReady, readSemanticStatus, } from "./semantic-status";
24
- import { makeAssetRef } from "./stash-ref";
25
26
  import { walkStashFlat } from "./walker";
26
- import { warn } from "./warn";
27
27
  export async function rendererForType(type, registry = defaultRendererRegistry) {
28
28
  const name = registry.rendererNameFor(type);
29
29
  return name ? getRenderer(name) : undefined;
@@ -45,7 +45,7 @@ function resolveSearchHitOrigin(source) {
45
45
  export async function searchLocal(input) {
46
46
  const { query, searchType, limit, stashDir, sources, config } = input;
47
47
  const rendererRegistry = input.rendererRegistry ?? defaultRendererRegistry;
48
- const allStashDirs = sources.map((s) => s.path);
48
+ const allSourceDirs = sources.map((s) => s.path);
49
49
  const rawStatus = readSemanticStatus();
50
50
  const semanticStatus = getEffectiveSemanticStatus(config, rawStatus);
51
51
  const warnings = [];
@@ -85,7 +85,7 @@ export async function searchLocal(input) {
85
85
  }
86
86
  }
87
87
  if (entryCount > 0 && stashDirMatch) {
88
- const { hits, embedMs, rankMs } = await searchDatabase(db, query, searchType, limit, stashDir, allStashDirs, config, sources, rendererRegistry);
88
+ const { hits, embedMs, rankMs } = await searchDatabase(db, query, searchType, limit, stashDir, allSourceDirs, config, sources, rendererRegistry);
89
89
  return {
90
90
  hits,
91
91
  tip: hits.length === 0
@@ -105,7 +105,7 @@ export async function searchLocal(input) {
105
105
  catch (error) {
106
106
  warn("Search index unavailable, falling back to substring search:", error instanceof Error ? error.message : String(error));
107
107
  }
108
- const hitArrays = await Promise.all(allStashDirs.map((dir) => substringSearch(query, searchType, limit, dir, sources, config, rendererRegistry)));
108
+ const hitArrays = await Promise.all(allSourceDirs.map((dir) => substringSearch(query, searchType, limit, dir, sources, config, rendererRegistry)));
109
109
  const hits = hitArrays.flat().slice(0, limit);
110
110
  return {
111
111
  hits,
@@ -114,7 +114,7 @@ export async function searchLocal(input) {
114
114
  };
115
115
  }
116
116
  // ── Database search ─────────────────────────────────────────────────────────
117
- async function searchDatabase(db, query, searchType, limit, stashDir, allStashDirs, config, sources, rendererRegistry = defaultRendererRegistry) {
117
+ async function searchDatabase(db, query, searchType, limit, stashDir, allSourceDirs, config, sources, rendererRegistry = defaultRendererRegistry) {
118
118
  // Empty query: return all entries
119
119
  if (!query) {
120
120
  const typeFilter = searchType === "any" ? undefined : searchType;
@@ -135,7 +135,7 @@ async function searchDatabase(db, query, searchType, limit, stashDir, allStashDi
135
135
  query,
136
136
  rankingMode: "fts",
137
137
  defaultStashDir: stashDir,
138
- allStashDirs,
138
+ allSourceDirs,
139
139
  sources,
140
140
  config,
141
141
  rendererRegistry,
@@ -355,13 +355,19 @@ async function searchDatabase(db, query, searchType, limit, stashDir, allStashDi
355
355
  item.utilityBoosted = true;
356
356
  }
357
357
  }
358
+ // ── minScore floor ──────────────────────────────────────────────────────
359
+ // Drop semantic-only hits (cosine-only, no FTS match) whose score falls
360
+ // below the configured floor. FTS hits and hybrid hits are always kept.
361
+ // Default floor: 0.2. Set search.minScore = 0 in config to disable.
362
+ const minScore = config.search?.minScore ?? 0.2;
363
+ const preFilter = minScore > 0 ? scored.filter((item) => item.rankingMode !== "semantic" || item.score >= minScore) : scored;
358
364
  // Deterministic tiebreaker on equal scores
359
- scored.sort((a, b) => b.score - a.score || a.entry.name.localeCompare(b.entry.name));
365
+ preFilter.sort((a, b) => b.score - a.score || a.entry.name.localeCompare(b.entry.name));
360
366
  // Deduplicate by file path — keep only the highest-scored entry per file.
361
367
  // Multiple .stash.json entries can map to the same file (e.g. entries without
362
368
  // a filename field all collapse to files[0]). Showing the same path/ref
363
369
  // multiple times clutters results.
364
- const deduped = deduplicateByPath(scored);
370
+ const deduped = deduplicateByPath(preFilter);
365
371
  const rankMs = Date.now() - tRank0;
366
372
  const selected = deduped.slice(0, limit);
367
373
  const hits = await Promise.all(selected.map(({ entry, filePath, score, rankingMode, utilityBoosted }) => buildDbHit({
@@ -372,7 +378,7 @@ async function searchDatabase(db, query, searchType, limit, stashDir, allStashDi
372
378
  query,
373
379
  rankingMode,
374
380
  defaultStashDir: stashDir,
375
- allStashDirs,
381
+ allSourceDirs,
376
382
  sources,
377
383
  config,
378
384
  utilityBoosted,
@@ -389,7 +395,7 @@ async function tryVecScores(db, query, k, config) {
389
395
  if (hasEmbeddings !== "1")
390
396
  return null;
391
397
  try {
392
- const { embed } = await import("./embedder.js");
398
+ const { embed } = await import("../llm/embedder.js");
393
399
  const queryEmbedding = await embed(query, config.embedding);
394
400
  const vecResults = searchVec(db, queryEmbedding, k);
395
401
  const scores = new Map();
@@ -2,13 +2,13 @@ import { Database } from "bun:sqlite";
2
2
  import fs from "node:fs";
3
3
  import { createRequire } from "node:module";
4
4
  import path from "node:path";
5
- import { cosineSimilarity } from "./embedders/types";
6
- import { getDbPath } from "./paths";
5
+ import { getDbPath } from "../core/paths";
6
+ import { warn } from "../core/warn";
7
+ import { cosineSimilarity } from "../llm/embedders/types";
7
8
  import { buildSearchFields } from "./search-fields";
8
9
  import { ensureUsageEventsSchema } from "./usage-events";
9
- import { warn } from "./warn";
10
10
  // ── Constants ───────────────────────────────────────────────────────────────
11
- export const DB_VERSION = 8;
11
+ export const DB_VERSION = 9;
12
12
  export const EMBEDDING_DIM = 384;
13
13
  // ── Database lifecycle ──────────────────────────────────────────────────────
14
14
  export function openDatabase(dbPath, options) {
@@ -19,6 +19,7 @@ export function openDatabase(dbPath, options) {
19
19
  }
20
20
  const db = new Database(resolvedPath);
21
21
  db.exec("PRAGMA journal_mode = WAL");
22
+ db.exec("PRAGMA busy_timeout = 5000");
22
23
  db.exec("PRAGMA foreign_keys = ON");
23
24
  // Try to load sqlite-vec extension
24
25
  loadVecExtension(db);
@@ -102,6 +103,22 @@ function ensureSchema(db, embeddingDim) {
102
103
 
103
104
  CREATE INDEX IF NOT EXISTS idx_entries_dir ON entries(dir_path);
104
105
  CREATE INDEX IF NOT EXISTS idx_entries_type ON entries(entry_type);
106
+ `);
107
+ // Validated WorkflowDocument JSON, one row per indexed workflow entry.
108
+ // Pure index data — fully rebuilt on each `akm index`. ON DELETE CASCADE
109
+ // means clearing entries (full rebuild or per-dir delete) drops these too.
110
+ db.exec(`
111
+ CREATE TABLE IF NOT EXISTS workflow_documents (
112
+ entry_id INTEGER PRIMARY KEY REFERENCES entries(id) ON DELETE CASCADE,
113
+ schema_version INTEGER NOT NULL,
114
+ document_json TEXT NOT NULL,
115
+ source_path TEXT NOT NULL,
116
+ source_hash TEXT NOT NULL,
117
+ updated_at TEXT NOT NULL
118
+ );
119
+
120
+ CREATE INDEX IF NOT EXISTS idx_workflow_documents_source_path
121
+ ON workflow_documents(source_path);
105
122
  `);
106
123
  // Set version immediately after table creation so a crash before the end of
107
124
  // ensureSchema() does not leave the database in a versionless state on next open.
@@ -145,6 +162,15 @@ function ensureSchema(db, embeddingDim) {
145
162
  updated_at TEXT NOT NULL DEFAULT (datetime('now')),
146
163
  FOREIGN KEY (entry_id) REFERENCES entries(id) ON DELETE CASCADE
147
164
  );
165
+ `);
166
+ // FTS-dirty queue. Created here (not lazily on first upsert) so the
167
+ // per-entry write path doesn't issue a CREATE TABLE IF NOT EXISTS on
168
+ // every call — that DDL would fire thousands of times during a full
169
+ // index. See `markFtsDirty` and `rebuildFts({ incremental: true })`.
170
+ db.exec(`
171
+ CREATE TABLE IF NOT EXISTS entries_fts_dirty (
172
+ entry_id INTEGER PRIMARY KEY
173
+ );
148
174
  `);
149
175
  // sqlite-vec table
150
176
  if (isVecAvailable(db)) {
@@ -280,45 +306,45 @@ export function setMeta(db, key, value) {
280
306
  * reflect the changes.
281
307
  */
282
308
  export function upsertEntry(db, entryKey, dirPath, filePath, stashDir, entry, searchText) {
283
- const stmt = db.prepare(`
284
- INSERT INTO entries (entry_key, dir_path, file_path, stash_dir, entry_json, search_text, entry_type)
285
- VALUES (?, ?, ?, ?, ?, ?, ?)
286
- ON CONFLICT(entry_key) DO UPDATE SET
287
- dir_path = excluded.dir_path,
288
- file_path = excluded.file_path,
289
- stash_dir = excluded.stash_dir,
290
- entry_json = excluded.entry_json,
291
- search_text = excluded.search_text,
292
- entry_type = excluded.entry_type
293
- `);
294
- stmt.run(entryKey, dirPath, filePath, stashDir, JSON.stringify(entry), searchText, entry.type);
295
- // Fetch the row id explicitly since last_insert_rowid() is unreliable for ON CONFLICT DO UPDATE
296
- const row = db.prepare("SELECT id FROM entries WHERE entry_key = ?").get(entryKey);
297
- if (!row)
309
+ // Hot path during indexing — cache the two prepared statements per
310
+ // database connection so we don't pay the SQL parse/compile cost on
311
+ // every call. The dirty-mark INSERT and the upsert-with-RETURNING
312
+ // share the same WeakMap so they live and die with the connection.
313
+ const stmts = getUpsertStmts(db);
314
+ const result = stmts.upsert.get(entryKey, dirPath, filePath, stashDir, JSON.stringify(entry), searchText, entry.type);
315
+ if (!result)
298
316
  throw new Error("upsertEntry: entry_key not found after upsert");
299
- // Mark this entry as FTS-dirty so an incremental rebuild only revisits the
300
- // entries that actually changed. Without this, every `akm index` run had to
301
- // re-scan and re-insert every FTS row, even if only one entry was touched.
302
- markFtsDirty(db, row.id);
303
- return row.id;
304
- }
305
- /**
306
- * Mark an entry as needing FTS re-indexing on the next `rebuildFts` call.
307
- *
308
- * The list lives in a small `entries_fts_dirty` table — a per-entry-id queue
309
- * of work items. `rebuildFts({ incremental: true })` drains this list rather
310
- * than scanning the entire `entries` table.
311
- */
312
- function markFtsDirty(db, entryId) {
313
- ensureFtsDirtyTable(db);
314
- db.prepare("INSERT OR IGNORE INTO entries_fts_dirty (entry_id) VALUES (?)").run(entryId);
315
- }
316
- function ensureFtsDirtyTable(db) {
317
- db.exec(`
318
- CREATE TABLE IF NOT EXISTS entries_fts_dirty (
319
- entry_id INTEGER PRIMARY KEY
320
- );
321
- `);
317
+ // Mark this entry as FTS-dirty so `rebuildFts({ incremental: true })`
318
+ // only revisits entries that actually changed. INSERT OR IGNORE is
319
+ // idempotent across multiple upserts of the same row.
320
+ stmts.markDirty.run(result.id);
321
+ return result.id;
322
+ }
323
+ const upsertStmtsByDb = new WeakMap();
324
+ function getUpsertStmts(db) {
325
+ const existing = upsertStmtsByDb.get(db);
326
+ if (existing)
327
+ return existing;
328
+ const stmts = {
329
+ // RETURNING id handles ON CONFLICT DO UPDATE correctly — no second
330
+ // SELECT round-trip needed (last_insert_rowid() is unreliable for
331
+ // ON CONFLICT). Use `.get()` so a single row comes back.
332
+ upsert: db.prepare(`
333
+ INSERT INTO entries (entry_key, dir_path, file_path, stash_dir, entry_json, search_text, entry_type)
334
+ VALUES (?, ?, ?, ?, ?, ?, ?)
335
+ ON CONFLICT(entry_key) DO UPDATE SET
336
+ dir_path = excluded.dir_path,
337
+ file_path = excluded.file_path,
338
+ stash_dir = excluded.stash_dir,
339
+ entry_json = excluded.entry_json,
340
+ search_text = excluded.search_text,
341
+ entry_type = excluded.entry_type
342
+ RETURNING id
343
+ `),
344
+ markDirty: db.prepare("INSERT OR IGNORE INTO entries_fts_dirty (entry_id) VALUES (?)"),
345
+ };
346
+ upsertStmtsByDb.set(db, stmts);
347
+ return stmts;
322
348
  }
323
349
  export function deleteEntriesByDir(db, dirPath) {
324
350
  db.transaction(() => {
@@ -419,7 +445,6 @@ export function rebuildFts(db, options) {
419
445
  db.transaction(() => {
420
446
  let rows;
421
447
  if (incremental) {
422
- ensureFtsDirtyTable(db);
423
448
  // Read the dirty queue and join against entries to get the JSON.
424
449
  // Then drop the matching rows from entries_fts so the INSERT below
425
450
  // doesn't double-up. The dirty list is drained at the end.
@@ -469,10 +494,8 @@ export function rebuildFts(db, options) {
469
494
  db.exec("DELETE FROM entries_fts_dirty");
470
495
  }
471
496
  else {
472
- // Full path: only drop the dirty table if it exists. Use
473
- // `CREATE IF NOT EXISTS` then DELETE so we don't error on databases
474
- // that haven't run any upserts yet (e.g. fresh schema).
475
- ensureFtsDirtyTable(db);
497
+ // Full path: drain the dirty queue too. The table is created by
498
+ // ensureSchema(), so it always exists at this point.
476
499
  db.exec("DELETE FROM entries_fts_dirty");
477
500
  }
478
501
  })();
@@ -6,8 +6,8 @@
6
6
  */
7
7
  import fs from "node:fs";
8
8
  import path from "node:path";
9
- import { toPosix } from "./common";
10
- import { parseFrontmatter } from "./frontmatter";
9
+ import { toPosix } from "../core/common";
10
+ import { parseFrontmatter } from "../core/frontmatter";
11
11
  /**
12
12
  * Build a FileContext from a stash root and an absolute file path.
13
13
  *
@@ -81,7 +81,7 @@ async function ensureBuiltinsRegistered() {
81
81
  if (!builtinsPromise) {
82
82
  builtinsPromise = (async () => {
83
83
  const { registerBuiltinMatchers } = await import("./matchers.js");
84
- const { registerBuiltinRenderers } = await import("./renderers.js");
84
+ const { registerBuiltinRenderers } = await import("../output/renderers.js");
85
85
  registerBuiltinMatchers();
86
86
  registerBuiltinRenderers();
87
87
  })();
@@ -1,27 +1,28 @@
1
1
  import fs from "node:fs";
2
2
  import path from "node:path";
3
- import { isHttpUrl, resolveStashDir } from "./common";
3
+ import { isHttpUrl, resolveStashDir, toErrorMessage } from "../core/common";
4
+ import { getDbPath } from "../core/paths";
5
+ import { warn } from "../core/warn";
6
+ import { takeWorkflowDocument } from "../workflows/document-cache";
4
7
  import { closeDatabase, deleteEntriesByDir, deleteEntriesByStashDir, getEmbeddingCount, getEntriesByDir, getEntryCount, getMeta, isVecAvailable, openDatabase, rebuildFts, setMeta, upsertEmbedding, upsertEntry, upsertUtilityScore, warnIfVecMissing, } from "./db";
5
8
  import { generateMetadataFlat, loadStashFile, shouldIndexStashFile } from "./metadata";
6
- import { getDbPath } from "./paths";
7
9
  import { buildSearchText } from "./search-fields";
8
10
  import { classifySemanticFailure, clearSemanticStatus, deriveSemanticProviderFingerprint, writeSemanticStatus, } from "./semantic-status";
9
11
  import { ensureUsageEventsSchema, purgeOldUsageEvents } from "./usage-events";
10
12
  import { walkStashFlat } from "./walker";
11
- import { warn } from "./warn";
12
13
  // ── Indexer ──────────────────────────────────────────────────────────────────
13
14
  export async function akmIndex(options) {
14
15
  const stashDir = options?.stashDir || resolveStashDir();
15
16
  const onProgress = options?.onProgress ?? (() => { });
16
17
  // Load config and resolve all stash sources
17
- const { loadConfig } = await import("./config.js");
18
+ const { loadConfig } = await import("../core/config.js");
18
19
  const config = loadConfig();
19
20
  // Ensure git stash caches are extracted before resolving stash dirs,
20
21
  // so their content directories exist on disk for the walker to discover.
21
- const { ensureStashCaches, resolveStashSources } = await import("./search-source.js");
22
- await ensureStashCaches(config);
23
- const allStashSources = resolveStashSources(stashDir, config);
24
- const allStashDirs = allStashSources.map((s) => s.path);
22
+ const { ensureSourceCaches, resolveSourceEntries } = await import("./search-source.js");
23
+ await ensureSourceCaches(config);
24
+ const allSourceEntries = resolveSourceEntries(stashDir, config);
25
+ const allSourceDirs = allSourceEntries.map((s) => s.path);
25
26
  const t0 = Date.now();
26
27
  // Open database — pass embedding dimension from config if available
27
28
  const dbPath = getDbPath();
@@ -37,7 +38,7 @@ export async function akmIndex(options) {
37
38
  phase: "summary",
38
39
  message: buildIndexSummaryMessage({
39
40
  mode: isIncremental ? "incremental" : "full",
40
- stashSources: allStashDirs.length,
41
+ sourcesCount: allSourceDirs.length,
41
42
  semanticSearchMode: config.semanticSearchMode,
42
43
  embeddingProvider: getEmbeddingProvider(config.embedding),
43
44
  llmEnabled: !!config.llm,
@@ -67,7 +68,7 @@ export async function akmIndex(options) {
67
68
  catch {
68
69
  warn("index_meta stashDirs value is corrupt JSON — treating as empty");
69
70
  }
70
- const currentSet = new Set(allStashDirs);
71
+ const currentSet = new Set(allSourceDirs);
71
72
  for (const dir of prevStashDirs) {
72
73
  if (!currentSet.has(dir)) {
73
74
  deleteEntriesByStashDir(db, dir);
@@ -80,7 +81,7 @@ export async function akmIndex(options) {
80
81
  // doFullDelete=true merges the wipe into the same transaction as the
81
82
  // inserts so readers never see an empty database mid-rebuild.
82
83
  const doFullDelete = options?.full || !isIncremental;
83
- const { scannedDirs, skippedDirs, generatedCount, dirsNeedingLlm, warnings } = await indexEntries(db, allStashSources, isIncremental, builtAtMs, doFullDelete);
84
+ const { scannedDirs, skippedDirs, generatedCount, dirsNeedingLlm, warnings } = await indexEntries(db, allSourceEntries, isIncremental, builtAtMs, doFullDelete);
84
85
  onProgress({
85
86
  phase: "scan",
86
87
  message: `Scanned ${scannedDirs} ${scannedDirs === 1 ? "directory" : "directories"} and skipped ${skippedDirs}.`,
@@ -130,7 +131,7 @@ export async function akmIndex(options) {
130
131
  // are read-only caches, and regenerating their indexes would mutate
131
132
  // cache content.
132
133
  try {
133
- const { regenerateAllWikiIndexes } = await import("./wiki.js");
134
+ const { regenerateAllWikiIndexes } = await import("../wiki/wiki.js");
134
135
  regenerateAllWikiIndexes(stashDir);
135
136
  }
136
137
  catch {
@@ -142,7 +143,7 @@ export async function akmIndex(options) {
142
143
  // Update metadata
143
144
  setMeta(db, "builtAt", new Date().toISOString());
144
145
  setMeta(db, "stashDir", stashDir);
145
- setMeta(db, "stashDirs", JSON.stringify(allStashDirs));
146
+ setMeta(db, "stashDirs", JSON.stringify(allSourceDirs));
146
147
  setMeta(db, "hasEmbeddings", embeddingResult.success ? "1" : "0");
147
148
  const totalEntries = getEntryCount(db);
148
149
  // Warn on every index run if using JS fallback with many entries
@@ -188,7 +189,7 @@ export async function akmIndex(options) {
188
189
  }
189
190
  }
190
191
  // ── Extracted helpers for indexing ────────────────────────────────────────────
191
- async function indexEntries(db, allStashSources, isIncremental, builtAtMs, doFullDelete = false) {
192
+ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, doFullDelete = false) {
192
193
  let scannedDirs = 0;
193
194
  let skippedDirs = 0;
194
195
  let generatedCount = 0;
@@ -196,12 +197,12 @@ async function indexEntries(db, allStashSources, isIncremental, builtAtMs, doFul
196
197
  const seenPaths = new Set();
197
198
  const dirsNeedingLlm = [];
198
199
  const dirRecords = [];
199
- for (const stashSource of allStashSources) {
200
- const currentStashDir = stashSource.path;
200
+ for (const sourceAdded of allSourceEntries) {
201
+ const currentStashDir = sourceAdded.path;
201
202
  const fileContexts = walkStashFlat(currentStashDir);
202
203
  // Wiki-root stashes: all .md files are indexed as wiki pages under wikiName
203
- if (stashSource.wikiName) {
204
- const wikiName = stashSource.wikiName;
204
+ if (sourceAdded.wikiName) {
205
+ const wikiName = sourceAdded.wikiName;
205
206
  const wikiDirGroups = new Map();
206
207
  for (const ctx of fileContexts) {
207
208
  if (ctx.ext !== ".md")
@@ -353,7 +354,13 @@ async function indexEntries(db, allStashSources, isIncremental, builtAtMs, doFul
353
354
  const entryKey = `${currentStashDir}:${entry.type}:${entry.name}`;
354
355
  const searchText = buildSearchText(entry);
355
356
  const entryWithSize = attachFileSize(entry, entryPath);
356
- upsertEntry(db, entryKey, dirPath, entryPath, currentStashDir, entryWithSize, searchText);
357
+ const entryId = upsertEntry(db, entryKey, dirPath, entryPath, currentStashDir, entryWithSize, searchText);
358
+ if (entry.type === "workflow") {
359
+ const doc = takeWorkflowDocument(entry);
360
+ if (doc) {
361
+ upsertWorkflowDocument(db, entryId, doc, fs.readFileSync(entryPath));
362
+ }
363
+ }
357
364
  }
358
365
  // Collect dirs needing LLM enhancement during the first walk
359
366
  if (stash.entries.some((e) => e.quality === "generated")) {
@@ -428,7 +435,7 @@ async function generateEmbeddingsForDb(db, config, onProgress) {
428
435
  setMeta(db, "hasEmbeddings", "0");
429
436
  }
430
437
  try {
431
- const { embedBatch } = await import("./embedder.js");
438
+ const { embedBatch } = await import("../llm/embedder.js");
432
439
  const allEntries = getAllEntriesForEmbedding(db);
433
440
  if (allEntries.length === 0) {
434
441
  onProgress({ phase: "embeddings", message: "Embeddings already up to date." });
@@ -486,10 +493,32 @@ function attachFileSize(entry, entryPath) {
486
493
  return entry;
487
494
  }
488
495
  }
496
+ function upsertWorkflowDocument(db, entryId, doc, content) {
497
+ const sourceHash = computeSourceHash(content);
498
+ db.prepare(`INSERT INTO workflow_documents (entry_id, schema_version, document_json, source_path, source_hash, updated_at)
499
+ VALUES (?, ?, ?, ?, ?, ?)
500
+ ON CONFLICT(entry_id) DO UPDATE SET
501
+ schema_version = excluded.schema_version,
502
+ document_json = excluded.document_json,
503
+ source_path = excluded.source_path,
504
+ source_hash = excluded.source_hash,
505
+ updated_at = excluded.updated_at`).run(entryId, doc.schemaVersion, JSON.stringify(doc), doc.source.path, sourceHash, new Date().toISOString());
506
+ }
507
+ function computeSourceHash(content) {
508
+ // Cheap, stable identity for the source markdown — used by future
509
+ // incremental fast-paths that skip re-validation when content is unchanged.
510
+ // Not security-sensitive; FNV-1a over the bytes is sufficient.
511
+ let hash = 0x811c9dc5;
512
+ for (let i = 0; i < content.length; i++) {
513
+ hash ^= content[i];
514
+ hash = Math.imul(hash, 0x01000193);
515
+ }
516
+ return (hash >>> 0).toString(16);
517
+ }
489
518
  function buildIndexSummaryMessage(options) {
490
- const stashSourceLabel = options.stashSources === 1 ? "stash source" : "stash sources";
519
+ const stashSourceLabel = options.sourcesCount === 1 ? "stash source" : "stash sources";
491
520
  const semanticDetail = getSemanticSearchLabel(options.semanticSearchMode, options.embeddingProvider, options.vecAvailable);
492
- return `Starting ${options.mode} index (${options.stashSources} ${stashSourceLabel}, semantic search: ${semanticDetail}, LLM: ${options.llmEnabled ? "enabled" : "disabled"}).`;
521
+ return `Starting ${options.mode} index (${options.sourcesCount} ${stashSourceLabel}, semantic search: ${semanticDetail}, LLM: ${options.llmEnabled ? "enabled" : "disabled"}).`;
493
522
  }
494
523
  function getEmbeddingProvider(embedding) {
495
524
  return isHttpUrl(embedding?.endpoint) ? "remote" : "local";
@@ -590,9 +619,8 @@ function isDirStale(dirPath, currentFiles, previousEntries, builtAtMs) {
590
619
  return false;
591
620
  }
592
621
  async function enhanceStashWithLlm(llmConfig, stash, files, summary) {
593
- const { enhanceMetadata } = await import("./llm.js");
622
+ const { enhanceMetadata } = await import("../llm/metadata-enhance");
594
623
  const enhanced = [];
595
- const seenSamples = new Set();
596
624
  for (const entry of stash.entries) {
597
625
  summary.attempted++;
598
626
  try {
@@ -621,10 +649,11 @@ async function enhanceStashWithLlm(llmConfig, stash, files, summary) {
621
649
  }
622
650
  catch (err) {
623
651
  enhanced.push(entry);
624
- const msg = err instanceof Error ? err.message : String(err);
625
- if (summary.failureSamples.length < 3 && !seenSamples.has(msg)) {
652
+ const msg = toErrorMessage(err);
653
+ // failureSamples is bounded to 3 items, so a linear scan is cheaper
654
+ // than maintaining a parallel Set for membership checks (#177 review).
655
+ if (summary.failureSamples.length < 3 && !summary.failureSamples.includes(msg)) {
626
656
  summary.failureSamples.push(msg);
627
- seenSamples.add(msg);
628
657
  }
629
658
  }
630
659
  }
@@ -667,10 +696,73 @@ export function matchEntryToFile(entryName, fileMap, files) {
667
696
  // Fallback to first file, or null if no files are available
668
697
  return files[0] || null;
669
698
  }
670
- // `buildSearchFields` and `buildSearchText` were previously re-exported from
671
- // here for backwards compatibility. Importers should now pull them directly
672
- // from `./search-fields` to avoid loading the indexer's full dependency
673
- // graph (LLM client, embedder facade) when only the text builder is needed.
699
/**
 * Resolve a single asset ref to its row in the index.
 *
 * Spec §6.2: `akm show` calls this to find the file for a ref and then reads
 * that file from disk. The index decides which file belongs to which ref.
 *
 * Which source directories are searched depends on `ref.origin`:
 *   - falsy origin      → every configured source, in the order returned by
 *                         `resolveSourceEntries`; the first hit wins.
 *   - `"local"`         → only the first configured source.
 *   - any other value   → only the source whose `registryId` equals it; an
 *                         unknown origin yields `null`.
 *
 * @param ref - Asset ref; `type`, `name` and optional `origin` are read.
 * @returns `{ entryKey, filePath, stashDir, type, name }` for the first
 *   matching row, or `null` when nothing matches (callers turn that into
 *   their own not-found error).
 */
export async function lookup(ref) {
    const { loadConfig } = await import("../core/config.js");
    const { resolveSourceEntries } = await import("./search-source.js");
    const sources = resolveSourceEntries(undefined, loadConfig());
    if (sources.length === 0) {
        return null;
    }
    const db = openDatabase(getDbPath());
    try {
        // Keys are compared as `${dir}:${type}:${name}`. The pattern carries no
        // wildcards of its own: `\`, `%` and `_` in the directory and name are
        // escaped so user-supplied characters are matched literally, and all
        // values are bound as parameters.
        // NOTE(review): SQLite's LIKE is ASCII case-insensitive by default, so
        // this is presumably a case-insensitive whole-key match — confirm intent.
        const escapeLike = (value) => value.replace(/[\\%_]/g, "\\$&");
        const keySuffix = escapeLike(`:${ref.type}:${ref.name}`);
        const selectSql = "SELECT entry_key AS entryKey, file_path AS filePath, stash_dir AS stashDir, entry_type AS type FROM entries WHERE entry_key LIKE ? ESCAPE '\\' AND entry_type = ? LIMIT 1";
        let searchDirs;
        if (!ref.origin) {
            searchDirs = sources.map((source) => source.path);
        }
        else if (ref.origin === "local") {
            searchDirs = [sources[0].path];
        }
        else {
            const namedSource = sources.find((source) => source.registryId === ref.origin);
            searchDirs = namedSource ? [namedSource.path] : [];
        }
        // An empty list simply falls through to the `null` result below.
        for (const dir of searchDirs) {
            const keyPattern = `${escapeLike(dir)}${keySuffix}`;
            const hit = db.prepare(selectSql).get(keyPattern, ref.type);
            if (!hit) {
                continue;
            }
            const { entryKey, filePath, stashDir, type } = hit;
            return { entryKey, filePath, stashDir, type, name: ref.name };
        }
        return null;
    }
    finally {
        // Always release the handle, including on early return or a thrown query error.
        closeDatabase(db);
    }
}
674
766
  // ── Utility score recomputation ──────────────────────────────────────────────
675
767
  /** Retention window for usage events: events older than this are purged. */
676
768
  const USAGE_EVENT_RETENTION_DAYS = 90;
@@ -8,16 +8,16 @@
8
8
  */
9
9
  import fs from "node:fs";
10
10
  import path from "node:path";
11
- import { deriveCanonicalAssetNameFromStashRoot } from "./asset-spec";
12
- import { resolveStashDir } from "./common";
13
- import { loadConfig } from "./config";
11
+ import { makeAssetRef } from "../core/asset-ref";
12
+ import { deriveCanonicalAssetNameFromStashRoot } from "../core/asset-spec";
13
+ import { resolveStashDir } from "../core/common";
14
+ import { loadConfig } from "../core/config";
15
+ import { getDbPath } from "../core/paths";
16
+ import { warn } from "../core/warn";
14
17
  import { closeDatabase, getAllEntries, getEntryCount, getMeta, openDatabase } from "./db";
15
18
  import { generateMetadataFlat, loadStashFile } from "./metadata";
16
- import { getDbPath } from "./paths";
17
- import { resolveStashSources } from "./search-source";
18
- import { makeAssetRef } from "./stash-ref";
19
+ import { resolveSourceEntries } from "./search-source";
19
20
  import { walkStashFlat } from "./walker";
20
- import { warn } from "./warn";
21
21
  const MAX_DESCRIPTION_LENGTH = 80;
22
22
  /**
23
23
  * Truncate a description string to a maximum length, appending "..." if truncated.
@@ -99,9 +99,9 @@ function getManifestFromDb(stashDir, config, sources, type) {
99
99
  * Get the manifest by walking the stash directory (fallback when no index).
100
100
  */
101
101
  async function getManifestFromWalker(sources, type) {
102
- const allStashDirs = sources.map((s) => s.path);
102
+ const allSourceDirs = sources.map((s) => s.path);
103
103
  const entries = [];
104
- for (const currentStashDir of allStashDirs) {
104
+ for (const currentStashDir of allSourceDirs) {
105
105
  const fileContexts = walkStashFlat(currentStashDir);
106
106
  // Group by parent directory
107
107
  const dirGroups = new Map();
@@ -154,7 +154,7 @@ export async function akmManifest(options) {
154
154
  const stashDir = options?.stashDir ?? resolveStashDir();
155
155
  const type = options?.type;
156
156
  const config = loadConfig();
157
- const sources = resolveStashSources(stashDir, config);
157
+ const sources = resolveSourceEntries(stashDir, config);
158
158
  // Fast path: try database
159
159
  const dbEntries = getManifestFromDb(stashDir, config, sources, type);
160
160
  if (dbEntries !== null) {
@@ -18,7 +18,8 @@
18
18
  * - `wikiMatcher` (20) -- classifies any `.md` under `wikis/<name>/…` as
19
19
  * `wiki`. Registered last so the later-wins tiebreaker beats agent at 20.
20
20
  */
21
- import { SCRIPT_EXTENSIONS } from "./asset-spec";
21
+ import { SCRIPT_EXTENSIONS } from "../core/asset-spec";
22
+ import { looksLikeWorkflow } from "../workflows/parser";
22
23
  import { registerMatcher } from "./file-context";
23
24
  // ── extensionMatcher (specificity: 3) ────────────────────────────────────────
24
25
  /**
@@ -140,11 +141,7 @@ export function smartMdMatcher(ctx) {
140
141
  if (ctx.ext !== ".md")
141
142
  return null;
142
143
  const body = ctx.content();
143
- const hasWorkflowSignals = /^#\s+Workflow:\s+/m.test(body) &&
144
- /^##\s+Step:\s+/m.test(body) &&
145
- /^Step ID:\s+/m.test(body) &&
146
- /^###\s+Instructions\s*$/m.test(body);
147
- if (hasWorkflowSignals) {
144
+ if (looksLikeWorkflow(body)) {
148
145
  return { type: "workflow", specificity: 19, renderer: "workflow-md" };
149
146
  }
150
147
  const fm = ctx.frontmatter();