akm-cli 0.9.0-beta.53 → 0.9.0-beta.55

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. package/dist/cli/clack.js +56 -0
  2. package/dist/cli/confirm.js +1 -1
  3. package/dist/cli.js +5 -3
  4. package/dist/commands/agent/contribute-cli.js +2 -3
  5. package/dist/commands/env/env-cli.js +187 -202
  6. package/dist/commands/env/secret-cli.js +109 -121
  7. package/dist/commands/feedback-cli.js +152 -155
  8. package/dist/commands/health/advisories.js +151 -0
  9. package/dist/commands/health/html-report.js +33 -10
  10. package/dist/commands/health/improve-metrics.js +754 -0
  11. package/dist/commands/health/llm-usage.js +65 -0
  12. package/dist/commands/health/md-report.js +103 -0
  13. package/dist/commands/health/metrics.js +278 -0
  14. package/dist/commands/health/task-runs.js +135 -0
  15. package/dist/commands/health/types.js +18 -0
  16. package/dist/commands/health/windows.js +196 -0
  17. package/dist/commands/health.js +15 -1492
  18. package/dist/commands/improve/anti-collapse.js +170 -0
  19. package/dist/commands/improve/collapse-detector.js +3 -2
  20. package/dist/commands/improve/consolidate.js +636 -633
  21. package/dist/commands/improve/dedup.js +1 -1
  22. package/dist/commands/improve/distill/content-repair.js +202 -0
  23. package/dist/commands/improve/distill/promote-memory.js +228 -0
  24. package/dist/commands/improve/distill/quality-gate.js +233 -0
  25. package/dist/commands/improve/distill-guards.js +127 -0
  26. package/dist/commands/improve/distill.js +49 -575
  27. package/dist/commands/improve/extract-cli.js +74 -76
  28. package/dist/commands/improve/extract.js +6 -4
  29. package/dist/commands/improve/hot-probation.js +45 -0
  30. package/dist/commands/improve/improve-auto-accept.js +3 -2
  31. package/dist/commands/improve/improve-cli.js +14 -13
  32. package/dist/commands/improve/improve-result-file.js +2 -1
  33. package/dist/commands/improve/improve.js +6 -5
  34. package/dist/commands/improve/loop-stages.js +19 -21
  35. package/dist/commands/improve/outcome-loop.js +18 -16
  36. package/dist/commands/improve/preparation.js +23 -5
  37. package/dist/commands/improve/procedural.js +10 -31
  38. package/dist/commands/improve/recombine.js +19 -43
  39. package/dist/commands/improve/reflect.js +1 -1
  40. package/dist/commands/improve/schema-similarity-gate.js +168 -0
  41. package/dist/commands/improve/shared.js +48 -0
  42. package/dist/commands/observability-cli.js +4 -4
  43. package/dist/commands/proposal/drain-policies.js +2 -2
  44. package/dist/commands/proposal/drain.js +1 -1
  45. package/dist/commands/proposal/legacy-import.js +115 -0
  46. package/dist/commands/proposal/proposal-cli.js +3 -3
  47. package/dist/commands/proposal/proposal.js +2 -1
  48. package/dist/commands/proposal/propose.js +1 -1
  49. package/dist/commands/proposal/repository.js +829 -0
  50. package/dist/commands/proposal/validators/proposals.js +5 -920
  51. package/dist/commands/read/curate.js +4 -4
  52. package/dist/commands/read/remember-cli.js +132 -137
  53. package/dist/commands/read/search-cli.js +7 -5
  54. package/dist/commands/read/search.js +7 -3
  55. package/dist/commands/read/show.js +3 -5
  56. package/dist/commands/registry-cli.js +76 -87
  57. package/dist/commands/sources/add-cli.js +91 -95
  58. package/dist/commands/sources/history.js +1 -1
  59. package/dist/commands/sources/init.js +12 -0
  60. package/dist/commands/sources/schema-repair.js +1 -1
  61. package/dist/commands/sources/sources-cli.js +3 -3
  62. package/dist/commands/sources/stash-cli.js +2 -2
  63. package/dist/commands/tasks/default-tasks.js +12 -0
  64. package/dist/commands/tasks/tasks-cli.js +1 -2
  65. package/dist/commands/wiki-cli.js +2 -3
  66. package/dist/core/common.js +3 -3
  67. package/dist/core/config/config-schema.js +6 -0
  68. package/dist/core/config/config.js +12 -0
  69. package/dist/core/deep-merge.js +38 -0
  70. package/dist/core/events.js +2 -1
  71. package/dist/core/logs-db.js +8 -13
  72. package/dist/core/paths.js +14 -14
  73. package/dist/core/state-db.js +13 -1140
  74. package/dist/core/warn.js +21 -0
  75. package/dist/indexer/db/db.js +72 -709
  76. package/dist/indexer/db/entry-mapper.js +41 -0
  77. package/dist/indexer/db/schema.js +516 -0
  78. package/dist/indexer/ensure-index.js +3 -2
  79. package/dist/indexer/feedback/utility-policy.js +85 -0
  80. package/dist/indexer/graph/graph-extraction.js +2 -1
  81. package/dist/indexer/index-writer-lock.js +18 -0
  82. package/dist/indexer/indexer.js +94 -27
  83. package/dist/indexer/read-preflight.js +23 -0
  84. package/dist/indexer/search/fts-query.js +51 -0
  85. package/dist/indexer/walk/walker.js +21 -13
  86. package/dist/integrations/agent/detect.js +9 -0
  87. package/dist/integrations/agent/index.js +1 -1
  88. package/dist/integrations/agent/spawn.js +15 -66
  89. package/dist/llm/client.js +12 -0
  90. package/dist/llm/embedder.js +26 -2
  91. package/dist/llm/embedders/local.js +7 -1
  92. package/dist/output/text/helpers.js +13 -0
  93. package/dist/scripts/migrate-storage.js +6903 -7424
  94. package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +49 -44
  95. package/dist/setup/detect.js +9 -0
  96. package/dist/setup/legacy-config.js +106 -0
  97. package/dist/setup/prompt.js +57 -0
  98. package/dist/setup/providers.js +14 -0
  99. package/dist/setup/registry-stash-loader.js +12 -0
  100. package/dist/setup/semantic-assets.js +124 -0
  101. package/dist/setup/setup.js +25 -1608
  102. package/dist/setup/steps/connection.js +734 -0
  103. package/dist/setup/steps/output.js +31 -0
  104. package/dist/setup/steps/platforms.js +124 -0
  105. package/dist/setup/steps/semantic.js +27 -0
  106. package/dist/setup/steps/sources.js +222 -0
  107. package/dist/setup/steps/stashdir.js +42 -0
  108. package/dist/setup/steps/tasks.js +152 -0
  109. package/dist/storage/repositories/canaries-repository.js +107 -0
  110. package/dist/storage/repositories/consolidation-repository.js +38 -0
  111. package/dist/storage/repositories/embeddings-repository.js +72 -0
  112. package/dist/storage/repositories/events-repository.js +187 -0
  113. package/dist/storage/repositories/extract-sessions-repository.js +96 -0
  114. package/dist/storage/repositories/improve-runs-repository.js +130 -0
  115. package/dist/storage/repositories/index-db.js +4 -7
  116. package/dist/storage/repositories/proposals-repository.js +220 -0
  117. package/dist/storage/repositories/recombine-repository.js +213 -0
  118. package/dist/storage/repositories/task-history-repository.js +93 -0
  119. package/dist/storage/sqlite-pragmas.js +3 -3
  120. package/dist/tasks/backends/index.js +9 -0
  121. package/dist/tasks/runner.js +11 -1
  122. package/package.json +2 -2
  123. package/dist/commands/improve/homeostatic.js +0 -497
@@ -7,6 +7,7 @@ import { probeLock, releaseLock, releaseLockIfOwned, tryAcquireLockSync } from "
7
7
  import { getDbPath, getIndexWriterLockPath } from "../core/paths.js";
8
8
  const INDEX_WRITER_LOCK_STALE_AFTER_MS = 12 * 60 * 60 * 1000;
9
9
  const INDEX_WRITER_WAIT_MS = 100;
10
+ const DEFAULT_INDEX_WRITER_MAX_WAIT_MS = 10 * 60 * 1000;
10
11
  const heldLocks = new Map();
11
12
  function buildPayload(purpose, pid = process.pid) {
12
13
  return JSON.stringify({
@@ -49,17 +50,23 @@ function retainHeldLock(lockPath) {
49
50
  export async function acquireIndexWriterLease(options) {
50
51
  const mode = options.mode ?? "wait";
51
52
  const lockPath = getIndexWriterLockPath();
53
+ const startedAt = Date.now();
54
+ const maxWaitMs = options.maxWaitMs ?? DEFAULT_INDEX_WRITER_MAX_WAIT_MS;
52
55
  fs.mkdirSync(path.dirname(lockPath), { recursive: true });
53
56
  if (heldLocks.has(lockPath)) {
57
+ options.onAcquired?.({ waitedMs: 0 });
54
58
  return retainHeldLock(lockPath);
55
59
  }
60
+ let lastWaitNoticeMs = 0;
56
61
  while (true) {
57
62
  throwIfAborted(options.signal);
58
63
  if (tryAcquireLockSync(lockPath, buildPayload(options.purpose))) {
64
+ options.onAcquired?.({ waitedMs: Date.now() - startedAt });
59
65
  return retainHeldLock(lockPath);
60
66
  }
61
67
  const probe = probeLock(lockPath, { staleAfterMs: INDEX_WRITER_LOCK_STALE_AFTER_MS });
62
68
  if (probe.state === "held" && probe.holderPid === process.pid) {
69
+ options.onAcquired?.({ waitedMs: Date.now() - startedAt });
63
70
  return retainHeldLock(lockPath);
64
71
  }
65
72
  if (probe.state === "stale") {
@@ -68,6 +75,17 @@ export async function acquireIndexWriterLease(options) {
68
75
  }
69
76
  if (mode === "try")
70
77
  return undefined;
78
+ // Held by another live process. Time out only *after* a real acquisition
79
+ // attempt, so a caller with maxWaitMs:0 still gets one chance at a free lock
80
+ // instead of throwing before it ever tries.
81
+ if (maxWaitMs >= 0 && Date.now() - startedAt >= maxWaitMs) {
82
+ throw new Error(`timed out waiting for index writer lease for ${options.purpose}`);
83
+ }
84
+ const waitedMs = Date.now() - startedAt;
85
+ if (waitedMs - lastWaitNoticeMs >= 15000) {
86
+ options.onWait?.({ waitedMs });
87
+ lastWaitNoticeMs = waitedMs;
88
+ }
71
89
  await delay(INDEX_WRITER_WAIT_MS);
72
90
  }
73
91
  }
@@ -118,7 +118,7 @@ async function runWalkPhase(ctx) {
118
118
  ctx.timing.tWalkEnd = Date.now();
119
119
  throwIfAborted(signal);
120
120
  // LLM enrichment for directories that need it
121
- await enhanceDirsWithLlm(db, config, dirsNeedingLlm, onProgress, signal, true, reEnrich);
121
+ await enhanceDirsWithLlm(db, config, dirsNeedingLlm, onProgress, signal, reEnrich);
122
122
  onProgress({
123
123
  phase: "llm",
124
124
  message: resolveIndexPassLLM("enrichment", config)
@@ -143,6 +143,7 @@ async function runEmbeddingPhase(ctx) {
143
143
  */
144
144
  async function runFinalizePhase(ctx) {
145
145
  const { db, config, sources, sourceDirs, isIncremental, stashDir, signal, onProgress } = ctx;
146
+ ctx.timing.tFinalizeStart = Date.now();
146
147
  // Rebuild FTS after all inserts. Use incremental mode when this whole
147
148
  // index run is incremental — only entries touched by `upsertEntry`
148
149
  // since the last rebuild are re-indexed.
@@ -153,10 +154,13 @@ async function runFinalizePhase(ctx) {
153
154
  });
154
155
  ctx.timing.tFtsEnd = Date.now();
155
156
  // Re-link detached usage_events and recompute utility scores.
157
+ onProgress({ phase: "finalize", message: "Relinking usage events." });
156
158
  relinkUsageEvents(db);
159
+ onProgress({ phase: "finalize", message: "Recomputing utility scores." });
157
160
  recomputeUtilityScores(db);
158
161
  // Purge LLM cache entries for assets that no longer exist in the index.
159
162
  try {
163
+ onProgress({ phase: "finalize", message: "Clearing stale LLM cache entries." });
160
164
  clearStaleCacheEntries(db);
161
165
  }
162
166
  catch {
@@ -164,6 +168,7 @@ async function runFinalizePhase(ctx) {
164
168
  }
165
169
  // Regenerate each wiki's index.md from its pages' frontmatter. Best-effort.
166
170
  try {
171
+ onProgress({ phase: "finalize", message: "Regenerating wiki indexes." });
167
172
  const { regenerateAllWikiIndexes } = await import("../wiki/wiki.js");
168
173
  regenerateAllWikiIndexes(stashDir);
169
174
  }
@@ -180,6 +185,7 @@ async function runFinalizePhase(ctx) {
180
185
  warnIfVecMissing(db);
181
186
  const totalEntries = getEntryCount(db);
182
187
  const semanticEntryCount = getEmbeddableEntryCount(db);
188
+ onProgress({ phase: "finalize", message: "Verifying semantic search state." });
183
189
  const verification = verifyIndexState(db, config, semanticEntryCount, embeddingResult);
184
190
  if (config.semanticSearchMode === "off") {
185
191
  clearSemanticStatus();
@@ -199,6 +205,7 @@ async function runFinalizePhase(ctx) {
199
205
  // Store verification result and totalEntries on ctx for the caller to use
200
206
  ctx.verification = verification;
201
207
  ctx.totalEntries = totalEntries;
208
+ ctx.timing.tFinalizeEnd = Date.now();
202
209
  // suppress unused warning — sources was previously used inline
203
210
  void sources;
204
211
  }
@@ -226,8 +233,34 @@ function runCleanPass(db, dryRun) {
226
233
  };
227
234
  }
228
235
  // ── Indexer ──────────────────────────────────────────────────────────────────
236
+ // ── Test seam ────────────────────────────────────────────────────────────────
237
+ // Swap-and-restore override. Inert in production; only tests call the setter.
238
+ let akmIndexOverride;
239
+ /** TEST-ONLY. Swap the implementation of `akmIndex`; pass undefined to restore. */
240
+ export function _setAkmIndexForTests(fake) {
241
+ akmIndexOverride = fake;
242
+ }
229
243
  export async function akmIndex(options) {
230
- return withIndexWriterLease({ purpose: "akm-index", signal: options?.signal }, async () => {
244
+ if (akmIndexOverride)
245
+ return akmIndexOverride(options);
246
+ return akmIndexReal(options);
247
+ }
248
+ async function akmIndexReal(options) {
249
+ const requestedAt = Date.now();
250
+ let acquiredAt = requestedAt;
251
+ return withIndexWriterLease({
252
+ purpose: "akm-index",
253
+ signal: options?.signal,
254
+ onWait: ({ waitedMs }) => {
255
+ options?.onProgress?.({
256
+ phase: "preflight",
257
+ message: `Waiting for index writer lease (${Math.round(waitedMs / 1000)}s elapsed).`,
258
+ });
259
+ },
260
+ onAcquired: ({ waitedMs }) => {
261
+ acquiredAt = requestedAt + waitedMs;
262
+ },
263
+ }, async () => {
231
264
  const stashDir = options?.stashDir || resolveStashDir();
232
265
  const onProgress = options?.onProgress ?? (() => { });
233
266
  const signal = options?.signal;
@@ -246,10 +279,17 @@ export async function akmIndex(options) {
246
279
  warnOnUnmigratedVaults(stashDir);
247
280
  // Ensure git stash caches are extracted before resolving stash dirs,
248
281
  // so their content directories exist on disk for the walker to discover.
282
+ const sourceCacheStart = Date.now();
283
+ onProgress({ phase: "preflight", message: "Hydrating source caches." });
249
284
  const { ensureSourceCaches, resolveSourceEntries } = await import("./search/search-source.js");
250
285
  await ensureSourceCaches(config, { force: full });
286
+ const sourceCacheEnd = Date.now();
251
287
  const allSourceEntries = resolveSourceEntries(stashDir, config);
252
288
  const allSourceDirs = allSourceEntries.map((s) => s.path);
289
+ onProgress({
290
+ phase: "preflight",
291
+ message: `Resolved ${allSourceDirs.length} stash source${allSourceDirs.length === 1 ? "" : "s"}.`,
292
+ });
253
293
  const t0 = Date.now();
254
294
  // Open database — pass embedding dimension from config if available
255
295
  const dbPath = getDbPath();
@@ -279,6 +319,8 @@ export async function akmIndex(options) {
279
319
  tLlmEnd: t0,
280
320
  tFtsEnd: t0,
281
321
  tEmbedEnd: t0,
322
+ tFinalizeStart: t0,
323
+ tFinalizeEnd: t0,
282
324
  },
283
325
  isIncremental,
284
326
  builtAtMs,
@@ -316,9 +358,15 @@ export async function akmIndex(options) {
316
358
  // After the normal index completes, remove entries whose source files no
317
359
  // longer exist on disk. Remote entries (empty file_path) are skipped.
318
360
  let cleanResult;
361
+ const cleanStart = Date.now();
319
362
  if (clean) {
363
+ onProgress({
364
+ phase: "finalize",
365
+ message: dryRun ? "Scanning for stale index entries (dry run)." : "Removing stale index entries.",
366
+ });
320
367
  cleanResult = runCleanPass(db, dryRun);
321
368
  }
369
+ const cleanEnd = Date.now();
322
370
  // ────────────────────────────────────────────────────────────────────────
323
371
  return {
324
372
  stashDir,
@@ -336,6 +384,12 @@ export async function akmIndex(options) {
336
384
  llmMs: timing.tLlmEnd - timing.tWalkEnd,
337
385
  embedMs: timing.tEmbedEnd - timing.tLlmEnd,
338
386
  ftsMs: timing.tFtsEnd - timing.tEmbedEnd,
387
+ finalizeMs: timing.tFinalizeEnd - timing.tFinalizeStart,
388
+ cleanMs: clean ? cleanEnd - cleanStart : 0,
389
+ preflightMs: timing.t0 - requestedAt,
390
+ leaseWaitMs: acquiredAt - requestedAt,
391
+ sourceCacheMs: sourceCacheEnd - sourceCacheStart,
392
+ endToEndMs: Date.now() - requestedAt,
339
393
  },
340
394
  ...(cleanResult !== undefined ? { clean: cleanResult } : {}),
341
395
  };
@@ -640,7 +694,7 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, hadR
640
694
  insertTransaction();
641
695
  return { scannedDirs, skippedDirs, generatedCount, warnings, dirsNeedingLlm };
642
696
  }
643
- async function enhanceDirsWithLlm(db, config, dirsNeedingLlm, onProgress, signal, _enrich = false, reEnrich = false) {
697
+ async function enhanceDirsWithLlm(db, config, dirsNeedingLlm, onProgress, signal, reEnrich = false) {
644
698
  // Resolve per-pass LLM config via the unified shim. Returns undefined when
645
699
  // either no `akm.llm` is configured or the user opted this pass out via
646
700
  // `index.enrichment.llm = false`. (#208)
@@ -842,31 +896,44 @@ async function generateEmbeddingsForDb(db, config, onProgress, signal) {
842
896
  warnVerbose(`[embed] ${ref} (${chars} chars, est. ${tokens} tokens) → batch ${batchNum}/${totalBatches}`);
843
897
  }
844
898
  }
845
- const embeddings = await embedBatch(texts, config.embedding, signal);
846
- throwIfAborted(signal);
847
- // Wrap all embedding upserts in a single transaction so partial
848
- // state is rolled back on failure rather than leaving the table half-filled.
849
- let storedCount = 0;
850
- let skippedCount = 0;
851
- db.transaction(() => {
852
- for (let i = 0; i < allEntries.length; i++) {
853
- if (upsertEmbedding(db, allEntries[i].id, embeddings[i])) {
854
- storedCount++;
855
- }
856
- else {
857
- skippedCount++;
899
+ let heartbeatTimer;
900
+ try {
901
+ heartbeatTimer = setInterval(() => {
902
+ onProgress({
903
+ phase: "embeddings",
904
+ message: `Still generating embeddings for ${allEntries.length} entr${allEntries.length === 1 ? "y" : "ies"}; waiting on embedding provider.`,
905
+ });
906
+ }, 15000);
907
+ const embeddings = await embedBatch(texts, config.embedding, signal);
908
+ throwIfAborted(signal);
909
+ // Wrap all embedding upserts in a single transaction so partial
910
+ // state is rolled back on failure rather than leaving the table half-filled.
911
+ let storedCount = 0;
912
+ let skippedCount = 0;
913
+ db.transaction(() => {
914
+ for (let i = 0; i < allEntries.length; i++) {
915
+ if (upsertEmbedding(db, allEntries[i].id, embeddings[i])) {
916
+ storedCount++;
917
+ }
918
+ else {
919
+ skippedCount++;
920
+ }
858
921
  }
922
+ })();
923
+ if (skippedCount > 0) {
924
+ warn(`[embed] ${skippedCount} embedding${skippedCount === 1 ? "" : "s"} skipped (entry deleted between queue and write)`);
859
925
  }
860
- })();
861
- if (skippedCount > 0) {
862
- warn(`[embed] ${skippedCount} embedding${skippedCount === 1 ? "" : "s"} skipped (entry deleted between queue and write)`);
926
+ onProgress({
927
+ phase: "embeddings",
928
+ message: `Stored ${storedCount} embedding${storedCount === 1 ? "" : "s"}.`,
929
+ });
930
+ setMeta(db, "embeddingFingerprint", currentFingerprint);
931
+ return { success: true };
932
+ }
933
+ finally {
934
+ if (heartbeatTimer)
935
+ clearInterval(heartbeatTimer);
863
936
  }
864
- onProgress({
865
- phase: "embeddings",
866
- message: `Stored ${storedCount} embedding${storedCount === 1 ? "" : "s"}.`,
867
- });
868
- setMeta(db, "embeddingFingerprint", currentFingerprint);
869
- return { success: true };
870
937
  }
871
938
  catch (error) {
872
939
  const message = error instanceof Error ? error.message : String(error);
@@ -977,7 +1044,7 @@ function resolveIndexedFiles(dirPath, files, stash) {
977
1044
  for (const entry of stash.entries) {
978
1045
  const entryPath = entry.filename
979
1046
  ? path.join(dirPath, entry.filename)
980
- : matchEntryToFile(entry.name, fileBasenameMap, files);
1047
+ : matchEntryToFile(entry.name, fileBasenameMap);
981
1048
  if (entryPath)
982
1049
  resolved.add(entryPath);
983
1050
  }
@@ -1096,7 +1163,7 @@ export function buildFileBasenameMap(files) {
1096
1163
  * try matching the last segment
1097
1164
  * 3. No implicit file fallback: ambiguous legacy entries are skipped
1098
1165
  */
1099
- export function matchEntryToFile(entryName, fileMap, _files) {
1166
+ export function matchEntryToFile(entryName, fileMap) {
1100
1167
  // Exact match on entry name
1101
1168
  const exact = fileMap.get(entryName);
1102
1169
  if (exact)
@@ -0,0 +1,23 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
+ import { ensureIndex } from "./ensure-index.js";
5
+ import { resolveSourceEntries } from "./search/search-source.js";
6
/**
 * Resolve the sources that read commands should consult.
 *
 * Delegates to `resolveSourceEntries` (the resolver search/show rely on) and
 * exposes the first resolved entry as the primary source.
 *
 * @param overrideStashDir Forwarded as the first argument to `resolveSourceEntries`.
 * @param existingConfig Forwarded as the second argument to `resolveSourceEntries`.
 * @returns `{ sources, primarySource }`, where `primarySource` is `sources[0]`
 *   (so it is `undefined` when the resolved list is empty).
 */
export function resolveReadSources(overrideStashDir, existingConfig) {
    const resolved = resolveSourceEntries(overrideStashDir, existingConfig);
    const primarySource = resolved[0];
    return { sources: resolved, primarySource };
}
11
/**
 * Make sure the primary source's index is ready before a read.
 *
 * @param primarySource Source entry whose `path` is handed to `ensureIndex`;
 *   may be `null`/`undefined`.
 * @returns `false` when there is no primary source or it has no usable
 *   `path`; otherwise whatever `ensureIndex` returns for that path.
 */
export async function ensurePrimaryIndexForRead(primarySource) {
    if (primarySource?.path) {
        return ensureIndex(primarySource.path);
    }
    // Nothing to index against: report "not ensured" instead of throwing.
    return false;
}
17
/**
 * One-call helper for callers that only have a stash path and config:
 * resolves the read sources, then ensures the index for the primary one.
 *
 * @param overrideStashDir Forwarded to `resolveReadSources`.
 * @param existingConfig Forwarded to `resolveReadSources`.
 * @returns The result of `ensurePrimaryIndexForRead` for the resolved primary source.
 */
export async function ensurePrimaryIndexFromConfig(overrideStashDir, existingConfig) {
    const { primarySource } = resolveReadSources(overrideStashDir, existingConfig);
    return ensurePrimaryIndexForRead(primarySource);
}
@@ -0,0 +1,51 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
+ /**
5
+ * Pure FTS5 query-string helpers, extracted from indexer/db/db.ts.
6
+ *
7
+ * These transform a raw user query into an FTS5-safe MATCH expression. They
8
+ * touch no database state, so they are unit-testable with zero DB setup.
9
+ */
10
/**
 * Sanitize a raw user query into an FTS5-safe implicit-AND expression.
 *
 * Allows only characters safe in FTS5 queries: ASCII letters, digits,
 * underscores, and whitespace. Everything else (hyphens, dots, quotes, parens,
 * asterisks, colons, carets, @, !, etc.) is replaced with a space so that
 * compound identifiers like "code-review" or "k8s.setup" become AND-joined
 * tokens ("code review", "k8s setup") rather than triggering FTS5 syntax
 * errors.
 *
 * FTS5 keywords are neutralized as well:
 *  - `NEAR` is dropped (proximity syntax).
 *  - Bare `AND` / `OR` / `NOT` tokens are lowercased. FTS5 only recognizes
 *    these operators in upper case, so the lowercase form is an ordinary
 *    search term. Left as-is, a query such as "NOT" or "cats AND" is a MATCH
 *    syntax error, and "a OR b" silently stops being an AND query.
 *    NOTE(review): this assumes the index tokenizer folds case (as FTS5's
 *    built-in tokenizers do), so lowercasing does not change what matches —
 *    confirm against the schema.
 *
 * @param query Raw user-supplied query string.
 * @returns Space-separated tokens, or "" when nothing searchable remains.
 */
export function sanitizeFtsQuery(query) {
    const tokens = query
        .replace(/[^a-zA-Z0-9_\s]/g, " ")
        // Neutralize the NEAR operator (FTS5 proximity syntax)
        .replace(/\bNEAR\b/g, " ")
        .split(/\s+/)
        .filter((t) => t.length >= 1)
        // Exact-token match only: "ANDROID" or "NOTES" must stay untouched.
        .map((t) => (/^(?:AND|OR|NOT)$/.test(t) ? t.toLowerCase() : t));
    if (tokens.length === 0)
        return "";
    // Use implicit AND (space-separated tokens) for precision. FTS5 treats
    // space-separated tokens as an implicit AND, matching only rows that
    // contain ALL terms.
    return tokens.join(" ");
}
31
/**
 * Turn an FTS5 query string into its prefix-matching variant.
 *
 * Every whitespace-separated token of 3 or more characters gets a trailing
 * `*`; shorter tokens are passed through unchanged so they do not expand into
 * overly broad matches.
 *
 * @param ftsQuery Space-separated query tokens.
 * @returns The rewritten query, or `null` when no token was long enough to
 *   receive a prefix marker (including the empty query).
 */
export function buildPrefixQuery(ftsQuery) {
    const rewritten = [];
    let expandedAny = false;
    for (const token of ftsQuery.split(/\s+/)) {
        if (!token)
            continue; // leading/trailing whitespace yields empty pieces
        if (token.length < 3) {
            rewritten.push(token);
            continue;
        }
        expandedAny = true;
        rewritten.push(`${token}*`);
    }
    return expandedAny ? rewritten.join(" ") : null;
}
@@ -148,20 +148,28 @@ function isInsideGitRepo(dir) {
148
148
  * read (e.g. permission errors).
149
149
  */
150
150
  export function* walkMarkdownFiles(root) {
151
- let entries;
152
- try {
153
- entries = fs.readdirSync(root, { withFileTypes: true });
154
- }
155
- catch {
156
- return;
157
- }
158
- for (const entry of entries) {
159
- const full = path.join(root, entry.name);
160
- if (entry.isDirectory()) {
161
- yield* walkMarkdownFiles(full);
151
+ const stack = [root];
152
+ while (stack.length > 0) {
153
+ const current = stack.pop();
154
+ if (!current)
155
+ continue;
156
+ let entries;
157
+ try {
158
+ entries = fs.readdirSync(current, { withFileTypes: true });
162
159
  }
163
- else if (entry.isFile() && entry.name.toLowerCase().endsWith(".md")) {
164
- yield full;
160
+ catch {
161
+ continue;
162
+ }
163
+ for (const entry of entries) {
164
+ const full = path.join(current, entry.name);
165
+ if (entry.isSymbolicLink())
166
+ continue;
167
+ if (entry.isDirectory()) {
168
+ stack.push(full);
169
+ }
170
+ else if (entry.isFile() && entry.name.toLowerCase().endsWith(".md")) {
171
+ yield full;
172
+ }
165
173
  }
166
174
  }
167
175
  }
@@ -55,6 +55,11 @@ export function defaultWhich(bin, envSource = process.env) {
55
55
  }
56
56
  return undefined;
57
57
  }
58
+ let detectOverrides;
59
+ /** TEST-ONLY. Swap the detection implementations; pass undefined to restore. */
60
+ export function _setAgentDetectForTests(fakes) {
61
+ detectOverrides = fakes;
62
+ }
58
63
  /**
59
64
  * Probe every resolvable agent profile (built-ins plus user overrides)
60
65
  * for an installed CLI.
@@ -64,6 +69,8 @@ export function defaultWhich(bin, envSource = process.env) {
64
69
  * @param whichFn Binary lookup. Tests should inject a stub.
65
70
  */
66
71
  export function detectAgentCliProfiles(agent, whichFn = defaultWhich) {
72
+ if (detectOverrides?.detectAgentCliProfiles)
73
+ return detectOverrides.detectAgentCliProfiles(agent, whichFn);
67
74
  const profiles = listResolvedAgentProfiles(agent);
68
75
  return profiles.map((profile) => probeProfile(profile, whichFn));
69
76
  }
@@ -87,6 +94,8 @@ function probeProfile(profile, whichFn) {
87
94
  * writing `agent.default`.
88
95
  */
89
96
  export function pickDefaultAgentProfile(results, existingDefault) {
97
+ if (detectOverrides?.pickDefaultAgentProfile)
98
+ return detectOverrides.pickDefaultAgentProfile(results, existingDefault);
90
99
  if (existingDefault) {
91
100
  const match = results.find((r) => r.name === existingDefault && r.available);
92
101
  if (match)
@@ -3,7 +3,7 @@
3
3
  // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
4
  export { getCommandBuilder } from "./builders.js";
5
5
  export { DEFAULT_AGENT_TIMEOUT_MS, listAgentProfileNames, listResolvedAgentProfiles, requireAgentProfile, resolveAgentProfile, resolveDefaultProfileName, resolveProfileFromConfig, } from "./config.js";
6
- export { defaultWhich, detectAgentCliProfiles, pickDefaultAgentProfile } from "./detect.js";
6
+ export { _setAgentDetectForTests, defaultWhich, detectAgentCliProfiles, pickDefaultAgentProfile } from "./detect.js";
7
7
  export { listBuiltinModelAliases, resolveModel } from "./model-aliases.js";
8
8
  export { BUILTIN_AGENT_PROFILE_NAMES, getBuiltinAgentProfile, listBuiltinAgentProfiles, } from "./profiles.js";
9
9
  export { buildProposePrompt, buildReflectPrompt, buildSchemaRepairPrompt, extractDraftConfidence, parseAgentProposalPayload, } from "./prompts.js";
@@ -17,6 +17,7 @@
17
17
  import fs from "node:fs";
18
18
  import os from "node:os";
19
19
  import path from "node:path";
20
+ import { parseEmbeddedJsonResponse } from "../../core/parse.js";
20
21
  import { spawn as runtimeSpawn } from "../../runtime.js";
21
22
  import { getCommandBuilder } from "./builders.js";
22
23
  import { DEFAULT_AGENT_TIMEOUT_MS } from "./config.js";
@@ -346,72 +347,20 @@ export async function runAgent(profile, prompt, options = {}) {
346
347
  };
347
348
  }
348
349
  if (parseOutput === "json" && stdioMode === "captured") {
349
- // Strip <think> blocks and code fences, then try direct parse with
350
- // embedded-JSON fallback for local LLMs that emit prose around the payload.
351
- const cleaned = stdout
352
- .trim()
353
- .replace(/<think>[\s\S]*?<\/think>/gi, "")
354
- .trim()
355
- .replace(/^```(?:json)?\s*\n?/, "")
356
- .replace(/\n?```\s*$/, "")
357
- .trim();
358
- let parsed;
359
- try {
360
- parsed = JSON.parse(cleaned);
361
- }
362
- catch {
363
- // Fallback: extract the first balanced {…} from prose output.
364
- let found;
365
- for (let s = 0; s < cleaned.length; s++) {
366
- if (cleaned[s] !== "{")
367
- continue;
368
- let depth = 0, inStr = false, esc = false;
369
- for (let i = s; i < cleaned.length; i++) {
370
- const c = cleaned[i];
371
- if (inStr) {
372
- if (esc) {
373
- esc = false;
374
- }
375
- else if (c === "\\") {
376
- esc = true;
377
- }
378
- else if (c === '"') {
379
- inStr = false;
380
- }
381
- continue;
382
- }
383
- if (c === '"') {
384
- inStr = true;
385
- continue;
386
- }
387
- if (c === "{")
388
- depth++;
389
- if (c === "}") {
390
- depth--;
391
- if (depth === 0) {
392
- try {
393
- found = JSON.parse(cleaned.slice(s, i + 1));
394
- }
395
- catch { }
396
- break;
397
- }
398
- }
399
- }
400
- if (found !== undefined)
401
- break;
402
- }
403
- if (found === undefined) {
404
- return {
405
- ok: false,
406
- exitCode,
407
- stdout,
408
- stderr,
409
- durationMs,
410
- reason: "parse_error",
411
- error: "no JSON object found in agent output",
412
- };
413
- }
414
- parsed = found;
350
+ // Strip <think> blocks and code fences, then parse with embedded-JSON
351
+ // fallback for local LLMs that emit prose around the payload. Handles
352
+ // both top-level `{…}` and `[…]` structures.
353
+ const parsed = parseEmbeddedJsonResponse(stdout);
354
+ if (parsed === undefined) {
355
+ return {
356
+ ok: false,
357
+ exitCode,
358
+ stdout,
359
+ stderr,
360
+ durationMs,
361
+ reason: "parse_error",
362
+ error: "no JSON structure found in agent output",
363
+ };
415
364
  }
416
365
  return { ok: true, exitCode, stdout, stderr, durationMs, parsed };
417
366
  }
@@ -165,7 +165,19 @@ function isRetryable(err) {
165
165
  }
166
166
  return false;
167
167
  }
168
+ // ── Test seam ────────────────────────────────────────────────────────────────
169
+ // Swap-and-restore override. Inert in production; only tests call the setter.
170
+ let chatCompletionOverride;
171
+ /** TEST-ONLY. Swap the implementation of `chatCompletion`; pass undefined to restore. */
172
+ export function _setChatCompletionForTests(fake) {
173
+ chatCompletionOverride = fake;
174
+ }
168
175
  export async function chatCompletion(config, messages, options) {
176
+ if (chatCompletionOverride)
177
+ return chatCompletionOverride(config, messages, options);
178
+ return chatCompletionReal(config, messages, options);
179
+ }
180
+ async function chatCompletionReal(config, messages, options) {
169
181
  const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 120_000;
170
182
  const started = Date.now();
171
183
  try {
@@ -3,11 +3,26 @@
3
3
  // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
4
  import { embedCacheKey, getCachedEmbedding, setCachedEmbedding } from "./embedders/cache.js";
5
5
  import { DETERMINISTIC_EMBED_MODEL_ID, deterministicEmbed, isDeterministicEmbedEnabled, } from "./embedders/deterministic.js";
6
- import { DEFAULT_LOCAL_MODEL, isTransformersAvailable, LocalEmbedder } from "./embedders/local.js";
6
+ import { DEFAULT_LOCAL_MODEL, isTransformersAvailable as isTransformersAvailableReal, LocalEmbedder, } from "./embedders/local.js";
7
7
  import { hasRemoteEndpoint, RemoteEmbedder } from "./embedders/remote.js";
8
8
  // ── Re-exports (public API) ─────────────────────────────────────────────────
9
9
  export { clearEmbeddingCache } from "./embedders/cache.js";
10
- export { DEFAULT_LOCAL_MODEL, isTransformersAvailable } from "./embedders/local.js";
10
+ export { _setTransformersLoaderForTests, DEFAULT_LOCAL_MODEL } from "./embedders/local.js";
11
+ let embedderOverrides;
12
+ /** TEST-ONLY. Swap embedder implementations; pass undefined to restore. */
13
+ export function _setEmbedderForTests(fakes) {
14
+ embedderOverrides = fakes;
15
+ }
16
+ /**
17
+ * Check whether the @huggingface/transformers package is importable.
18
+ * Delegating wrapper around `./embedders/local`'s probe so tests can swap it
19
+ * via {@link _setEmbedderForTests}.
20
+ */
21
+ export function isTransformersAvailable() {
22
+ if (embedderOverrides?.isTransformersAvailable)
23
+ return embedderOverrides.isTransformersAvailable();
24
+ return isTransformersAvailableReal();
25
+ }
11
26
  // ── Singleton local embedder ────────────────────────────────────────────────
12
27
  // `_localEmbedder` is an intentional module-level singleton but constructed
13
28
  // lazily on first use. The underlying @huggingface/transformers pipeline is
@@ -40,6 +55,8 @@ export function resetLocalEmbedder() {
40
55
  * and embedding config. Repeated identical queries return the cached vector.
41
56
  */
42
57
  export async function embed(text, embeddingConfig, signal) {
58
+ if (embedderOverrides?.embed)
59
+ return embedderOverrides.embed(text, embeddingConfig, signal);
43
60
  // Deterministic mode (env-gated, test/bench only): model-free, stable.
44
61
  if (isDeterministicEmbedEnabled()) {
45
62
  return deterministicEmbed(text);
@@ -61,6 +78,8 @@ export async function embed(text, embeddingConfig, signal) {
61
78
  * which processes texts in chunks of 32 for genuine batched inference.
62
79
  */
63
80
  export async function embedBatch(texts, embeddingConfig, signal) {
81
+ if (embedderOverrides?.embedBatch)
82
+ return embedderOverrides.embedBatch(texts, embeddingConfig, signal);
64
83
  if (texts.length === 0)
65
84
  return [];
66
85
  // Deterministic mode (env-gated, test/bench only): model-free, stable.
@@ -104,6 +123,8 @@ export { cosineSimilarity } from "./embedders/types.js";
104
123
  * - No config: use `DEFAULT_LOCAL_MODEL` (the shared singleton model).
105
124
  */
106
125
  export function resolveEmbeddingModelId(embeddingConfig) {
126
+ if (embedderOverrides?.resolveEmbeddingModelId)
127
+ return embedderOverrides.resolveEmbeddingModelId(embeddingConfig);
107
128
  if (isDeterministicEmbedEnabled())
108
129
  return DETERMINISTIC_EMBED_MODEL_ID;
109
130
  if (!embeddingConfig)
@@ -117,6 +138,9 @@ export function resolveEmbeddingModelId(embeddingConfig) {
117
138
  * Check whether embedding is available with a detailed reason on failure.
118
139
  */
119
140
  export async function checkEmbeddingAvailability(embeddingConfig) {
141
+ if (embedderOverrides?.checkEmbeddingAvailability) {
142
+ return embedderOverrides.checkEmbeddingAvailability(embeddingConfig);
143
+ }
120
144
  // Deterministic mode (env-gated): always available — no model, no network.
121
145
  if (isDeterministicEmbedEnabled()) {
122
146
  return { available: true };
@@ -28,6 +28,12 @@ function isBatchTensor(v) {
28
28
  Array.isArray(v.dims) &&
29
29
  v.dims.length >= 2);
30
30
  }
31
+ const realTransformersLoader = () => import("@huggingface/transformers");
32
+ let transformersLoader = realTransformersLoader;
33
+ /** TEST-ONLY. Swap the transformers module loader; pass undefined to restore. */
34
+ export function _setTransformersLoaderForTests(fake) {
35
+ transformersLoader = fake ?? realTransformersLoader;
36
+ }
31
37
  const LOCAL_EMBEDDER_DTYPE = "fp32";
32
38
  const LOCAL_EMBEDDER_FALLBACK_DTYPE = "auto";
33
39
  /**
@@ -180,7 +186,7 @@ export class LocalEmbedder {
180
186
  }
181
187
  let pipeline;
182
188
  try {
183
- const mod = await import("@huggingface/transformers");
189
+ const mod = await transformersLoader();
184
190
  pipeline = mod.pipeline;
185
191
  }
186
192
  catch (importError) {