akm-cli 0.7.0-rc1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. package/dist/src/cli.js +100 -16
  2. package/dist/src/commands/config-cli.js +42 -0
  3. package/dist/src/commands/history.js +78 -7
  4. package/dist/src/commands/registry-search.js +69 -6
  5. package/dist/src/commands/search.js +30 -3
  6. package/dist/src/commands/show.js +29 -0
  7. package/dist/src/commands/source-add.js +5 -1
  8. package/dist/src/commands/source-manage.js +7 -1
  9. package/dist/src/core/config.js +28 -0
  10. package/dist/src/indexer/db-search.js +1 -0
  11. package/dist/src/indexer/indexer.js +16 -2
  12. package/dist/src/indexer/matchers.js +1 -1
  13. package/dist/src/indexer/search-source.js +4 -2
  14. package/dist/src/integrations/agent/profiles.js +1 -1
  15. package/dist/src/integrations/agent/spawn.js +67 -16
  16. package/dist/src/integrations/github.js +9 -3
  17. package/dist/src/llm/embedders/remote.js +37 -3
  18. package/dist/src/output/cli-hints.js +15 -2
  19. package/dist/src/output/renderers.js +3 -1
  20. package/dist/src/output/shapes.js +8 -1
  21. package/dist/src/output/text.js +156 -3
  22. package/dist/src/registry/build-index.js +5 -4
  23. package/dist/src/registry/providers/static-index.js +3 -1
  24. package/dist/src/setup/setup.js +9 -0
  25. package/dist/src/wiki/wiki.js +54 -6
  26. package/dist/src/workflows/runs.js +37 -3
  27. package/dist/tests/architecture/agent-no-llm-sdk-guard.test.js +1 -1
  28. package/dist/tests/bench/attribution.test.js +24 -23
  29. package/dist/tests/bench/cleanup.js +31 -0
  30. package/dist/tests/bench/cli.js +366 -31
  31. package/dist/tests/bench/cli.test.js +282 -14
  32. package/dist/tests/bench/corpus.js +3 -0
  33. package/dist/tests/bench/corpus.test.js +10 -10
  34. package/dist/tests/bench/doctor.js +525 -0
  35. package/dist/tests/bench/driver.js +77 -22
  36. package/dist/tests/bench/driver.test.js +142 -1
  37. package/dist/tests/bench/environment.js +233 -0
  38. package/dist/tests/bench/environment.test.js +199 -0
  39. package/dist/tests/bench/evolve.js +67 -0
  40. package/dist/tests/bench/evolve.test.js +12 -4
  41. package/dist/tests/bench/failure-modes.test.js +52 -3
  42. package/dist/tests/bench/feedback-integrity.test.js +3 -2
  43. package/dist/tests/bench/leakage.test.js +105 -2
  44. package/dist/tests/bench/learning-curve.test.js +3 -2
  45. package/dist/tests/bench/metrics.js +102 -26
  46. package/dist/tests/bench/metrics.test.js +10 -4
  47. package/dist/tests/bench/opencode-config.js +194 -0
  48. package/dist/tests/bench/opencode-config.test.js +370 -0
  49. package/dist/tests/bench/report.js +73 -9
  50. package/dist/tests/bench/report.test.js +59 -10
  51. package/dist/tests/bench/run-config.js +355 -0
  52. package/dist/tests/bench/run-config.test.js +298 -0
  53. package/dist/tests/bench/run-curate-test.js +32 -0
  54. package/dist/tests/bench/run-failing-tasks.js +56 -0
  55. package/dist/tests/bench/run-full-bench.js +51 -0
  56. package/dist/tests/bench/run-items36-targeted.js +69 -0
  57. package/dist/tests/bench/run-nano-quick.js +42 -0
  58. package/dist/tests/bench/run-waveg-targeted.js +62 -0
  59. package/dist/tests/bench/runner.js +257 -94
  60. package/dist/tests/bench/tmp.js +90 -0
  61. package/dist/tests/bench/trajectory.js +2 -2
  62. package/dist/tests/bench/verifier.js +6 -1
  63. package/dist/tests/bench/workflow-spec.js +11 -24
  64. package/dist/tests/bench/workflow-spec.test.js +1 -1
  65. package/dist/tests/bench/workflow-trace.js +34 -0
  66. package/dist/tests/cli-errors.test.js +1 -0
  67. package/dist/tests/commands/history.test.js +195 -0
  68. package/dist/tests/config.test.js +25 -0
  69. package/dist/tests/e2e.test.js +23 -2
  70. package/dist/tests/fixtures/stashes/load.js +1 -1
  71. package/dist/tests/fixtures/stashes/load.test.js +11 -2
  72. package/dist/tests/indexer.test.js +12 -1
  73. package/dist/tests/output-baseline.test.js +2 -1
  74. package/dist/tests/output-shapes-unit.test.js +3 -1
  75. package/dist/tests/registry-build-index.test.js +17 -1
  76. package/dist/tests/registry-providers/static-index.test.js +34 -0
  77. package/dist/tests/registry-search.test.js +200 -0
  78. package/dist/tests/remember-frontmatter.test.js +11 -13
  79. package/dist/tests/source-qa-fixes.test.js +18 -0
  80. package/dist/tests/source-registry.test.js +3 -3
  81. package/dist/tests/source-source.test.js +61 -1
  82. package/dist/tests/workflow-qa-fixes.test.js +18 -0
  83. package/package.json +1 -1
@@ -120,6 +120,8 @@ export function formatPlain(command, result, detail) {
120
120
  const flagText = flags.length > 0 ? ` [${flags.join(", ")}]` : "";
121
121
  lines.push(`[${kind}] ${name}${ver}${prov}${flagText}`);
122
122
  }
123
+ lines.push("");
124
+ lines.push("To search: akm search '<query>' | To view an asset: akm show <ref>");
123
125
  return lines.join("\n");
124
126
  }
125
127
  case "add": {
@@ -597,6 +599,10 @@ export function formatHistoryPlain(r) {
597
599
  headerParts.push(`since: ${r.since}`);
598
600
  const totalCount = typeof r.totalCount === "number" ? r.totalCount : entries.length;
599
601
  headerParts.push(`${totalCount} event(s)`);
602
+ // Show active event sources so operators know which streams were consulted.
603
+ if (Array.isArray(r.sources) && r.sources.length > 0) {
604
+ headerParts.push(`sources: ${r.sources.join(", ")}`);
605
+ }
600
606
  const header = headerParts.join(" ");
601
607
  if (entries.length === 0) {
602
608
  const scope = typeof r.ref === "string" && r.ref ? ` for ${r.ref}` : "";
@@ -627,6 +633,9 @@ function formatShowPlain(r, detail) {
627
633
  if (r.type || r.name) {
628
634
  lines.push(`# ${String(r.type ?? "asset")}: ${String(r.name ?? "unknown")}`);
629
635
  }
636
+ if (r.path && r.editable !== false) {
637
+ lines.push(`file: ${String(r.path)}`);
638
+ }
630
639
  if (r.origin !== undefined)
631
640
  lines.push(`# origin: ${String(r.origin)}`);
632
641
  if (r.action)
@@ -677,7 +686,10 @@ function formatShowPlain(r, detail) {
677
686
  const id = typeof step.id === "string" ? step.id : "unknown";
678
687
  lines.push(` ${index + 1}. ${title} [${id}]`);
679
688
  if (typeof step.instructions === "string" && step.instructions.trim()) {
680
- lines.push(` instructions: ${step.instructions.replace(/\n+/g, " ").trim()}`);
689
+ const instrLines = step.instructions.trim().split("\n");
690
+ lines.push(` instructions: ${instrLines[0]}`);
691
+ for (const instrLine of instrLines.slice(1))
692
+ lines.push(` ${instrLine}`);
681
693
  }
682
694
  if (Array.isArray(step.completionCriteria) && step.completionCriteria.length > 0) {
683
695
  lines.push(" completion:");
@@ -692,8 +704,94 @@ function formatShowPlain(r, detail) {
692
704
  lines.push("");
693
705
  lines.push(...payloads);
694
706
  }
707
+ // REC-01 / REC-09: Append a type-specific directive so agents apply the
708
+ // content rather than substituting training-data approximations.
709
+ const assetType = typeof r.type === "string" ? r.type : null;
710
+ const assetRef = typeof r.name === "string" && assetType ? `${assetType}:${r.name}` : null;
711
+ // Show-loop detection: if the agent has shown this asset 3+ times without
712
+ // writing anything, surface a warning so it stops cycling and acts.
713
+ const showLoopCount = typeof r.showLoopWarning === "number" ? r.showLoopWarning : 0;
714
+ if (showLoopCount >= 3) {
715
+ lines.push("");
716
+ lines.push(`WARNING: You have shown this asset ${showLoopCount} times without completing the task.`);
717
+ lines.push("Stop re-reading — you have the information you need. Act on it now:");
718
+ lines.push(" - Write your output file using the content above.");
719
+ lines.push(` - If this asset does not contain what you need, run \`akm feedback '${assetRef ?? "<ref>"}' --negative\` and search for a different asset.`);
720
+ }
721
+ if (assetType === "skill" || assetType === "knowledge") {
722
+ const activeRun = r.activeRun;
723
+ if (activeRun) {
724
+ // Active workflow: redirect agent to workflow commands instead of direct apply
725
+ lines.unshift(` akm workflow complete '${activeRun.runId}'${activeRun.stepId ? ` --step '${activeRun.stepId}'` : ""}`);
726
+ lines.unshift("Read this schema, then follow your workflow step's instructions to edit the workspace file. When done, mark the step complete:");
727
+ lines.unshift(`WORKFLOW ACTIVE — schema shown as reference (run: ${activeRun.runId})`);
728
+ lines.unshift("---");
729
+ lines.unshift("");
730
+ // Still show feedback line at the end but skip the APPLY directive
731
+ lines.push("");
732
+ lines.push(`Run \`akm feedback ${assetRef ? `'${assetRef}'` : "<ref>"} --positive\` if the step succeeds, or \`--negative\` if this schema did not help.`);
733
+ }
734
+ else {
735
+ // No active workflow: show the APPLY directive. Branch on whether this
736
+ // skill primarily teaches CLI commands (shell output) vs YAML schema.
737
+ const preApplyLines = [...lines];
738
+ lines.push("");
739
+ lines.push("---");
740
+ if (isCommandOutputSkill(preApplyLines)) {
741
+ lines.push("APPLY (only if no workflow step is required for this task):");
742
+ lines.push(" 1. Identify the output file from README.md (typically commands.txt).");
743
+ lines.push(" 2. Write the exact command syntax from the code blocks above — replace every placeholder (`<name>`, `<value>`) with a real, concrete value from your task context. Do not write placeholder text.");
744
+ lines.push(" 3. Each command should be on a single line (no backslash line continuation unless the verifier expects it).");
745
+ lines.push(`Run \`akm feedback ${assetRef ? `'${assetRef}'` : "<ref>"} --positive\` after the task succeeds, or \`--negative\` if this reference did not contain the needed command syntax.`);
746
+ }
747
+ else {
748
+ lines.push("APPLY (only if no workflow step is required for this task):");
749
+ lines.push(" 1. Identify the target file from README.md — write or edit it. If the file does not yet exist, CREATE it with the full structure from this schema.");
750
+ lines.push(" 2. Add/edit the fields shown above using the exact field names from this schema.");
751
+ lines.push(" 3. COPY the exact YAML structure and field names from the code blocks above — do not substitute synonyms or invent nesting. Replace every placeholder value with a real, concrete value from your task context. Do not leave any field as null, empty, or a placeholder.");
752
+ lines.push(`Run \`akm feedback ${assetRef ? `'${assetRef}'` : "<ref>"} --positive\` after the task succeeds, or \`--negative\` if the task fails after following this guidance.`);
753
+ }
754
+ }
755
+ }
756
+ else if (assetType === "workflow") {
757
+ const workflowName = typeof r.name === "string" ? r.name : null;
758
+ const workflowRef = workflowName ? `workflow:${workflowName}` : "<ref>";
759
+ // Insert action directive BEFORE the workflow content by prepending to lines at the
760
+ // separator position. We find where the header ends and insert after the first `---`.
761
+ // Since lines already contain the full content at this point, we locate the insertion
762
+ // index: right after the first `---` separator if present, otherwise after the header.
763
+ const separatorIdx = lines.indexOf("---");
764
+ const insertIdx = separatorIdx >= 0 ? separatorIdx + 1 : r.type || r.name ? 1 : 0;
765
+ const actionDirective = [
766
+ `ACTION REQUIRED: Do not execute steps manually from this output.`,
767
+ `Run \`akm workflow next '${workflowRef}'\` to get your current step with exact instructions.`,
768
+ "---",
769
+ ];
770
+ lines.splice(insertIdx, 0, "", ...actionDirective);
771
+ lines.push("");
772
+ lines.push("---");
773
+ lines.push(`NEXT STEP: Run \`akm workflow next '${workflowRef}'\` to see the current workflow step.`);
774
+ lines.push("Do not edit workspace files before completing each step with `akm workflow complete`.");
775
+ }
695
776
  return lines.length > 0 ? lines.join("\n") : null;
696
777
  }
778
+ /**
779
+ * Detect whether a skill's rendered content primarily teaches CLI commands
780
+ * rather than YAML schema. Used to select the right APPLY directive variant.
781
+ *
782
+ * Heuristic: count code-block lines that start with known shell command
783
+ * prefixes vs lines that look like YAML key-value pairs. If CLI lines
784
+ * outnumber YAML lines (and there is at least one CLI line), treat the
785
+ * skill as command-output.
786
+ */
787
+ function isCommandOutputSkill(lines) {
788
+ const codeLines = lines.filter((l) => l.startsWith(" ") || l.startsWith("\t") || /^`/.test(l));
789
+ const cliPattern = /^(az |kubectl |docker |git |helm |terraform |aws |gcloud )/;
790
+ const yamlPattern = /^\s+\w+:/;
791
+ const cliCount = codeLines.filter((l) => cliPattern.test(l.trim())).length;
792
+ const yamlCount = codeLines.filter((l) => yamlPattern.test(l)).length;
793
+ return cliCount > yamlCount && cliCount > 0;
794
+ }
697
795
  export function formatWorkflowListPlain(result) {
698
796
  const runs = Array.isArray(result.runs) ? result.runs : [];
699
797
  if (runs.length === 0) {
@@ -747,7 +845,10 @@ export function formatWorkflowNextPlain(result) {
747
845
  const lines = base ? [base, "", "next:"] : ["next:"];
748
846
  lines.push(` ${String(step.title ?? "Untitled step")} [${String(step.id ?? "unknown")}]`);
749
847
  if (typeof step.instructions === "string" && step.instructions.trim()) {
750
- lines.push(` instructions: ${step.instructions.replace(/\n+/g, " ").trim()}`);
848
+ const instrLines = step.instructions.trim().split("\n");
849
+ lines.push(` instructions: ${instrLines[0]}`);
850
+ for (const instrLine of instrLines.slice(1))
851
+ lines.push(` ${instrLine}`);
751
852
  }
752
853
  const completion = Array.isArray(step.completionCriteria) ? step.completionCriteria : [];
753
854
  if (completion.length > 0) {
@@ -756,6 +857,25 @@ export function formatWorkflowNextPlain(result) {
756
857
  lines.push(` - ${String(criterion)}`);
757
858
  }
758
859
  }
860
+ // T2-3: surface run-id as labeled field
861
+ const run = typeof result.run === "object" && result.run !== null ? result.run : undefined;
862
+ const runId = typeof run?.id === "string" ? run.id : null;
863
+ const stepId = typeof step?.id === "string" ? step.id : null;
864
+ if (runId) {
865
+ lines.push("");
866
+ lines.push(`runId: ${runId}`);
867
+ }
868
+ // T1-6: complete command
869
+ if (runId && stepId) {
870
+ lines.push("");
871
+ lines.push("COMPLETE THIS STEP:");
872
+ lines.push(` akm workflow complete '${runId}' --step '${stepId}'`);
873
+ }
874
+ else if (runId) {
875
+ lines.push("");
876
+ lines.push("COMPLETE THIS STEP:");
877
+ lines.push(` akm workflow complete '${runId}' --step '<step-id>'`);
878
+ }
759
879
  return lines.join("\n");
760
880
  }
761
881
  export function formatSearchPlain(r, detail) {
@@ -763,7 +883,13 @@ export function formatSearchPlain(r, detail) {
763
883
  const registryHits = r.registryHits ?? [];
764
884
  const allHits = [...hits, ...registryHits];
765
885
  if (allHits.length === 0) {
766
- return r.tip ? String(r.tip) : "No results found.";
886
+ const warnings = Array.isArray(r.warnings) ? r.warnings : [];
887
+ const hasSetupWarning = warnings.some((w) => String(w).toLowerCase().includes("no stash") || String(w).toLowerCase().includes("not configured"));
888
+ if (hasSetupWarning) {
889
+ return "No stash configured. Run `akm init` to create your working stash, then `akm index` to build the search index.";
890
+ }
891
+ const base = r.tip ? String(r.tip) : "No matches found.";
892
+ return `${base}\nTry:\n akm search '<broader-term>' # fewer keywords\n akm list # see all configured sources\n akm curate '<query>' # let akm select the best match`;
767
893
  }
768
894
  const lines = [];
769
895
  for (const hit of allHits) {
@@ -822,6 +948,27 @@ export function formatSearchPlain(r, detail) {
822
948
  if (parts.length > 0)
823
949
  lines.push(`timing: ${parts.join(", ")}`);
824
950
  }
951
+ // REC-02: When stash hits exist, tell the agent the next required step so it
952
+ // doesn't skip `akm show` and write from training memory instead.
953
+ if (hits.length >= 1) {
954
+ // Prefer skill/command/agent type hits for the "Next:" ref — knowledge docs are
955
+ // supplementary context, not the authoritative schema agents should load first.
956
+ const preferredHit = hits.find((h) => h.type === "skill" || h.type === "command" || h.type === "agent") ?? hits[0];
957
+ const topRef = typeof preferredHit.ref === "string" ? preferredHit.ref : null;
958
+ const hasWorkflowHit = hits.some((h) => h.type === "workflow");
959
+ if (topRef) {
960
+ if (hasWorkflowHit) {
961
+ const workflowRef = hits.find((h) => h.type === "workflow");
962
+ const wfRef = workflowRef && typeof workflowRef.ref === "string" ? workflowRef.ref : topRef;
963
+ lines.push(`Next: akm show '${topRef}' | To start a workflow: akm workflow next '${wfRef}'`);
964
+ lines.push("After running workflow next: follow each step and run `akm workflow complete <run-id> --step <step-id>` when done.");
965
+ }
966
+ else {
967
+ lines.push(`Next: akm show '${topRef}'`);
968
+ lines.push("After reading the asset: check whether a workflow applies before editing — if so, use `akm workflow next` instead.");
969
+ }
970
+ }
971
+ }
825
972
  return lines.join("\n").trimEnd();
826
973
  }
827
974
  export function formatWikiListPlain(r) {
@@ -959,5 +1106,11 @@ export function formatCuratePlain(r, detail) {
959
1106
  lines.push(`- ${String(warning)}`);
960
1107
  }
961
1108
  }
1109
+ lines.push("");
1110
+ lines.push("Next steps:");
1111
+ lines.push(" 1. Run `akm show <ref>` for the best result above to read the full schema.");
1112
+ lines.push(" 2. Edit the workspace file using the schema field names and your task-specific values.");
1113
+ lines.push(" 3. Run `akm feedback <ref> --positive` when the task succeeds.");
1114
+ lines.push("To search further: akm search '<query>'");
962
1115
  return lines.join("\n");
963
1116
  }
@@ -19,15 +19,16 @@ import { detectStashRoot } from "../sources/providers/provider-utils";
19
19
  import { extractTarGzSecure } from "../sources/providers/tar-utils";
20
20
  import { parseRegistryIndex } from "./providers/static-index";
21
21
  const DEFAULT_NPM_REGISTRY_BASE = "https://registry.npmjs.org";
22
- const DEFAULT_MANUAL_ENTRIES_PATH = path.resolve("manual-entries.json");
23
- const DEFAULT_OUTPUT_PATH = path.resolve("index.json");
24
22
  const REQUIRED_KEYWORDS = ["akm-stash"];
25
23
  const GITHUB_TOPICS = ["akm-stash"];
26
24
  const EXCLUDED_REPOS = new Set(["itlackey/akm"]);
27
25
  const EXCLUDED_NPM_PACKAGES = new Set(["akm-cli"]);
28
26
  const EMPTY_INSPECTION = {};
27
+ function getDefaultRegistryBuildDir() {
28
+ return path.join(getCacheDir(), "registry-build");
29
+ }
29
30
  export async function buildRegistryIndex(options) {
30
- const manualEntriesPath = path.resolve(options?.manualEntriesPath ?? DEFAULT_MANUAL_ENTRIES_PATH);
31
+ const manualEntriesPath = path.resolve(options?.manualEntriesPath ?? path.join(getDefaultRegistryBuildDir(), "manual-entries.json"));
31
32
  const npmRegistryBase = trimTrailingSlash(options?.npmRegistryBase ?? DEFAULT_NPM_REGISTRY_BASE);
32
33
  const githubApiBase = trimTrailingSlash(options?.githubApiBase ?? GITHUB_API_BASE);
33
34
  const [manualKits, npmKits, githubKits] = await Promise.all([
@@ -55,7 +56,7 @@ export async function buildRegistryIndex(options) {
55
56
  };
56
57
  }
57
58
  export function writeRegistryIndex(index, outPath) {
58
- const resolved = path.resolve(outPath ?? DEFAULT_OUTPUT_PATH);
59
+ const resolved = path.resolve(outPath ?? path.join(getDefaultRegistryBuildDir(), "index.json"));
59
60
  fs.mkdirSync(path.dirname(resolved), { recursive: true });
60
61
  fs.writeFileSync(resolved, `${JSON.stringify(index, null, 2)}\n`, "utf8");
61
62
  return resolved;
@@ -190,7 +190,9 @@ export function parseRegistryIndex(data) {
190
190
  if (typeof data !== "object" || data === null || Array.isArray(data))
191
191
  return null;
192
192
  const obj = data;
193
- if (typeof obj.version !== "number" || obj.version !== 3)
193
+ // Accept versions 2 and 3; both use the same stashes[] wire format.
194
+ // The live official registry currently publishes version 2.
195
+ if (typeof obj.version !== "number" || (obj.version !== 2 && obj.version !== 3))
194
196
  return null;
195
197
  if (typeof obj.updatedAt !== "string")
196
198
  return null;
@@ -283,6 +283,7 @@ async function stepOllama(current) {
283
283
  mxbai: 1024,
284
284
  minilm: 384,
285
285
  bge: 384,
286
+ qwen3: 1024,
286
287
  };
287
288
  const guessedDim = Object.entries(knownDims).find(([k]) => embChoice.includes(k))?.[1] ?? 384;
288
289
  p.note("Embedding dimension must match the model. Common values: 384 (BGE small), 768 (BGE base), 1024 (BGE large). Press Enter to accept the detected default.", "Embedding dimension");
@@ -302,6 +303,14 @@ async function stepOllama(current) {
302
303
  model: embChoice,
303
304
  dimension: Number(dimChoice),
304
305
  };
306
+ p.note([
307
+ "Recommended Qwen embedding models (modern, high context support):",
308
+ " • qwen3-embedding-0.6b — fast and lightweight (ollama pull qwen3-embedding-0.6b)",
309
+ " • qwen3-embedding-4b — higher quality (ollama pull qwen3-embedding-4b)",
310
+ "",
311
+ "For long documents (wiki pages, large files), set context length to avoid 400 errors:",
312
+ " akm config set embedding.contextLength 8192",
313
+ ].join("\n"), "Embedding tips");
305
314
  }
306
315
  // else: undefined → use built-in local
307
316
  // Surface Ollama details to the LLM step so it can offer Ollama as a preset.
@@ -11,6 +11,32 @@
11
11
  * Principle: "akm surfaces. The agent writes." akm owns lifecycle, raw-slug
12
12
  * generation, structural lint, and `index.md` regeneration. The agent uses
13
13
  * its native file tools for every other page operation.
14
+ *
15
+ * ## Canonical wiki content contract
16
+ *
17
+ * The three "infrastructure" files at the wiki root — `schema.md`, `index.md`,
18
+ * and `log.md` — are excluded from all user-facing content surfaces:
19
+ *
20
+ * | Surface | schema/index/log | raw/<slug>.md | <page>.md |
21
+ * | -------------------- | ---------------- | ------------- | --------- |
22
+ * | `wiki pages` | excluded | included | included |
23
+ * | scoped wiki search | excluded | included | included |
24
+ * | stash-wide FTS index | excluded | included | included |
25
+ * | `wiki lint` | excluded | tracked | tracked |
26
+ *
27
+ * `raw/` files are first-class addressable content (`wiki:<n>/raw/<slug>`),
28
+ * searchable, and listed. They are NOT authored pages — they are source
29
+ * material the agent turns into pages. `lint` tracks whether each raw file
30
+ * has been cited by a page's `sources:` frontmatter field.
31
+ *
32
+ * ## Regeneration contract
33
+ *
34
+ * `regenerateWikiIndex` / `regenerateAllWikiIndexes` apply ONLY to
35
+ * stash-owned wikis (directories under `<stashDir>/wikis/`). External wikis
36
+ * registered via `akm wiki register` are read-only caches; mutating their
37
+ * `index.md` would corrupt source-of-truth content that akm does not own.
38
+ * The indexer therefore calls `regenerateAllWikiIndexes(stashDir)` — which
39
+ * only iterates `<stashDir>/wikis/` — and never touches registered sources.
14
40
  */
15
41
  import fs from "node:fs";
16
42
  import path from "node:path";
@@ -496,10 +522,15 @@ function readPageFrontmatter(absPath) {
496
522
  return out;
497
523
  }
498
524
  /**
499
- * List the addressable markdown entries in a wiki, excluding only the
500
- * infrastructure files `schema.md`, `index.md`, and `log.md`. This includes
501
- * both authored pages and `raw/` sources so `wiki pages` can inventory content
502
- * written via `akm wiki stash`.
525
+ * List all addressable wiki content entries.
526
+ *
527
+ * Per the canonical wiki contract: `schema.md`, `index.md`, and `log.md` at
528
+ * the wiki root are infrastructure files and are excluded. Everything else —
529
+ * authored pages AND `raw/<slug>.md` sources — is included and addressable as
530
+ * `wiki:<name>/<rel-path-without-.md>`.
531
+ *
532
+ * Callers that need to distinguish authored pages from raw sources should
533
+ * check whether the returned `name` starts with `"raw/"`.
503
534
  */
504
535
  export function listPages(stashDir, name) {
505
536
  const wikiDir = resolveWikiSource(stashDir, name).path;
@@ -521,6 +552,10 @@ export function listPages(stashDir, name) {
521
552
  * Uses `akmSearch({ type: "wiki" })` to reuse the full FTS5+boost pipeline,
522
553
  * then drops hits that aren't inside `wikis/<name>/`. No parallel scorer.
523
554
  *
555
+ * Per the canonical wiki contract: infrastructure files (`schema.md`,
556
+ * `index.md`, `log.md`) at the wiki root are excluded. `raw/<slug>.md`
557
+ * sources are included — they are first-class addressable content.
558
+ *
524
559
  * When the index is absent (e.g. fresh stash), `akmSearch` falls back to its
525
560
  * substring walker; hits still come through path-filtered here.
526
561
  */
@@ -795,7 +830,13 @@ export function lintWiki(stashDir, name) {
795
830
  }
796
831
  // ── Index regeneration ─────────────────────────────────────────────────────
797
832
  /**
798
- * Rebuild a wiki's `index.md` from its pages' frontmatter.
833
+ * Rebuild a stash-owned wiki's `index.md` from its pages' frontmatter.
834
+ *
835
+ * This function uses `resolveWikiDir` (not `resolveWikiSource`) so it only
836
+ * ever operates on the stash-owned path `<stashDir>/wikis/<name>/`. External
837
+ * wikis registered via `akm wiki register` are never regenerated here — they
838
+ * are read-only caches. See the canonical wiki contract at the top of this
839
+ * file for the full regeneration rule.
799
840
  *
800
841
  * Pages are grouped by `pageKind` (falling back to `uncategorised`) and
801
842
  * listed alphabetically inside each group. If the wiki directory doesn't
@@ -861,7 +902,14 @@ export function regenerateWikiIndex(stashDir, name) {
861
902
  }
862
903
  }
863
904
  /**
864
- * Regenerate `index.md` for every wiki found under `<stashDir>/wikis/`.
905
+ * Regenerate `index.md` for every stash-owned wiki under `<stashDir>/wikis/`.
906
+ *
907
+ * Per the canonical wiki contract: regeneration applies ONLY to stash-owned
908
+ * wikis. External wikis registered via `akm wiki register` are read-only
909
+ * caches whose source-of-truth lives outside this stash; mutating their
910
+ * `index.md` would corrupt content that akm does not own. Those wikis
911
+ * therefore appear only in the FTS index (read), never in regeneration
912
+ * (write).
865
913
  *
866
914
  * Called from `akmIndex()` as a side effect after the FTS rebuild. Never
867
915
  * throws; returns the list of wiki names that were regenerated.
@@ -3,6 +3,7 @@ import fs from "node:fs";
3
3
  import { parseAssetRef } from "../core/asset-ref";
4
4
  import { loadConfig } from "../core/config";
5
5
  import { NotFoundError, UsageError } from "../core/errors";
6
+ import { appendEvent } from "../core/events";
6
7
  import { getDbPath } from "../core/paths";
7
8
  import { closeDatabase, openDatabase } from "../indexer/db";
8
9
  import { resolveSourceEntries } from "../indexer/search-source";
@@ -32,7 +33,13 @@ export async function startWorkflowRun(ref, params = {}) {
32
33
  insertStep.run(runId, step.id, step.title, step.instructions, step.completionCriteria ? JSON.stringify(step.completionCriteria) : null, step.sequenceIndex ?? 0);
33
34
  }
34
35
  })();
35
- return getWorkflowStatus(runId);
36
+ const result = getWorkflowStatus(runId);
37
+ appendEvent({
38
+ eventType: "workflow_started",
39
+ ref: ref,
40
+ metadata: { runId: result.run.id, title: result.run.workflowTitle },
41
+ });
42
+ return result;
36
43
  }
37
44
  finally {
38
45
  closeWorkflowDatabase(workflowDb);
@@ -173,7 +180,16 @@ export function completeWorkflowStep(input) {
173
180
  completed_at: state.completedAt,
174
181
  };
175
182
  })();
176
- return buildWorkflowRunDetail(updatedRun, refreshedSteps);
183
+ const detail = buildWorkflowRunDetail(updatedRun, refreshedSteps);
184
+ appendEvent({
185
+ eventType: "workflow_step_completed",
186
+ ref: detail.run.workflowRef,
187
+ metadata: { runId: input.runId, stepId: input.stepId, notes: input.notes },
188
+ });
189
+ if (detail.run.status === "completed") {
190
+ appendEvent({ eventType: "workflow_finished", ref: detail.run.workflowRef, metadata: { runId: input.runId } });
191
+ }
192
+ return detail;
177
193
  }
178
194
  finally {
179
195
  closeWorkflowDatabase(workflowDb);
@@ -187,6 +203,9 @@ async function resolveRunSpecifier(db, specifier, params) {
187
203
  }
188
204
  return { run: explicitRun, autoStarted: false };
189
205
  }
206
+ if (!specifier.includes(":")) {
207
+ throw new NotFoundError(`Workflow run "${specifier}" not found.`, "WORKFLOW_NOT_FOUND");
208
+ }
190
209
  const parsed = parseAssetRef(specifier);
191
210
  if (parsed.type !== "workflow") {
192
211
  throw new UsageError(`Expected a workflow ref or workflow run id, got "${specifier}".`);
@@ -316,7 +335,7 @@ function resolveWorkflowEntryId(sourcePath, ref) {
316
335
  function readWorkflowRun(db, runId) {
317
336
  const run = db.prepare("SELECT * FROM workflow_runs WHERE id = ?").get(runId);
318
337
  if (!run) {
319
- throw new NotFoundError(`Workflow run not found: ${runId}`);
338
+ throw new NotFoundError(`Workflow run "${runId}" not found.`, "WORKFLOW_NOT_FOUND");
320
339
  }
321
340
  return run;
322
341
  }
@@ -416,3 +435,18 @@ function parseJsonArray(value) {
416
435
  }
417
436
  return undefined;
418
437
  }
438
+ export function getActiveWorkflowRun() {
439
+ try {
440
+ const workflowDb = openWorkflowDatabase();
441
+ const row = workflowDb
442
+ .query("SELECT id, current_step_id, workflow_ref FROM workflow_runs WHERE status IN ('active', 'blocked') ORDER BY updated_at DESC LIMIT 1")
443
+ .get();
444
+ closeWorkflowDatabase(workflowDb);
445
+ if (!row)
446
+ return null;
447
+ return { runId: row.id, stepId: row.current_step_id, workflowRef: row.workflow_ref };
448
+ }
449
+ catch {
450
+ return null; // fail-open: never crash show output due to DB error
451
+ }
452
+ }
@@ -81,7 +81,7 @@ function listAgentSourceFiles() {
81
81
  */
82
82
  function buildImportRegex(pkg) {
83
83
  const escaped = pkg.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
84
- return new RegExp(String.raw `(?:from|import\(|require\()\s*['"]` + escaped + `(?:/[^'"]*)?['"]`);
84
+ return new RegExp(`${String.raw `(?:from|import\(|require\()\s*['"]` + escaped}(?:/[^'"]*)?['"]`);
85
85
  }
86
86
  describe("regression guard: src/integrations/agent/** never imports LLM SDKs", () => {
87
87
  test("the agent integration tree exists", () => {
@@ -43,13 +43,14 @@ function makeReport(akmRuns) {
43
43
  commit: "abc",
44
44
  model: "m",
45
45
  corpus: { domains: 1, tasks: 1, slice: "all", seedsPerArm: akmRuns.length },
46
- aggregateNoakm: { passRate: 0, tokensPerPass: null, wallclockMs: 0 },
46
+ aggregateNoakm: { passRate: 0, tokensPerPass: null, tokensPerRun: null, wallclockMs: 0 },
47
47
  aggregateAkm: {
48
48
  passRate: akmRuns.filter((r) => r.outcome === "pass").length / Math.max(1, akmRuns.length),
49
49
  tokensPerPass: null,
50
+ tokensPerRun: null,
50
51
  wallclockMs: 0,
51
52
  },
52
- aggregateDelta: { passRate: 0, tokensPerPass: null, wallclockMs: 0 },
53
+ aggregateDelta: { passRate: 0, tokensPerPass: null, tokensPerRun: null, wallclockMs: 0 },
53
54
  trajectoryAkm: { correctAssetLoaded: null, feedbackRecorded: 0 },
54
55
  failureModes: { byLabel: {}, byTask: {} },
55
56
  tasks: [],
@@ -251,7 +252,7 @@ describe("runMaskedCorpus", () => {
251
252
  const passRate = alphaMissing ? 0.25 : 0.6;
252
253
  return {
253
254
  ...baseReport,
254
- aggregateAkm: { passRate, tokensPerPass: null, wallclockMs: 0 },
255
+ aggregateAkm: { passRate, tokensPerPass: null, tokensPerRun: null, wallclockMs: 0 },
255
256
  akmRuns: [],
256
257
  };
257
258
  };
@@ -334,7 +335,7 @@ describe("runMaskedCorpus", () => {
334
335
  observedTaskStashUnchanged = task?.stash === "fixtureA";
335
336
  return {
336
337
  ...baseReport,
337
- aggregateAkm: { passRate: 0, tokensPerPass: null, wallclockMs: 0 },
338
+ aggregateAkm: { passRate: 0, tokensPerPass: null, tokensPerRun: null, wallclockMs: 0 },
338
339
  akmRuns: [],
339
340
  };
340
341
  },
@@ -377,7 +378,7 @@ describe("runMaskedCorpus", () => {
377
378
  topN: 2,
378
379
  runUtility: async () => ({
379
380
  ...baseReport,
380
- aggregateAkm: { passRate: 0, tokensPerPass: null, wallclockMs: 0 },
381
+ aggregateAkm: { passRate: 0, tokensPerPass: null, tokensPerRun: null, wallclockMs: 0 },
381
382
  akmRuns: [],
382
383
  }),
383
384
  baseOptions: { arms: ["akm"], model: "m", seedsPerArm: 1 },
@@ -414,7 +415,7 @@ describe("runMaskedCorpus", () => {
414
415
  successDirs.push(dir);
415
416
  return {
416
417
  ...baseReport,
417
- aggregateAkm: { passRate: 0, tokensPerPass: null, wallclockMs: 0 },
418
+ aggregateAkm: { passRate: 0, tokensPerPass: null, tokensPerRun: null, wallclockMs: 0 },
418
419
  akmRuns: [],
419
420
  };
420
421
  },
@@ -440,7 +441,7 @@ describe("runMaskedCorpus", () => {
440
441
  throw new Error("simulated runner failure");
441
442
  return {
442
443
  ...baseReport,
443
- aggregateAkm: { passRate: 0, tokensPerPass: null, wallclockMs: 0 },
444
+ aggregateAkm: { passRate: 0, tokensPerPass: null, tokensPerRun: null, wallclockMs: 0 },
444
445
  akmRuns: [],
445
446
  };
446
447
  },
@@ -480,7 +481,7 @@ describe("runMaskedCorpus", () => {
480
481
  callCount += 1;
481
482
  return {
482
483
  ...baseReport,
483
- aggregateAkm: { passRate: 0, tokensPerPass: null, wallclockMs: 0 },
484
+ aggregateAkm: { passRate: 0, tokensPerPass: null, tokensPerRun: null, wallclockMs: 0 },
484
485
  };
485
486
  },
486
487
  baseOptions: { arms: ["akm"], model: "m", seedsPerArm: 1 },
@@ -521,7 +522,7 @@ describe("runMaskedCorpus", () => {
521
522
  callCount += 1;
522
523
  return {
523
524
  ...baseReport,
524
- aggregateAkm: { passRate: 0, tokensPerPass: null, wallclockMs: 0 },
525
+ aggregateAkm: { passRate: 0, tokensPerPass: null, tokensPerRun: null, wallclockMs: 0 },
525
526
  };
526
527
  },
527
528
  baseOptions: { arms: ["akm"], model: "m", seedsPerArm: 1 },
@@ -551,7 +552,7 @@ describe("runMaskedCorpus", () => {
551
552
  callCount += 1;
552
553
  return {
553
554
  ...baseReport,
554
- aggregateAkm: { passRate: 0, tokensPerPass: null, wallclockMs: 0 },
555
+ aggregateAkm: { passRate: 0, tokensPerPass: null, tokensPerRun: null, wallclockMs: 0 },
555
556
  };
556
557
  },
557
558
  baseOptions: { arms: ["akm"], model: "m", seedsPerArm: 1 },
@@ -626,9 +627,9 @@ describe("bench attribute --top clamping", () => {
626
627
  commit: "abc",
627
628
  model: "test-model",
628
629
  corpus: { domains: 1, tasks: 0, slice: "all", seedsPerArm: 1 },
629
- aggregateNoakm: { passRate: 0, tokensPerPass: null, wallclockMs: 0 },
630
- aggregateAkm: { passRate: 0, tokensPerPass: null, wallclockMs: 0 },
631
- aggregateDelta: { passRate: 0, tokensPerPass: null, wallclockMs: 0 },
630
+ aggregateNoakm: { passRate: 0, tokensPerPass: null, tokensPerRun: null, wallclockMs: 0 },
631
+ aggregateAkm: { passRate: 0, tokensPerPass: null, tokensPerRun: null, wallclockMs: 0 },
632
+ aggregateDelta: { passRate: 0, tokensPerPass: null, tokensPerRun: null, wallclockMs: 0 },
632
633
  trajectoryAkm: { correctAssetLoaded: null, feedbackRecorded: 0 },
633
634
  failureModes: { byLabel: {}, byTask: {} },
634
635
  tasks: [],
@@ -686,11 +687,11 @@ describe("runMaskedCorpus marginal_contribution arithmetic", () => {
686
687
  commit: "abc",
687
688
  model: "m",
688
689
  corpus: { domains: 1, tasks: 1, slice: "all", seedsPerArm: baseRuns.length },
689
- aggregateNoakm: { passRate: 0, tokensPerPass: null, wallclockMs: 0 },
690
+ aggregateNoakm: { passRate: 0, tokensPerPass: null, tokensPerRun: null, wallclockMs: 0 },
690
691
  // Engineered base pass rate distinct from the masked rates so the
691
692
  // arithmetic is observable.
692
- aggregateAkm: { passRate: 0.8, tokensPerPass: null, wallclockMs: 0 },
693
- aggregateDelta: { passRate: 0.8, tokensPerPass: null, wallclockMs: 0 },
693
+ aggregateAkm: { passRate: 0.8, tokensPerPass: null, tokensPerRun: null, wallclockMs: 0 },
694
+ aggregateDelta: { passRate: 0.8, tokensPerPass: null, tokensPerRun: null, wallclockMs: 0 },
694
695
  trajectoryAkm: { correctAssetLoaded: null, feedbackRecorded: 0 },
695
696
  failureModes: { byLabel: {}, byTask: {} },
696
697
  tasks: [],
@@ -730,7 +731,7 @@ describe("runMaskedCorpus marginal_contribution arithmetic", () => {
730
731
  const passRate = maskedPassRates[masked] ?? 0;
731
732
  return {
732
733
  ...baseReport,
733
- aggregateAkm: { passRate, tokensPerPass: null, wallclockMs: 0 },
734
+ aggregateAkm: { passRate, tokensPerPass: null, tokensPerRun: null, wallclockMs: 0 },
734
735
  akmRuns: [],
735
736
  };
736
737
  },
@@ -875,9 +876,9 @@ describe("bench attribute prefers persisted runs[] (#249)", () => {
875
876
  commit: "abc",
876
877
  model: "test-model",
877
878
  corpus: { domains: 1, tasks: 1, slice: "all", seedsPerArm: 2 },
878
- aggregateNoakm: { passRate: 0, tokensPerPass: null, wallclockMs: 0 },
879
- aggregateAkm: { passRate: 0.25, tokensPerPass: null, wallclockMs: 0 },
880
- aggregateDelta: { passRate: 0.25, tokensPerPass: null, wallclockMs: 0 },
879
+ aggregateNoakm: { passRate: 0, tokensPerPass: null, tokensPerRun: null, wallclockMs: 0 },
880
+ aggregateAkm: { passRate: 0.25, tokensPerPass: null, tokensPerRun: null, wallclockMs: 0 },
881
+ aggregateDelta: { passRate: 0.25, tokensPerPass: null, tokensPerRun: null, wallclockMs: 0 },
881
882
  trajectoryAkm: { correctAssetLoaded: null, feedbackRecorded: 0 },
882
883
  failureModes: { byLabel: {}, byTask: {} },
883
884
  tasks: [],
@@ -948,9 +949,9 @@ describe("bench attribute prefers persisted runs[] (#249)", () => {
948
949
  commit: "abc",
949
950
  model: "test-model",
950
951
  corpus: { domains: 1, tasks: 0, slice: "all", seedsPerArm: 1 },
951
- aggregateNoakm: { passRate: 0, tokensPerPass: null, wallclockMs: 0 },
952
- aggregateAkm: { passRate: 0, tokensPerPass: null, wallclockMs: 0 },
953
- aggregateDelta: { passRate: 0, tokensPerPass: null, wallclockMs: 0 },
952
+ aggregateNoakm: { passRate: 0, tokensPerPass: null, tokensPerRun: null, wallclockMs: 0 },
953
+ aggregateAkm: { passRate: 0, tokensPerPass: null, tokensPerRun: null, wallclockMs: 0 },
954
+ aggregateDelta: { passRate: 0, tokensPerPass: null, tokensPerRun: null, wallclockMs: 0 },
954
955
  trajectoryAkm: { correctAssetLoaded: null, feedbackRecorded: 0 },
955
956
  failureModes: { byLabel: {}, byTask: {} },
956
957
  tasks: [],