akm-cli 0.5.0 → 0.6.0-rc2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. package/CHANGELOG.md +53 -5
  2. package/README.md +9 -9
  3. package/dist/cli.js +379 -1448
  4. package/dist/{completions.js → commands/completions.js} +1 -1
  5. package/dist/{config-cli.js → commands/config-cli.js} +109 -11
  6. package/dist/commands/curate.js +263 -0
  7. package/dist/{info.js → commands/info.js} +17 -11
  8. package/dist/{init.js → commands/init.js} +4 -4
  9. package/dist/{install-audit.js → commands/install-audit.js} +14 -2
  10. package/dist/{installed-kits.js → commands/installed-stashes.js} +122 -50
  11. package/dist/commands/migration-help.js +141 -0
  12. package/dist/{registry-search.js → commands/registry-search.js} +68 -9
  13. package/dist/commands/remember.js +178 -0
  14. package/dist/{stash-search.js → commands/search.js} +28 -69
  15. package/dist/{self-update.js → commands/self-update.js} +3 -3
  16. package/dist/{stash-show.js → commands/show.js} +106 -81
  17. package/dist/{stash-add.js → commands/source-add.js} +133 -67
  18. package/dist/{stash-clone.js → commands/source-clone.js} +15 -13
  19. package/dist/{stash-source-manage.js → commands/source-manage.js} +24 -24
  20. package/dist/{vault.js → commands/vault.js} +43 -0
  21. package/dist/{stash-ref.js → core/asset-ref.js} +4 -4
  22. package/dist/{asset-registry.js → core/asset-registry.js} +30 -6
  23. package/dist/{asset-spec.js → core/asset-spec.js} +13 -6
  24. package/dist/{common.js → core/common.js} +147 -50
  25. package/dist/{config.js → core/config.js} +288 -29
  26. package/dist/core/errors.js +90 -0
  27. package/dist/{frontmatter.js → core/frontmatter.js} +64 -8
  28. package/dist/{paths.js → core/paths.js} +4 -4
  29. package/dist/core/write-source.js +280 -0
  30. package/dist/{local-search.js → indexer/db-search.js} +49 -32
  31. package/dist/{db.js → indexer/db.js} +210 -81
  32. package/dist/{file-context.js → indexer/file-context.js} +3 -3
  33. package/dist/{indexer.js → indexer/indexer.js} +153 -30
  34. package/dist/{manifest.js → indexer/manifest.js} +10 -10
  35. package/dist/{matchers.js → indexer/matchers.js} +4 -7
  36. package/dist/{metadata.js → indexer/metadata.js} +9 -5
  37. package/dist/{search-source.js → indexer/search-source.js} +97 -55
  38. package/dist/{semantic-status.js → indexer/semantic-status.js} +2 -2
  39. package/dist/{walker.js → indexer/walker.js} +1 -1
  40. package/dist/{lockfile.js → integrations/lockfile.js} +29 -2
  41. package/dist/{llm.js → llm/client.js} +12 -48
  42. package/dist/llm/embedder.js +127 -0
  43. package/dist/llm/embedders/cache.js +47 -0
  44. package/dist/llm/embedders/local.js +152 -0
  45. package/dist/llm/embedders/remote.js +121 -0
  46. package/dist/llm/embedders/types.js +39 -0
  47. package/dist/llm/metadata-enhance.js +53 -0
  48. package/dist/output/cli-hints.js +301 -0
  49. package/dist/output/context.js +95 -0
  50. package/dist/{renderers.js → output/renderers.js} +57 -61
  51. package/dist/output/shapes.js +212 -0
  52. package/dist/output/text.js +520 -0
  53. package/dist/{registry-build-index.js → registry/build-index.js} +48 -32
  54. package/dist/{create-provider-registry.js → registry/create-provider-registry.js} +6 -2
  55. package/dist/registry/factory.js +33 -0
  56. package/dist/{origin-resolve.js → registry/origin-resolve.js} +1 -1
  57. package/dist/registry/providers/index.js +11 -0
  58. package/dist/{providers → registry/providers}/skills-sh.js +60 -4
  59. package/dist/{providers → registry/providers}/static-index.js +126 -56
  60. package/dist/registry/providers/types.js +25 -0
  61. package/dist/{registry-resolve.js → registry/resolve.js} +10 -6
  62. package/dist/{detect.js → setup/detect.js} +0 -27
  63. package/dist/{ripgrep-install.js → setup/ripgrep-install.js} +1 -1
  64. package/dist/{ripgrep-resolve.js → setup/ripgrep-resolve.js} +2 -2
  65. package/dist/{setup.js → setup/setup.js} +162 -129
  66. package/dist/setup/steps.js +45 -0
  67. package/dist/{kit-include.js → sources/include.js} +1 -1
  68. package/dist/sources/provider-factory.js +36 -0
  69. package/dist/sources/provider.js +21 -0
  70. package/dist/sources/providers/filesystem.js +35 -0
  71. package/dist/{stash-providers → sources/providers}/git.js +218 -28
  72. package/dist/{stash-providers → sources/providers}/index.js +4 -4
  73. package/dist/sources/providers/install-types.js +14 -0
  74. package/dist/sources/providers/npm.js +160 -0
  75. package/dist/sources/providers/provider-utils.js +173 -0
  76. package/dist/sources/providers/sync-from-ref.js +45 -0
  77. package/dist/sources/providers/tar-utils.js +154 -0
  78. package/dist/{stash-providers → sources/providers}/website.js +60 -20
  79. package/dist/{stash-resolve.js → sources/resolve.js} +13 -12
  80. package/dist/{wiki.js → wiki/wiki.js} +18 -17
  81. package/dist/{workflow-authoring.js → workflows/authoring.js} +48 -17
  82. package/dist/{workflow-cli.js → workflows/cli.js} +2 -1
  83. package/dist/{workflow-db.js → workflows/db.js} +1 -1
  84. package/dist/workflows/document-cache.js +20 -0
  85. package/dist/workflows/parser.js +379 -0
  86. package/dist/workflows/renderer.js +78 -0
  87. package/dist/{workflow-runs.js → workflows/runs.js} +84 -30
  88. package/dist/workflows/schema.js +11 -0
  89. package/dist/workflows/validator.js +48 -0
  90. package/docs/README.md +30 -0
  91. package/docs/migration/release-notes/0.0.13.md +4 -0
  92. package/docs/migration/release-notes/0.1.0.md +6 -0
  93. package/docs/migration/release-notes/0.2.0.md +6 -0
  94. package/docs/migration/release-notes/0.3.0.md +5 -0
  95. package/docs/migration/release-notes/0.5.0.md +6 -0
  96. package/docs/migration/release-notes/0.6.0.md +75 -0
  97. package/docs/migration/release-notes/README.md +21 -0
  98. package/package.json +3 -2
  99. package/dist/embedder.js +0 -351
  100. package/dist/errors.js +0 -34
  101. package/dist/migration-help.js +0 -110
  102. package/dist/registry-factory.js +0 -19
  103. package/dist/registry-install.js +0 -532
  104. package/dist/ripgrep.js +0 -2
  105. package/dist/stash-provider-factory.js +0 -35
  106. package/dist/stash-provider.js +0 -1
  107. package/dist/stash-providers/filesystem.js +0 -41
  108. package/dist/stash-providers/openviking.js +0 -348
  109. package/dist/stash-providers/provider-utils.js +0 -11
  110. package/dist/stash-types.js +0 -1
  111. package/dist/workflow-markdown.js +0 -251
  112. /package/dist/{markdown.js → core/markdown.js} +0 -0
  113. /package/dist/{warn.js → core/warn.js} +0 -0
  114. /package/dist/{search-fields.js → indexer/search-fields.js} +0 -0
  115. /package/dist/{usage-events.js → indexer/usage-events.js} +0 -0
  116. /package/dist/{github.js → integrations/github.js} +0 -0
  117. /package/dist/{registry-provider.js → registry/types.js} +0 -0
  118. /package/dist/{registry-types.js → sources/types.js} +0 -0
@@ -0,0 +1,45 @@
1
+ /**
2
+ * Unified install-ref dispatcher.
3
+ *
4
+ * Replaces the historical `installRegistryRef()` entry point. Given an
5
+ * unparsed install ref, this resolves the right syncable provider and
6
+ * invokes its `sync()` method.
7
+ *
8
+ * Audit is intentionally NOT performed here; callers (`akmAdd`,
9
+ * `akmUpdate`) decide whether to run `auditInstallCandidate` on the
10
+ * synced `contentDir` because they own the `--trust` flag.
11
+ */
12
+ import { UsageError } from "../../core/errors";
13
+ import { parseRegistryRef } from "../../registry/resolve";
14
+ import { detectStashRoot } from "./provider-utils";
15
/**
 * Dispatch an unparsed install ref to the provider that can sync it.
 *
 * @param {string} ref - raw, unparsed install ref as typed by the user
 * @param {object} [options] - provider sync options, forwarded verbatim
 * @returns {Promise<object>} the provider's sync result
 * @throws {UsageError} when the parsed source has no syncable provider
 */
export async function syncFromRef(ref, options) {
    const parsed = parseRegistryRef(ref);
    switch (parsed.source) {
        case "local":
            return syncLocalRef(parsed, options);
        case "npm": {
            // Lazy-load so the npm provider's weight is only paid when used.
            const { syncNpmRef } = await import("./npm");
            return syncNpmRef(ref, options);
        }
        case "git":
        case "github": {
            const { syncRegistryGitRef } = await import("./git");
            return syncRegistryGitRef(ref, options);
        }
        default:
            // Exhaustiveness — `parseRegistryRef` only emits the four sources above.
            throw new UsageError(`No syncable provider for ref: ${ref} (source=${parsed.source})`);
    }
}
31
/**
 * Build the sync result for a local filesystem ref. No network I/O: the
 * source path doubles as cache and extraction directory, and the content
 * dir is the detected stash root inside it.
 *
 * @param {object} parsed - parsed ref (`id`, `ref`, `sourcePath`)
 * @param {object} [options] - may carry `now` (clock override) and `writable`
 * @returns {object} sync descriptor for the local source
 */
function syncLocalRef(parsed, options) {
    // `options.now` lets tests pin the timestamp; default to wall clock.
    const clock = options?.now ?? new Date();
    return {
        id: parsed.id,
        source: "local",
        ref: parsed.ref,
        artifactUrl: parsed.sourcePath,
        contentDir: detectStashRoot(parsed.sourcePath),
        cacheDir: parsed.sourcePath,
        extractedDir: parsed.sourcePath,
        writable: options?.writable,
        syncedAt: clock.toISOString(),
    };
}
@@ -0,0 +1,154 @@
1
+ /**
2
+ * Tar archive extraction and integrity verification utilities.
3
+ *
4
+ * These helpers are security-critical: they validate archive entries to
5
+ * prevent path traversal, run a post-extraction scan for symlink escapes,
6
+ * and verify integrity hashes (SRI or hex shasum) before extraction.
7
+ *
8
+ * Extracted from `registry-install.ts` and shared by all syncable
9
+ * providers that fetch tarballs (currently `NpmSourceProvider` and the
10
+ * registry index builder).
11
+ */
12
+ import { spawnSync } from "node:child_process";
13
+ import { createHash } from "node:crypto";
14
+ import fs from "node:fs";
15
+ import path from "node:path";
16
+ import { isWithin } from "../../core/common";
17
+ import { warn } from "../../core/warn";
18
/**
 * Verify an archive's integrity against a known hash. Throws and removes
 * the archive when verification fails.
 *
 * Supports SRI hashes (sha256-/sha512-) and hex SHA-1 from npm.
 * Skips verification for git/github sources (revisions are commit SHAs,
 * not content hashes).
 *
 * @param {string} archivePath - path to the downloaded archive on disk
 * @param {string|undefined} expected - SRI (`sha256-`/`sha512-` + base64) or 40-char hex SHA-1
 * @param {string} source - ref source kind; `git`/`github` skip verification
 * @throws {Error} on digest mismatch (the archive file is deleted first)
 */
export function verifyArchiveIntegrity(archivePath, expected, source) {
    if (!expected) {
        return;
    }
    // git/github revisions identify commits, not tarball bytes, so there is
    // no content hash to verify against.
    if (source === "git" || source === "github") {
        return;
    }
    const data = fs.readFileSync(archivePath);
    // SRI hash format: sha256-<base64> or sha512-<base64>
    const isSri = expected.startsWith("sha256-") || expected.startsWith("sha512-");
    if (isSri) {
        const separator = expected.indexOf("-");
        const algorithm = expected.slice(0, separator);
        const wantDigest = expected.slice(separator + 1);
        const gotDigest = createHash(algorithm).update(data).digest("base64");
        if (gotDigest !== wantDigest) {
            // Remove the tampered/corrupt download before surfacing the error.
            fs.unlinkSync(archivePath);
            throw new Error(`Integrity check failed for ${archivePath}: expected ${algorithm} digest ${wantDigest}, got ${gotDigest}`);
        }
        return;
    }
    // Hex shasum (SHA-1 from npm)
    if (/^[0-9a-f]{40}$/i.test(expected)) {
        const gotHex = createHash("sha1").update(data).digest("hex");
        if (gotHex.toLowerCase() !== expected.toLowerCase()) {
            fs.unlinkSync(archivePath);
            throw new Error(`Integrity check failed for ${archivePath}: expected sha1 ${expected}, got ${gotHex}`);
        }
        return;
    }
    // Unrecognized format — warn and skip verification
    warn("Unrecognized integrity format: %s — verification skipped", expected);
}
58
/**
 * Extract a tar.gz archive into `destinationDir`, validating entries first
 * (no absolute paths, no `..` traversal, no NUL bytes), invoking tar with
 * `--no-same-owner --strip-components=1`, and finally scanning the extracted
 * tree for symlinks that would escape the destination.
 *
 * @param {string} archivePath - path to the tar.gz archive
 * @param {string} destinationDir - directory to extract into (recreated fresh)
 * @throws {Error} when listing, validation, extraction, or the post-scan fails
 */
export function extractTarGzSecure(archivePath, destinationDir) {
    // Dry-run listing first so unsafe entries are rejected before any bytes
    // hit the filesystem.
    const listing = spawnSync("tar", ["tzf", archivePath], { encoding: "utf8" });
    if (listing.status !== 0) {
        const reason = listing.stderr?.trim() || listing.error?.message || "unknown error";
        throw new Error(`Failed to inspect archive ${archivePath}: ${reason}`);
    }
    validateTarEntries(listing.stdout);
    // Recreate the destination so stale files from a prior extraction never
    // linger alongside the new content.
    fs.rmSync(destinationDir, { recursive: true, force: true });
    fs.mkdirSync(destinationDir, { recursive: true });
    const extraction = spawnSync("tar", ["xzf", archivePath, "--no-same-owner", "--strip-components=1", "-C", destinationDir], { encoding: "utf8" });
    if (extraction.status !== 0) {
        const reason = extraction.stderr?.trim() || extraction.error?.message || "unknown error";
        throw new Error(`Failed to extract archive ${archivePath}: ${reason}`);
    }
    // Post-extraction scan: verify all extracted files are within destinationDir
    // This mitigates TOCTOU between validateTarEntries (list) and tar extract.
    scanExtractedFiles(destinationDir, destinationDir);
}
82
/**
 * Recursively verify that every extracted entry stays inside `root`.
 *
 * @param {string} dir - directory currently being scanned
 * @param {string} root - extraction destination all entries must stay within
 * @throws {Error} when an entry name is suspicious, a symlink target escapes
 *   (or cannot be resolved), or a resolved entry path leaves `root`
 */
function scanExtractedFiles(dir, root) {
    let entries;
    try {
        entries = fs.readdirSync(dir, { withFileTypes: true });
    }
    catch {
        // Directory vanished or is unreadable mid-scan; nothing left to check.
        return;
    }
    for (const entry of entries) {
        const fullPath = path.join(dir, entry.name);
        // Reject only entries whose name is exactly the parent-traversal segment
        // (or `.`). Substring matches (`foo..bar`, `archive..2024.tar`) are
        // legitimate filenames that the previous `entry.name.includes("..")`
        // check rejected as false positives — flagged in PR #168 review.
        // (readdir should never emit these names; kept as defense in depth.)
        if (entry.name === ".." || entry.name === ".") {
            throw new Error(`Post-extraction scan: suspicious entry name: ${fullPath}`);
        }
        // Symlinks: resolve and confirm the target stays inside the destination.
        if (entry.isSymbolicLink()) {
            let target;
            try {
                target = fs.realpathSync(fullPath);
            }
            catch (err) {
                // Fix: a dangling symlink previously surfaced as a raw ENOENT
                // from realpathSync. Fail closed with an explicit scan error
                // instead of a confusing filesystem error.
                throw new Error(`Post-extraction scan: unresolvable symlink: ${fullPath}`, { cause: err });
            }
            if (!isWithin(target, root)) {
                throw new Error(`Post-extraction scan: symlink escapes destination directory: ${fullPath} -> ${target}`);
            }
        }
        // Belt-and-suspenders: check that the resolved path of regular entries
        // stays within the destination root. This catches path traversal attempts
        // via symlink TOCTOU, directory renames, or any other anomalies.
        if (!entry.isSymbolicLink()) {
            const resolved = path.resolve(fullPath);
            if (!isWithin(resolved, root)) {
                throw new Error(`Post-extraction scan: entry escapes destination directory: ${fullPath}`);
            }
        }
        if (entry.isDirectory()) {
            scanExtractedFiles(fullPath, root);
        }
    }
}
120
/**
 * Validate the line-oriented `tar tzf` listing for unsafe entries.
 *
 * Rejects:
 * - empty/NUL-containing entries
 * - absolute paths
 * - parent traversal (`..` / `../`)
 * - any entry that would still escape after `--strip-components=1`
 *
 * @param {string} listOutput - raw stdout of `tar tzf`, one entry per line
 * @throws {Error} on the first unsafe entry encountered
 */
export function validateTarEntries(listOutput) {
    for (const rawLine of listOutput.split(/\r?\n/).filter(Boolean)) {
        const entry = rawLine.trim();
        // Whitespace-only or NUL-bearing names are never legitimate.
        if (!entry || entry.includes("\0")) {
            throw new Error(`Archive contains an invalid entry: ${JSON.stringify(rawLine)}`);
        }
        if (entry.startsWith("/")) {
            throw new Error(`Archive contains an absolute path entry: ${entry}`);
        }
        const normalized = path.posix.normalize(entry);
        if (normalized === ".." || normalized.startsWith("../")) {
            throw new Error(`Archive contains a path traversal entry: ${entry}`);
        }
        // Simulate `--strip-components=1`: drop the leading segment and make
        // sure the remainder still cannot climb out of the destination.
        const remainder = normalized.split("/").filter(Boolean).slice(1).join("/");
        if (!remainder) {
            continue;
        }
        const strippedNorm = path.posix.normalize(remainder);
        if (strippedNorm === ".." ||
            strippedNorm.startsWith("../") ||
            path.posix.isAbsolute(strippedNorm)) {
            throw new Error(`Archive contains an unsafe entry after strip-components: ${entry}`);
        }
    }
}
@@ -1,10 +1,11 @@
1
1
  import { createHash } from "node:crypto";
2
2
  import fs from "node:fs";
3
3
  import path from "node:path";
4
- import { fetchWithRetry } from "../common";
5
- import { ConfigError, UsageError } from "../errors";
6
- import { getRegistryIndexCacheDir } from "../paths";
7
- import { registerStashProvider } from "../stash-provider-factory";
4
+ import { fetchWithRetry, ResponseTooLargeError, readBodyWithByteCap } from "../../core/common";
5
+ import { ConfigError, UsageError } from "../../core/errors";
6
+ import { getRegistryIndexCacheDir } from "../../core/paths";
7
+ import { warn } from "../../core/warn";
8
+ import { registerSourceProvider } from "../provider-factory";
8
9
  import { isExpired, sanitizeString } from "./provider-utils";
9
10
  /** Refresh website snapshots every 12 hours to balance freshness with scraping load. */
10
11
  const CACHE_REFRESH_INTERVAL_MS = 12 * 60 * 60 * 1000;
@@ -14,27 +15,48 @@ const CACHE_STALE_MS = 7 * 24 * 60 * 60 * 1000;
14
15
  const QUEUE_EXPANSION_FACTOR = 5;
15
16
  const MAX_PAGES_DEFAULT = 50;
16
17
  const MAX_DEPTH_DEFAULT = 3;
17
- class WebsiteStashProvider {
18
- type = "website";
18
+ /**
19
+ * Per-page body cap for website scraping. HTML pages this large are
20
+ * almost never useful as agent knowledge sources and a runaway server
21
+ * streaming tens of megabytes would blow memory with no upside.
22
+ */
23
+ const WEBSITE_PAGE_BYTE_CAP = 5 * 1024 * 1024;
24
+ /**
25
+ * Wall-clock cap for a full crawl (10 minutes). With per-request timeouts
26
+ * of 15s and a `maxPages` default of 50, an unresponsive site could
27
+ * otherwise stall `akm add` for 12.5 minutes with no feedback. Cap the
28
+ * whole crawl and return what we have when time runs out.
29
+ */
30
+ const WEBSITE_CRAWL_WALL_CLOCK_MS = 10 * 60 * 1000;
31
+ /**
32
+ * Website source provider — scrapes pages into a local mirror so the FTS5
33
+ * indexer can walk them. Implements the v1 {@link SourceProvider} interface
34
+ * (spec §2.1): `{ name, kind, init, path, sync }`.
35
+ *
36
+ * Reading is the indexer's job — this class doesn't implement `search` or
37
+ * `show`.
38
+ */
39
+ class WebsiteSourceProvider {
40
+ kind = "website";
19
41
  name;
42
+ #config;
43
+ #url;
20
44
  constructor(config) {
45
+ this.#config = config;
21
46
  this.name = config.name ?? "website";
22
- validateWebsiteUrl(config.url ?? "");
47
+ this.#url = validateWebsiteUrl(config.url ?? "");
23
48
  }
24
- /** Content is indexed through the standard FTS5 pipeline. */
25
- async search(_options) {
26
- return { hits: [] };
49
+ async init(_ctx) {
50
+ // URL validation already happens in the constructor; nothing else to do.
27
51
  }
28
- /** Content is local files, shown via showLocal. */
29
- async show(_ref, _view) {
30
- throw new Error("Website provider content is shown via local index");
52
+ path() {
53
+ return getCachePaths(this.#url).stashDir;
31
54
  }
32
- /** Content is local; no remote show needed. */
33
- canShow(_ref) {
34
- return false;
55
+ async sync() {
56
+ await ensureWebsiteMirror(this.#config, { requireStashDir: true });
35
57
  }
36
58
  }
37
- registerStashProvider("website", (config) => new WebsiteStashProvider(config));
59
+ registerSourceProvider("website", (config) => new WebsiteSourceProvider(config));
38
60
  function getCachePaths(siteUrl) {
39
61
  const key = createHash("sha256").update(normalizeSiteUrl(siteUrl)).digest("hex").slice(0, 16);
40
62
  const rootDir = path.join(getRegistryIndexCacheDir(), `website-${key}`);
@@ -49,6 +71,7 @@ async function ensureWebsiteMirror(config, options) {
49
71
  const normalizedUrl = validateWebsiteUrl(rawUrl);
50
72
  const cachePaths = getCachePaths(normalizedUrl);
51
73
  const requireStashDir = options?.requireStashDir === true;
74
+ const force = options?.force === true;
52
75
  let mtime = 0;
53
76
  try {
54
77
  mtime = fs.statSync(cachePaths.manifestPath).mtimeMs;
@@ -56,7 +79,8 @@ async function ensureWebsiteMirror(config, options) {
56
79
  catch {
57
80
  /* no cached manifest */
58
81
  }
59
- if (mtime &&
82
+ if (!force &&
83
+ mtime &&
60
84
  !isExpired(mtime, CACHE_REFRESH_INTERVAL_MS) &&
61
85
  (!requireStashDir || hasExtractedSite(cachePaths.stashDir))) {
62
86
  return cachePaths;
@@ -124,7 +148,10 @@ async function crawlWebsite(startUrl, options) {
124
148
  const queue = [{ url: start.toString(), depth: 0 }];
125
149
  const visited = new Set();
126
150
  const pages = [];
151
+ const deadline = Date.now() + WEBSITE_CRAWL_WALL_CLOCK_MS;
127
152
  while (queue.length > 0 && pages.length < options.maxPages) {
153
+ if (Date.now() > deadline)
154
+ break;
128
155
  const next = queue.shift();
129
156
  if (!next)
130
157
  break;
@@ -149,6 +176,9 @@ async function crawlWebsite(startUrl, options) {
149
176
  queue.push({ url: candidate, depth: next.depth + 1 });
150
177
  }
151
178
  }
179
+ if (Date.now() > deadline) {
180
+ warn("[akm] website crawl stopped at the %ds wall-clock cap with %d/%d pages collected from %s.", WEBSITE_CRAWL_WALL_CLOCK_MS / 1000, pages.length, options.maxPages, startUrl);
181
+ }
152
182
  return pages;
153
183
  }
154
184
  async function fetchWebsitePage(pageUrl) {
@@ -164,7 +194,17 @@ async function fetchWebsitePage(pageUrl) {
164
194
  throw new Error(`Failed to fetch website content (${response.status}) from ${pageUrl}`);
165
195
  }
166
196
  const contentType = response.headers.get("content-type")?.toLowerCase() ?? "";
167
- const body = await response.text();
197
+ let body;
198
+ try {
199
+ body = await readBodyWithByteCap(response, WEBSITE_PAGE_BYTE_CAP);
200
+ }
201
+ catch (err) {
202
+ if (err instanceof ResponseTooLargeError) {
203
+ // Skip oversized pages rather than aborting the whole crawl.
204
+ return null;
205
+ }
206
+ throw err;
207
+ }
168
208
  const finalUrl = normalizeCrawlUrl(response.url || pageUrl) ?? pageUrl;
169
209
  if (contentType.includes("text/html") || contentType.includes("application/xhtml+xml") || looksLikeMarkup(body)) {
170
210
  const title = extractHtmlTitle(body) || new URL(finalUrl).hostname;
@@ -440,4 +480,4 @@ function safeCodePointToString(value) {
440
480
  return undefined;
441
481
  }
442
482
  }
443
- export { ensureWebsiteMirror, getCachePaths, validateWebsiteInputUrl, validateWebsiteUrl, WebsiteStashProvider };
483
+ export { ensureWebsiteMirror, getCachePaths, validateWebsiteInputUrl, validateWebsiteUrl, WebsiteSourceProvider };
@@ -1,10 +1,10 @@
1
1
  import fs from "node:fs";
2
2
  import path from "node:path";
3
- import { deriveCanonicalAssetNameFromStashRoot, isRelevantAssetFile, resolveAssetPathFromName, TYPE_DIRS, } from "./asset-spec";
4
- import { hasErrnoCode, isWithin } from "./common";
5
- import { NotFoundError, UsageError } from "./errors";
6
- import { runMatchers } from "./file-context";
7
- import { walkStashFlat } from "./walker";
3
+ import { deriveCanonicalAssetNameFromStashRoot, isRelevantAssetFile, resolveAssetPathFromName, TYPE_DIRS, } from "../core/asset-spec";
4
+ import { hasErrnoCode, isWithin } from "../core/common";
5
+ import { NotFoundError, UsageError } from "../core/errors";
6
+ import { runMatchers } from "../indexer/file-context";
7
+ import { walkStashFlat } from "../indexer/walker";
8
8
  /**
9
9
  * Resolve an asset path from a stash directory, type, and name.
10
10
  */
@@ -30,27 +30,28 @@ function resolveInTypeDir(stashDir, typeDir, type, name) {
30
30
  const resolvedRoot = resolveAndValidateTypeRoot(root, type, name);
31
31
  const resolvedTarget = path.resolve(target);
32
32
  if (!isWithin(resolvedTarget, resolvedRoot)) {
33
- throw new UsageError("Ref resolves outside the stash root.");
33
+ throw new UsageError("Ref resolves outside the stash root.", "PATH_ESCAPE_VIOLATION");
34
34
  }
35
35
  if (!fs.existsSync(resolvedTarget) || !fs.statSync(resolvedTarget).isFile()) {
36
- throw new NotFoundError(`Stash asset not found for ref: ${type}:${name}`);
36
+ throw new NotFoundError(`Stash asset not found for ref: ${type}:${name}`, "ASSET_NOT_FOUND");
37
37
  }
38
38
  const realTarget = fs.realpathSync(resolvedTarget);
39
39
  if (!isWithin(realTarget, resolvedRoot)) {
40
- throw new UsageError("Ref resolves outside the stash root.");
40
+ throw new UsageError("Ref resolves outside the stash root.", "PATH_ESCAPE_VIOLATION");
41
41
  }
42
42
  if (!isRelevantAssetFile(type, path.basename(resolvedTarget))) {
43
43
  if (type === "script") {
44
44
  throw new NotFoundError("Script ref must resolve to a file with a supported script extension. Refer to the akm documentation for the complete list of supported script extensions.");
45
45
  }
46
- throw new NotFoundError(`Stash asset not found for ref: ${type}:${name}`);
46
+ throw new NotFoundError(`Stash asset not found for ref: ${type}:${name}`, "ASSET_NOT_FOUND");
47
47
  }
48
48
  return realTarget;
49
49
  }
50
50
  function resolveAndValidateTypeRoot(root, type, name) {
51
51
  const rootStat = readTypeRootStat(root, type, name);
52
52
  if (!rootStat.isDirectory()) {
53
- throw new NotFoundError(`Stash type root is not a directory for ref: ${type}:${name}`);
53
+ throw new NotFoundError(`Asset directory for ${type} assets is not accessible — got a file where a directory was expected for ref: ${type}:${name}. ` +
54
+ "Run `akm index` to rebuild the index, or check your source configuration.", "ASSET_NOT_FOUND", "Run `akm list` to see your configured sources and verify the source path exists.");
54
55
  }
55
56
  return fs.realpathSync(root);
56
57
  }
@@ -60,7 +61,7 @@ function readTypeRootStat(root, type, name) {
60
61
  }
61
62
  catch (error) {
62
63
  if (hasErrnoCode(error, "ENOENT")) {
63
- throw new NotFoundError(`Stash type root not found for ref: ${type}:${name}`);
64
+ throw new NotFoundError(`Asset not found for ref: ${type}:${name}. No ${type} assets are present in the configured source.`, "ASSET_NOT_FOUND", "Run `akm list` to see your configured sources, or `akm index` to rebuild the search index.");
64
65
  }
65
66
  throw error;
66
67
  }
@@ -77,7 +78,7 @@ async function resolveByCanonicalName(stashDir, type, name) {
77
78
  const realTarget = fs.realpathSync(ctx.absPath);
78
79
  const resolvedRoot = fs.realpathSync(stashDir);
79
80
  if (!isWithin(realTarget, resolvedRoot)) {
80
- throw new UsageError("Ref resolves outside the stash root.");
81
+ throw new UsageError("Ref resolves outside the stash root.", "PATH_ESCAPE_VIOLATION");
81
82
  }
82
83
  return realTarget;
83
84
  }
@@ -15,13 +15,13 @@
15
15
  import fs from "node:fs";
16
16
  import path from "node:path";
17
17
  import { parse as yamlParse } from "yaml";
18
- import { isWithin } from "./common";
19
- import { loadUserConfig, saveConfig } from "./config";
20
- import { NotFoundError, UsageError } from "./errors";
21
- import { parseFrontmatter, parseFrontmatterBlock } from "./frontmatter";
22
- import { resolveStashSources } from "./search-source";
23
- import { akmSearch } from "./stash-search";
24
- import { buildIndexMd, buildLogMd, buildSchemaMd } from "./templates/wiki-templates";
18
+ import { akmSearch } from "../commands/search";
19
+ import { isWithin } from "../core/common";
20
+ import { loadUserConfig, saveConfig } from "../core/config";
21
+ import { NotFoundError, UsageError } from "../core/errors";
22
+ import { parseFrontmatter, parseFrontmatterBlock } from "../core/frontmatter";
23
+ import { resolveSourceEntries } from "../indexer/search-source";
24
+ import { buildIndexMd, buildLogMd, buildSchemaMd } from "../templates/wiki-templates";
25
25
  // ── Constants ───────────────────────────────────────────────────────────────
26
26
  export const WIKIS_SUBDIR = "wikis";
27
27
  export const SCHEMA_MD = "schema.md";
@@ -64,7 +64,7 @@ function wikiNotFoundMessage(name) {
64
64
  return `Wiki not found: ${name}. Run \`akm wiki create ${name}\` to create it or \`akm wiki register ${name} <path-or-repo>\` to register an external wiki.`;
65
65
  }
66
66
  function registeredWikiSources(stashDir) {
67
- return resolveStashSources(stashDir)
67
+ return resolveSourceEntries(stashDir)
68
68
  .filter((source) => typeof source.wikiName === "string")
69
69
  .map((source) => ({
70
70
  name: source.wikiName,
@@ -82,17 +82,17 @@ export function resolveWikiSource(stashDir, name) {
82
82
  const external = registeredWikiSources(stashDir).find((source) => source.name === name);
83
83
  if (external)
84
84
  return external;
85
- throw new NotFoundError(wikiNotFoundMessage(name));
85
+ throw new NotFoundError(wikiNotFoundMessage(name), "STASH_NOT_FOUND");
86
86
  }
87
87
  export function ensureWikiNameAvailable(stashDir, name) {
88
88
  validateWikiName(name);
89
89
  const wikiDir = resolveWikiDir(stashDir, name);
90
90
  if (fs.existsSync(wikiDir)) {
91
- throw new UsageError(`Wiki already exists: ${name}.`);
91
+ throw new UsageError(`Wiki already exists: ${name}.`, "RESOURCE_ALREADY_EXISTS");
92
92
  }
93
93
  const external = registeredWikiSources(stashDir).find((source) => source.name === name);
94
94
  if (external) {
95
- throw new UsageError(`Wiki already registered: ${name}.`);
95
+ throw new UsageError(`Wiki already registered: ${name}.`, "RESOURCE_ALREADY_EXISTS");
96
96
  }
97
97
  }
98
98
  /**
@@ -305,7 +305,7 @@ export function showWiki(stashDir, name) {
305
305
  export function createWiki(stashDir, name) {
306
306
  const existing = registeredWikiSources(stashDir).find((source) => source.name === name);
307
307
  if (existing) {
308
- throw new UsageError(`Wiki already registered: ${name}.`);
308
+ throw new UsageError(`Wiki already registered: ${name}.`, "RESOURCE_ALREADY_EXISTS");
309
309
  }
310
310
  const wikiDir = resolveWikiDir(stashDir, name);
311
311
  fs.mkdirSync(wikiDir, { recursive: true });
@@ -357,11 +357,12 @@ export function removeWiki(stashDir, name, options = {}) {
357
357
  const wikiDir = resolved.path;
358
358
  if (resolved.mode === "external") {
359
359
  const config = loadUserConfig();
360
- const stashes = (config.stashes ?? []).filter((entry) => entry.wikiName !== name);
360
+ const filteredSources = (config.sources ?? config.stashes ?? []).filter((entry) => entry.wikiName !== name);
361
361
  const installed = (config.installed ?? []).filter((entry) => entry.wikiName !== name);
362
362
  saveConfig({
363
363
  ...config,
364
- stashes: stashes.length > 0 ? stashes : undefined,
364
+ sources: filteredSources.length > 0 ? filteredSources : undefined,
365
+ stashes: undefined,
365
366
  installed: installed.length > 0 ? installed : undefined,
366
367
  });
367
368
  return {
@@ -373,11 +374,11 @@ export function removeWiki(stashDir, name, options = {}) {
373
374
  };
374
375
  }
375
376
  if (!fs.existsSync(wikiDir)) {
376
- throw new NotFoundError(`Wiki not found: ${name}.`);
377
+ throw new NotFoundError(`Wiki not found: ${name}.`, "STASH_NOT_FOUND");
377
378
  }
378
379
  const wikisRoot = resolveWikisRoot(stashDir);
379
380
  if (!isWithin(wikiDir, wikisRoot)) {
380
- throw new UsageError(`Refusing to remove a path outside the wikis root: ${wikiDir}`);
381
+ throw new UsageError(`Refusing to remove a path outside the wikis root: ${wikiDir}`, "PATH_ESCAPE_VIOLATION");
381
382
  }
382
383
  const removed = [];
383
384
  const rawDir = path.join(wikiDir, RAW_SUBDIR);
@@ -527,7 +528,7 @@ export async function searchInWiki(input) {
527
528
  const rawDir = path.join(wikiDir, RAW_SUBDIR);
528
529
  const filtered = [];
529
530
  for (const hit of response.hits) {
530
- // hits can be StashSearchHit or RegistrySearchResultHit (union); filter
531
+ // hits can be SourceSearchHit or RegistrySearchResultHit (union); filter
531
532
  // by path inclusion. Registry hits have no path and are dropped.
532
533
  if (hit.type === "registry")
533
534
  continue;
@@ -1,9 +1,10 @@
1
1
  import fs from "node:fs";
2
2
  import path from "node:path";
3
- import { resolveAssetPathFromName } from "./asset-spec";
4
- import { isWithin, resolveStashDir } from "./common";
5
- import { UsageError } from "./errors";
6
- import { parseWorkflowMarkdown, WorkflowValidationError } from "./workflow-markdown";
3
+ import { resolveAssetPathFromName } from "../core/asset-spec";
4
+ import { isWithin, resolveStashDir } from "../core/common";
5
+ import { UsageError } from "../core/errors";
6
+ import { warn } from "../core/warn";
7
+ import { parseWorkflow } from "./parser";
7
8
  const DEFAULT_WORKFLOW_TEMPLATE = renderWorkflowTemplate({
8
9
  title: "Example Workflow",
9
10
  firstStepTitle: "First Step",
@@ -22,7 +23,10 @@ export function buildWorkflowTemplate(name) {
22
23
  firstStepTitle: `${title} Setup`,
23
24
  firstStepId: `${stepId}-setup`,
24
25
  });
25
- parseWorkflowMarkdown(customized);
26
+ const result = parseWorkflow(customized, { path: `<template:${name}>` });
27
+ if (!result.ok) {
28
+ throw new UsageError(formatWorkflowErrors(`<template:${name}>`, result.errors));
29
+ }
26
30
  return customized;
27
31
  }
28
32
  export function createWorkflowAsset(input) {
@@ -32,22 +36,18 @@ export function createWorkflowAsset(input) {
32
36
  const normalizedName = normalizeWorkflowName(input.name);
33
37
  const assetPath = resolveAssetPathFromName("workflow", typeRoot, normalizedName);
34
38
  if (!isWithin(assetPath, typeRoot)) {
35
- throw new UsageError(`Resolved workflow path escapes the stash: "${normalizedName}"`);
39
+ throw new UsageError(`Resolved workflow path escapes the stash: "${normalizedName}"`, "PATH_ESCAPE_VIOLATION");
36
40
  }
37
41
  if (fs.existsSync(assetPath) && !input.force) {
38
- throw new UsageError(`Workflow "${normalizedName}" already exists. Re-run with --force to overwrite it.`);
42
+ throw new UsageError(`Workflow "${normalizedName}" already exists. Re-run with --force to overwrite it.`, "RESOURCE_ALREADY_EXISTS");
39
43
  }
40
44
  const content = input.from
41
- ? readWorkflowSource(input.from)
45
+ ? readWorkflowSource(input.from, stashDir)
42
46
  : (input.content ?? buildWorkflowTemplate(normalizedName));
43
- try {
44
- parseWorkflowMarkdown(content);
45
- }
46
- catch (error) {
47
- if (error instanceof WorkflowValidationError) {
48
- throw new UsageError(error.message);
49
- }
50
- throw error;
47
+ const sourcePath = input.from ?? `workflows/${normalizedName}.md`;
48
+ const result = parseWorkflow(content, { path: sourcePath });
49
+ if (!result.ok) {
50
+ throw new UsageError(formatWorkflowErrors(sourcePath, result.errors));
51
51
  }
52
52
  fs.mkdirSync(path.dirname(assetPath), { recursive: true });
53
53
  fs.writeFileSync(assetPath, content.endsWith("\n") ? content : `${content}\n`, "utf8");
@@ -57,7 +57,7 @@ export function createWorkflowAsset(input) {
57
57
  stashDir,
58
58
  };
59
59
  }
60
- function readWorkflowSource(source) {
60
+ function readWorkflowSource(source, stashDir) {
61
61
  const resolved = path.resolve(source);
62
62
  let stat;
63
63
  try {
@@ -69,6 +69,13 @@ function readWorkflowSource(source) {
69
69
  if (!stat.isFile()) {
70
70
  throw new UsageError(`Workflow source must be a file: "${source}".`);
71
71
  }
72
+ // The user is allowed to import any readable file as a workflow body, but
73
+ // an import from outside the stash is unusual enough to warn about. Anyone
74
+ // running `akm workflow create --from /etc/passwd` deserves a heads-up.
75
+ if (!isWithin(resolved, stashDir)) {
76
+ warn(`Importing workflow content from outside the stash: ${resolved}\n ` +
77
+ `If this was unintentional, abort and re-run with a --from path inside ${stashDir}.`);
78
+ }
72
79
  return fs.readFileSync(resolved, "utf8");
73
80
  }
74
81
  function normalizeWorkflowName(name) {
@@ -102,6 +109,30 @@ function slugifyWorkflowStepId(name) {
102
109
  .replace(/[^a-z0-9]+/g, "-")
103
110
  .replace(/^-+|-+$/g, "") || "workflow");
104
111
  }
112
+ export function formatWorkflowErrors(path, errors) {
113
+ const lines = errors.map((e) => ` ${path}:${e.line} — ${e.message}`);
114
+ const heading = errors.length === 1 ? "Workflow has 1 error:" : `Workflow has ${errors.length} errors:`;
115
+ return [heading, ...lines].join("\n");
116
+ }
117
+ /**
118
+ * Validate a workflow by ref (`workflow:<name>`) or filesystem path.
119
+ *
120
+ * Returns the parse result plus the source-relative path used. Throws
121
+ * `UsageError` only when the target cannot be located on disk; parse
122
+ * failures are returned as `{ ok: false, errors }` so callers can
123
+ * format them however they like.
124
+ */
125
+ export function validateWorkflowSource(target) {
126
+ if (target.startsWith("workflow:")) {
127
+ throw new UsageError(`validateWorkflowSource expects a filesystem path; resolve refs to paths in the caller before invoking.`);
128
+ }
129
+ const resolved = path.resolve(target);
130
+ if (!fs.existsSync(resolved)) {
131
+ throw new UsageError(`Workflow file not found: "${target}".`);
132
+ }
133
+ const content = fs.readFileSync(resolved, "utf8");
134
+ return { path: target, parse: parseWorkflow(content, { path: target }) };
135
+ }
105
136
  function renderWorkflowTemplate(input) {
106
137
  return `---
107
138
  description: Describe what this workflow accomplishes
@@ -1,4 +1,4 @@
1
- import { UsageError } from "./errors";
1
+ import { UsageError } from "../core/errors";
2
2
  export const WORKFLOW_STEP_STATES = [
3
3
  "completed",
4
4
  "blocked",
@@ -14,6 +14,7 @@ export const WORKFLOW_SUBCOMMANDS = new Set([
14
14
  "create",
15
15
  "template",
16
16
  "resume",
17
+ "validate",
17
18
  ]);
18
19
  export function parseWorkflowJsonObject(raw, flagName) {
19
20
  if (!raw)
@@ -1,7 +1,7 @@
1
1
  import { Database } from "bun:sqlite";
2
2
  import fs from "node:fs";
3
3
  import path from "node:path";
4
- import { getWorkflowDbPath } from "./paths";
4
+ import { getWorkflowDbPath } from "../core/paths";
5
5
  export function openWorkflowDatabase(dbPath = getWorkflowDbPath()) {
6
6
  const dir = path.dirname(dbPath);
7
7
  if (!fs.existsSync(dir)) {
@@ -0,0 +1,20 @@
1
+ /**
2
+ * Side-channel cache that lets the workflow renderer hand a validated
3
+ * `WorkflowDocument` to the indexer without persisting it through the
4
+ * `entry_json` column or widening `StashEntry` with a workflow-shaped field.
5
+ *
6
+ * The renderer is called during metadata generation; the indexer writes the
7
+ * document to `workflow_documents` after `upsertEntry` returns the row id.
8
+ * A WeakMap keyed by the entry object preserves the parse work between the
9
+ * two phases without leaking memory if the entry is dropped.
10
+ */
11
+ const cache = new WeakMap();
12
+ export function cacheWorkflowDocument(entry, doc) {
13
+ cache.set(entry, doc);
14
+ }
15
+ export function takeWorkflowDocument(entry) {
16
+ const doc = cache.get(entry);
17
+ if (doc !== undefined)
18
+ cache.delete(entry);
19
+ return doc;
20
+ }