akm-cli 0.7.1 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/CHANGELOG.md +35 -0
  2. package/dist/cli.js +62 -16
  3. package/dist/commands/history.js +2 -7
  4. package/dist/commands/info.js +2 -2
  5. package/dist/commands/installed-stashes.js +45 -1
  6. package/dist/commands/search.js +2 -2
  7. package/dist/commands/show.js +4 -19
  8. package/dist/commands/source-add.js +1 -1
  9. package/dist/core/common.js +16 -1
  10. package/dist/core/config.js +18 -3
  11. package/dist/indexer/db-search.js +33 -39
  12. package/dist/indexer/db.js +51 -1
  13. package/dist/indexer/graph-extraction.js +5 -3
  14. package/dist/indexer/indexer.js +334 -121
  15. package/dist/indexer/manifest.js +18 -23
  16. package/dist/indexer/memory-inference.js +47 -58
  17. package/dist/indexer/metadata.js +253 -21
  18. package/dist/indexer/search-source.js +11 -5
  19. package/dist/llm/client.js +61 -1
  20. package/dist/llm/embedder.js +8 -5
  21. package/dist/llm/embedders/local.js +8 -2
  22. package/dist/llm/embedders/remote.js +4 -2
  23. package/dist/llm/graph-extract.js +4 -4
  24. package/dist/llm/memory-infer.js +61 -33
  25. package/dist/llm/metadata-enhance.js +2 -2
  26. package/dist/output/cli-hints.js +5 -2
  27. package/dist/output/renderers.js +22 -49
  28. package/dist/registry/build-index.js +13 -18
  29. package/dist/setup/setup.js +238 -96
  30. package/dist/sources/providers/git.js +14 -2
  31. package/dist/sources/providers/website.js +4 -460
  32. package/dist/sources/website-ingest.js +470 -0
  33. package/dist/wiki/wiki.js +11 -1
  34. package/dist/workflows/parser.js +19 -4
  35. package/dist/workflows/runs.js +3 -3
  36. package/docs/README.md +10 -3
  37. package/docs/migration/release-notes/0.7.0.md +22 -0
  38. package/package.json +5 -2
@@ -14,7 +14,7 @@ import { resolveStashDir } from "../core/common";
14
14
  import { loadConfig } from "../core/config";
15
15
  import { getDbPath } from "../core/paths";
16
16
  import { warn } from "../core/warn";
17
- import { closeDatabase, getAllEntries, getEntryCount, getMeta, openDatabase } from "./db";
17
+ import { closeDatabase, getAllEntries, getEntryCount, getMeta, openExistingDatabase } from "./db";
18
18
  import { generateMetadataFlat, loadStashFile } from "./metadata";
19
19
  import { resolveSourceEntries } from "./search-source";
20
20
  import { walkStashFlat } from "./walker";
@@ -57,13 +57,12 @@ function toManifestEntry(entry, filePath, stashDir, registryId) {
57
57
  /**
58
58
  * Get the manifest from the database (fast path).
59
59
  */
60
- function getManifestFromDb(stashDir, config, sources, type) {
60
+ function getManifestFromDb(stashDir, _config, sources, type) {
61
61
  const dbPath = getDbPath();
62
62
  try {
63
63
  if (!fs.existsSync(dbPath))
64
64
  return null;
65
- const embeddingDim = config.embedding?.dimension;
66
- const db = openDatabase(dbPath, embeddingDim ? { embeddingDim } : undefined);
65
+ const db = openExistingDatabase(dbPath);
67
66
  try {
68
67
  const entryCount = getEntryCount(db);
69
68
  const storedStashDir = getMeta(db, "stashDir");
@@ -113,29 +112,21 @@ async function getManifestFromWalker(sources, type) {
113
112
  dirGroups.set(ctx.parentDirAbs, [ctx.absPath]);
114
113
  }
115
114
  for (const [dirPath, files] of dirGroups) {
116
- // Try loading existing .stash.json first
117
- let stash = loadStashFile(dirPath);
118
- if (stash) {
119
- const coveredFiles = new Set(stash.entries.map((e) => e.filename).filter((e) => !!e));
120
- const uncoveredFiles = files.filter((f) => !coveredFiles.has(path.basename(f)));
121
- if (uncoveredFiles.length > 0) {
122
- const generated = await generateMetadataFlat(currentStashDir, uncoveredFiles);
123
- if (generated.entries.length > 0) {
124
- stash = { entries: [...stash.entries, ...generated.entries] };
125
- }
126
- }
127
- }
128
- else {
129
- const generated = await generateMetadataFlat(currentStashDir, files);
130
- if (generated.entries.length === 0)
131
- continue;
132
- stash = generated;
133
- }
115
+ const generated = await generateMetadataFlat(currentStashDir, files);
116
+ const legacyOverrides = loadStashFile(dirPath, { requireFilename: true });
117
+ const mergedEntries = legacyOverrides
118
+ ? generated.entries.map((entry) => mergeLegacyEntry(entry, legacyOverrides.entries))
119
+ : generated.entries;
120
+ const stash = mergedEntries.length > 0 ? { entries: mergedEntries } : legacyOverrides;
121
+ if (!stash || stash.entries.length === 0)
122
+ continue;
134
123
  const source = sources.find((s) => dirPath.startsWith(path.resolve(s.path) + path.sep));
135
124
  for (const stashEntry of stash.entries) {
136
125
  if (type && type !== "any" && stashEntry.type !== type)
137
126
  continue;
138
- const entryPath = stashEntry.filename ? path.join(dirPath, stashEntry.filename) : files[0] || dirPath;
127
+ if (!stashEntry.filename)
128
+ continue;
129
+ const entryPath = path.join(dirPath, stashEntry.filename);
139
130
  const manifestEntry = toManifestEntry(stashEntry, entryPath, currentStashDir, source?.registryId);
140
131
  if (manifestEntry)
141
132
  entries.push(manifestEntry);
@@ -144,6 +135,10 @@ async function getManifestFromWalker(sources, type) {
144
135
  }
145
136
  return entries;
146
137
  }
138
+ function mergeLegacyEntry(entry, legacyEntries) {
139
+ const legacy = legacyEntries.find((candidate) => candidate.filename === entry.filename);
140
+ return legacy ? { ...entry, ...legacy, filename: entry.filename } : entry;
141
+ }
147
142
  /**
148
143
  * Generate a compact manifest of all assets in the stash.
149
144
  *
@@ -1,9 +1,10 @@
1
1
  /**
2
2
  * Memory inference pass for `akm index` (#201).
3
3
  *
4
- * Detects memories pending inference, asks the configured LLM to split each
5
- * into atomic facts, and writes the results back as new memory files with
6
- * frontmatter `inferred: true` + a `source:` backref to the parent memory.
4
+ * Detects memories pending inference, asks the configured LLM to compress each
5
+ * into one higher-signal derived memory, and writes the result back as a new
6
+ * memory file with frontmatter `inferred: true` + a `source:` backref to the
7
+ * parent memory.
7
8
  *
8
9
  * Pending predicate (see {@link isPendingMemory}):
9
10
  * - File lives under `<stashRoot>/memories/` and ends in `.md`.
@@ -36,7 +37,7 @@ import { parseFrontmatter, parseFrontmatterBlock } from "../core/frontmatter";
36
37
  import { warn } from "../core/warn";
37
38
  import { writeAssetToSource } from "../core/write-source";
38
39
  import { resolveIndexPassLLM } from "../llm/index-passes";
39
- import { splitMemoryIntoAtomicFacts } from "../llm/memory-infer";
40
+ import { compressMemoryToDerivedMemory } from "../llm/memory-infer";
40
41
  /**
41
42
  * Frontmatter keys this pass cares about. Constants so a future rename only
42
43
  * needs to touch one site.
@@ -59,8 +60,8 @@ const FM_SOURCE = "source";
59
60
  * Both must allow the call for the pass to run. Either set to `false`
60
61
  * short-circuits to a no-op result.
61
62
  */
62
- export async function runMemoryInferencePass(config, sources) {
63
- const empty = {
63
+ export async function runMemoryInferencePass(config, sources, signal) {
64
+ const result = {
64
65
  considered: 0,
65
66
  splitParents: 0,
66
67
  writtenFacts: 0,
@@ -69,38 +70,40 @@ export async function runMemoryInferencePass(config, sources) {
69
70
  // Gate 1 — locked feature flag (§14). Defaults to enabled; only an
70
71
  // explicit `false` disables the pass entirely.
71
72
  if (config.llm?.features?.memory_inference === false)
72
- return empty;
73
+ return result;
73
74
  // Gate 2 — per-pass opt-out (#208). Returns the resolved llm config or
74
75
  // `undefined` when the pass should not run.
75
76
  const llmConfig = resolveIndexPassLLM("memory", config);
76
77
  if (!llmConfig)
77
- return empty;
78
+ return result;
78
79
  // The pass only writes to the primary (working) stash. Read-only caches
79
80
  // (git, npm, website) are deliberately untouched — writing inferred
80
81
  // children there would be clobbered by the next sync().
81
82
  const primary = sources[0];
82
83
  if (!primary)
83
- return empty;
84
+ return result;
84
85
  const pending = collectPendingMemories(primary.path);
85
- empty.considered = pending.length;
86
+ result.considered = pending.length;
86
87
  if (pending.length === 0)
87
- return empty;
88
+ return result;
88
89
  for (const record of pending) {
89
- const facts = await splitMemoryIntoAtomicFacts(llmConfig, record.body);
90
- if (facts.length === 0) {
91
- empty.skippedNoFacts += 1;
90
+ if (signal?.aborted)
91
+ return result;
92
+ const derived = await compressMemoryToDerivedMemory(llmConfig, record.body, signal);
93
+ if (!derived) {
94
+ result.skippedNoFacts += 1;
92
95
  // Intentionally NOT marked processed — a transient LLM failure should
93
96
  // be retried on the next index run.
94
97
  continue;
95
98
  }
96
- const written = await writeAtomicChildren(record, facts);
99
+ const written = await writeDerivedMemory(record, derived);
97
100
  if (written > 0) {
98
101
  markParentProcessed(record);
99
- empty.splitParents += 1;
100
- empty.writtenFacts += written;
102
+ result.splitParents += 1;
103
+ result.writtenFacts += written;
101
104
  }
102
105
  }
103
- return empty;
106
+ return result;
104
107
  }
105
108
  // ── Pending detection ───────────────────────────────────────────────────────
106
109
  /**
@@ -133,6 +136,7 @@ export function collectPendingMemories(stashRoot) {
133
136
  ref: `memory:${relName}`,
134
137
  data: parsed.data,
135
138
  body: parsed.content,
139
+ name: relName,
136
140
  });
137
141
  }
138
142
  return out;
@@ -177,19 +181,8 @@ function toMemoryName(memoriesDir, filePath) {
177
181
  // user has organised under memories/.
178
182
  return rel.replace(/\\/g, "/").replace(/\.md$/i, "");
179
183
  }
180
- // ── Writing children + marking parent ───────────────────────────────────────
181
- async function writeAtomicChildren(parent, facts) {
182
- const memoriesDir = path.join(parent.stashRoot, "memories");
183
- // Sibling directory layout: <parentDir>/<parentBase>.facts/fact-N.md
184
- // Keeps facts grouped near the parent without polluting the top level.
185
- const parentRel = path.relative(memoriesDir, parent.filePath).replace(/\\/g, "/");
186
- const parentBase = parentRel.replace(/\.md$/i, "");
187
- const factsDirRel = `${parentBase}.facts`;
188
- // Children are routed through writeAssetToSource — the single dispatch
189
- // point for kind-branching writes (CLAUDE.md / spec §10 step 5). Memory
190
- // assets resolve to `<source.path>/memories/<name>.md`, so a child name
191
- // of `<parentBase>.facts/fact-N` lands at exactly the documented child
192
- // path scheme.
184
+ // ── Writing derived memories + marking parent ───────────────────────────────
185
+ async function writeDerivedMemory(parent, derived) {
193
186
  const writeTarget = {
194
187
  kind: "filesystem",
195
188
  name: "stash",
@@ -201,39 +194,35 @@ async function writeAtomicChildren(parent, facts) {
201
194
  path: parent.stashRoot,
202
195
  writable: true,
203
196
  };
204
- let written = 0;
205
- for (let i = 0; i < facts.length; i++) {
206
- const fact = facts[i];
207
- const childName = `${factsDirRel}/fact-${i + 1}`;
208
- const childRefStr = `memory:${childName}`;
209
- const childPath = path.join(memoriesDir, `${childName}.md`);
210
- // Idempotent re-writes: if a child already exists at this slot we skip
211
- // it. The parent's `inferenceProcessed` marker is the primary idempotency
212
- // guard (we never re-enter the splitter for a processed parent), but a
213
- // partial previous run that crashed before the marker landed should not
214
- // duplicate facts.
215
- if (fs.existsSync(childPath)) {
216
- continue;
217
- }
218
- try {
219
- const content = renderChildMemory(fact, parent.ref);
220
- const childRef = parseAssetRef(childRefStr);
221
- await writeAssetToSource(writeTarget, writeConfig, childRef, content);
222
- written += 1;
223
- }
224
- catch (err) {
225
- warn(`memory inference: failed to write atomic child ${childName}: ${err instanceof Error ? err.message : String(err)}`);
226
- }
197
+ const childName = `${parent.name}.derived`;
198
+ const childRefStr = `memory:${childName}`;
199
+ const childPath = path.join(parent.stashRoot, "memories", `${childName}.md`);
200
+ if (fs.existsSync(childPath)) {
201
+ return 0;
202
+ }
203
+ try {
204
+ const content = renderDerivedMemory(parent, derived);
205
+ const childRef = parseAssetRef(childRefStr);
206
+ await writeAssetToSource(writeTarget, writeConfig, childRef, content);
207
+ return 1;
208
+ }
209
+ catch (err) {
210
+ warn(`memory inference: failed to write derived memory ${childName}: ${err instanceof Error ? err.message : String(err)}`);
211
+ return 0;
227
212
  }
228
- return written;
229
213
  }
230
- function renderChildMemory(fact, parentRef) {
214
+ function renderDerivedMemory(parent, derived) {
231
215
  const fm = {
232
216
  [FM_INFERRED]: true,
233
- [FM_SOURCE]: parentRef,
217
+ [FM_SOURCE]: parent.ref,
218
+ description: derived.description,
219
+ tags: derived.tags,
220
+ searchHints: derived.searchHints,
221
+ title: derived.title,
222
+ derivedFrom: parent.name,
234
223
  };
235
224
  const yaml = yamlStringify(fm).trimEnd();
236
- return `---\n${yaml}\n---\n\n${fact.trim()}\n`;
225
+ return `---\n${yaml}\n---\n\n# ${derived.title.trim()}\n\n${derived.content.trim()}\n`;
237
226
  }
238
227
  function markParentProcessed(parent) {
239
228
  // Frontmatter-only rewrite of an existing asset: not a new asset write,
@@ -49,7 +49,7 @@ export function isProposedQuality(quality) {
49
49
  export function stashFilePath(dirPath) {
50
50
  return path.join(dirPath, STASH_FILENAME);
51
51
  }
52
- export function loadStashFile(dirPath) {
52
+ export function loadStashFile(dirPath, options) {
53
53
  const filePath = stashFilePath(dirPath);
54
54
  if (!fs.existsSync(filePath))
55
55
  return null;
@@ -61,6 +61,8 @@ export function loadStashFile(dirPath) {
61
61
  for (const e of raw.entries) {
62
62
  const validated = validateStashEntry(e);
63
63
  if (validated) {
64
+ if (options?.requireFilename && !validated.filename)
65
+ continue;
64
66
  entries.push(validated);
65
67
  }
66
68
  else {
@@ -268,6 +270,69 @@ export function applyScopeFrontmatter(entry, fmData) {
268
270
  if (scope)
269
271
  entry.scope = scope;
270
272
  }
273
+ function normalizeIntent(value) {
274
+ if (typeof value !== "object" || value === null || Array.isArray(value))
275
+ return undefined;
276
+ const raw = value;
277
+ const intent = {};
278
+ const when = toStringOrUndefined(raw.when);
279
+ const input = toStringOrUndefined(raw.input);
280
+ const output = toStringOrUndefined(raw.output);
281
+ if (when)
282
+ intent.when = when;
283
+ if (input)
284
+ intent.input = input;
285
+ if (output)
286
+ intent.output = output;
287
+ return Object.keys(intent).length > 0 ? intent : undefined;
288
+ }
289
+ function normalizeStringListOrUndefined(value) {
290
+ return normalizeNonEmptyStringList(value);
291
+ }
292
+ export function applyCuratedFrontmatter(entry, fmData) {
293
+ const description = toStringOrUndefined(fmData.description);
294
+ if (description) {
295
+ entry.description = description;
296
+ entry.source = "frontmatter";
297
+ entry.confidence = 0.9;
298
+ }
299
+ const tags = normalizeStringListOrUndefined(fmData.tags);
300
+ if (tags)
301
+ entry.tags = normalizeTerms(tags);
302
+ const aliases = normalizeStringListOrUndefined(fmData.aliases);
303
+ if (aliases)
304
+ entry.aliases = normalizeTerms(aliases);
305
+ const searchHints = normalizeStringListOrUndefined(fmData.searchHints);
306
+ if (searchHints)
307
+ entry.searchHints = searchHints;
308
+ const usage = normalizeStringListOrUndefined(fmData.usage);
309
+ if (usage)
310
+ entry.usage = usage;
311
+ const examples = normalizeStringListOrUndefined(fmData.examples);
312
+ if (examples)
313
+ entry.examples = examples;
314
+ const run = toStringOrUndefined(fmData.run);
315
+ if (run)
316
+ entry.run = run;
317
+ const setup = toStringOrUndefined(fmData.setup);
318
+ if (setup)
319
+ entry.setup = setup;
320
+ const cwd = toStringOrUndefined(fmData.cwd);
321
+ if (cwd)
322
+ entry.cwd = cwd;
323
+ const quality = toStringOrUndefined(fmData.quality);
324
+ if (quality)
325
+ entry.quality = normalizeQuality(quality);
326
+ const intent = normalizeIntent(fmData.intent);
327
+ if (intent)
328
+ entry.intent = intent;
329
+ if (typeof fmData.scope === "object" && fmData.scope !== null && !Array.isArray(fmData.scope)) {
330
+ const normalizedScope = normalizeScopeObject(fmData.scope);
331
+ if (normalizedScope)
332
+ entry.scope = normalizedScope;
333
+ }
334
+ applyScopeFrontmatter(entry, fmData);
335
+ }
271
336
  function normalizeNonEmptyStringList(value) {
272
337
  if (typeof value === "string") {
273
338
  const trimmed = value.trim();
@@ -439,6 +504,183 @@ function mergeParameters(existing, additional) {
439
504
  }
440
505
  return merged;
441
506
  }
507
+ function splitCommentList(raw) {
508
+ return raw
509
+ .split(/[;,]/)
510
+ .map((item) => item.trim())
511
+ .filter((item) => item.length > 0);
512
+ }
513
+ function parseCommentScope(raw) {
514
+ const pairs = raw
515
+ .split(/[;,]/)
516
+ .map((item) => item.trim())
517
+ .filter((item) => item.length > 0);
518
+ if (pairs.length === 0)
519
+ return undefined;
520
+ const scopeRaw = {};
521
+ for (const pair of pairs) {
522
+ const [keyPart, ...valueParts] = pair.split("=");
523
+ const key = keyPart?.trim();
524
+ const value = valueParts.join("=").trim();
525
+ if (!key || !value)
526
+ continue;
527
+ if (SCOPE_KEYS.includes(key)) {
528
+ scopeRaw[key] = value;
529
+ }
530
+ }
531
+ return normalizeScopeObject(scopeRaw);
532
+ }
533
+ function parseIntentCommentLine(cleaned, metadata) {
534
+ const intentMatch = cleaned.match(/^@intent(?:\.(when|input|output))?\s+(.+)$/);
535
+ if (!intentMatch)
536
+ return false;
537
+ metadata.intent ??= {};
538
+ const value = intentMatch[2].trim();
539
+ const key = intentMatch[1];
540
+ if (key === "when")
541
+ metadata.intent.when = value;
542
+ else if (key === "input")
543
+ metadata.intent.input = value;
544
+ else if (key === "output")
545
+ metadata.intent.output = value;
546
+ else
547
+ metadata.intent.when ??= value;
548
+ return true;
549
+ }
550
+ export function extractCommentMetadata(filePath, content) {
551
+ if (content === undefined) {
552
+ try {
553
+ content = fs.readFileSync(filePath, "utf8");
554
+ }
555
+ catch {
556
+ return undefined;
557
+ }
558
+ }
559
+ const lines = content.split(/\r?\n/).slice(0, 50);
560
+ const metadata = {};
561
+ for (const line of lines) {
562
+ const trimmed = line.trim();
563
+ if (!/^(?:\/\/|#|\/?\*|;|--)/.test(trimmed) && !trimmed.startsWith("'"))
564
+ continue;
565
+ const cleaned = trimmed
566
+ .replace(/^(?:\/\/|##?|\/?\*\*?\/?|;|--|'?)\s*/, "")
567
+ .replace(/\*\/\s*$/, "")
568
+ .trim();
569
+ if (!cleaned)
570
+ continue;
571
+ if (parseIntentCommentLine(cleaned, metadata))
572
+ continue;
573
+ const descMatch = cleaned.match(/^@description\s+(.+)$/);
574
+ if (descMatch) {
575
+ metadata.description = descMatch[1].trim();
576
+ continue;
577
+ }
578
+ const tagsMatch = cleaned.match(/^@tags?\s+(.+)$/);
579
+ if (tagsMatch) {
580
+ metadata.tags = splitCommentList(tagsMatch[1]);
581
+ continue;
582
+ }
583
+ const aliasesMatch = cleaned.match(/^@aliases?\s+(.+)$/);
584
+ if (aliasesMatch) {
585
+ metadata.aliases = splitCommentList(aliasesMatch[1]);
586
+ continue;
587
+ }
588
+ const hintsMatch = cleaned.match(/^@searchHints?\s+(.+)$/);
589
+ if (hintsMatch) {
590
+ metadata.searchHints = splitCommentList(hintsMatch[1]);
591
+ continue;
592
+ }
593
+ const usageMatch = cleaned.match(/^@usage\s+(.+)$/);
594
+ if (usageMatch) {
595
+ metadata.usage = [...(metadata.usage ?? []), usageMatch[1].trim()];
596
+ continue;
597
+ }
598
+ const examplesMatch = cleaned.match(/^@examples?\s+(.+)$/);
599
+ if (examplesMatch) {
600
+ metadata.examples = [...(metadata.examples ?? []), examplesMatch[1].trim()];
601
+ continue;
602
+ }
603
+ const runMatch = cleaned.match(/^@run\s+(.+)$/);
604
+ if (runMatch) {
605
+ metadata.run = runMatch[1].trim();
606
+ continue;
607
+ }
608
+ const setupMatch = cleaned.match(/^@setup\s+(.+)$/);
609
+ if (setupMatch) {
610
+ metadata.setup = setupMatch[1].trim();
611
+ continue;
612
+ }
613
+ const cwdMatch = cleaned.match(/^@cwd\s+(.+)$/);
614
+ if (cwdMatch) {
615
+ metadata.cwd = cwdMatch[1].trim();
616
+ continue;
617
+ }
618
+ const scopeMatch = cleaned.match(/^@scope\s+(.+)$/);
619
+ if (scopeMatch) {
620
+ const scope = parseCommentScope(scopeMatch[1]);
621
+ if (scope)
622
+ metadata.scope = scope;
623
+ }
624
+ }
625
+ return Object.keys(metadata).length > 0 ? metadata : undefined;
626
+ }
627
+ export function applyCommentMetadata(entry, metadata) {
628
+ if (!metadata)
629
+ return;
630
+ let usedCommentMetadata = false;
631
+ if (metadata.description && !entry.description) {
632
+ entry.description = metadata.description;
633
+ usedCommentMetadata = true;
634
+ }
635
+ if (metadata.tags?.length && (!entry.tags || entry.tags.length === 0)) {
636
+ entry.tags = normalizeTerms(metadata.tags);
637
+ usedCommentMetadata = true;
638
+ }
639
+ if (metadata.aliases?.length) {
640
+ entry.aliases = normalizeTerms(metadata.aliases);
641
+ usedCommentMetadata = true;
642
+ }
643
+ if (metadata.searchHints?.length) {
644
+ entry.searchHints = metadata.searchHints;
645
+ usedCommentMetadata = true;
646
+ }
647
+ if (metadata.usage?.length) {
648
+ entry.usage = metadata.usage;
649
+ usedCommentMetadata = true;
650
+ }
651
+ if (metadata.examples?.length) {
652
+ entry.examples = metadata.examples;
653
+ usedCommentMetadata = true;
654
+ }
655
+ if (metadata.intent && Object.keys(metadata.intent).length > 0) {
656
+ entry.intent = metadata.intent;
657
+ usedCommentMetadata = true;
658
+ }
659
+ if (metadata.run) {
660
+ entry.run = metadata.run;
661
+ usedCommentMetadata = true;
662
+ }
663
+ if (metadata.setup) {
664
+ entry.setup = metadata.setup;
665
+ usedCommentMetadata = true;
666
+ }
667
+ if (metadata.cwd) {
668
+ entry.cwd = metadata.cwd;
669
+ usedCommentMetadata = true;
670
+ }
671
+ if (metadata.scope) {
672
+ entry.scope = metadata.scope;
673
+ usedCommentMetadata = true;
674
+ }
675
+ if (usedCommentMetadata && entry.source !== "frontmatter" && entry.source !== "manual") {
676
+ entry.source = "comments";
677
+ entry.confidence = Math.max(entry.confidence ?? 0, 0.7);
678
+ }
679
+ }
680
+ function mergeAliases(existing, generated) {
681
+ const merged = normalizeTerms([...(existing ?? []), ...generated]);
682
+ return merged.length > 0 ? merged : undefined;
683
+ }
442
684
  // ── Metadata Generation ─────────────────────────────────────────────────────
443
685
  export async function generateMetadata(dirPath, assetType, files, typeRoot = dirPath) {
444
686
  const entries = [];
@@ -473,20 +715,13 @@ export async function generateMetadata(dirPath, assetType, files, typeRoot = dir
473
715
  if (ext === ".md") {
474
716
  const content = fs.readFileSync(file, "utf8");
475
717
  const parsed = parseFrontmatter(content);
476
- const fm = toStringOrUndefined(parsed.data.description);
477
- if (fm) {
478
- entry.description = fm;
479
- entry.source = "frontmatter";
480
- entry.confidence = 0.9;
481
- }
718
+ applyCuratedFrontmatter(entry, parsed.data);
482
719
  // Extract parameters from frontmatter params: key
483
720
  const fmParams = extractFrontmatterParameters(parsed.data);
484
721
  if (fmParams)
485
722
  entry.parameters = fmParams;
486
723
  // Pass wiki-pattern frontmatter through onto the entry
487
724
  applyWikiFrontmatter(entry, parsed.data);
488
- // Pass canonical scope_* frontmatter through onto the entry
489
- applyScopeFrontmatter(entry, parsed.data);
490
725
  // Extract parameters from template placeholders ($1, $ARGUMENTS, {{named}})
491
726
  if (entry.type === "command") {
492
727
  const cmdParams = extractCommandParameters(parsed.content);
@@ -500,9 +735,11 @@ export async function generateMetadata(dirPath, assetType, files, typeRoot = dir
500
735
  // and must never be parsed for @param or any other metadata that could
501
736
  // embed a value into the entry.
502
737
  if (ext !== ".md" && assetType !== "vault") {
503
- const scriptParams = extractScriptParameters(file);
738
+ const content = fs.readFileSync(file, "utf8");
739
+ const scriptParams = extractScriptParameters(file, content);
504
740
  if (scriptParams)
505
741
  entry.parameters = scriptParams;
742
+ applyCommentMetadata(entry, extractCommentMetadata(file, content));
506
743
  }
507
744
  // Priority 3: Type-specific metadata extraction (e.g. TOC for knowledge, comments for scripts)
508
745
  const fileCtx = buildFileContext(typeRoot, file);
@@ -530,7 +767,7 @@ export async function generateMetadata(dirPath, assetType, files, typeRoot = dir
530
767
  entry.tags = extractTagsFromPath(file, dirPath);
531
768
  }
532
769
  entry.tags = normalizeTerms(entry.tags ?? []);
533
- entry.aliases = buildAliases(canonicalName, entry.tags);
770
+ entry.aliases = mergeAliases(entry.aliases, buildAliases(canonicalName, entry.tags));
534
771
  // Search hints are only generated when LLM is configured (via enhanceStashWithLlm)
535
772
  // Heuristic search hints are too noisy to be useful for search quality
536
773
  entry.filename = path.basename(file);
@@ -591,20 +828,13 @@ export async function generateMetadataFlat(stashRoot, files) {
591
828
  if (ext === ".md") {
592
829
  const content = ctx.content();
593
830
  const parsed = parseFrontmatter(content);
594
- const fm = toStringOrUndefined(parsed.data.description);
595
- if (fm) {
596
- entry.description = fm;
597
- entry.source = "frontmatter";
598
- entry.confidence = 0.9;
599
- }
831
+ applyCuratedFrontmatter(entry, parsed.data);
600
832
  // Extract parameters from frontmatter params: key
601
833
  const fmParams = extractFrontmatterParameters(parsed.data);
602
834
  if (fmParams)
603
835
  entry.parameters = fmParams;
604
836
  // Pass wiki-pattern frontmatter through onto the entry
605
837
  applyWikiFrontmatter(entry, parsed.data);
606
- // Pass canonical scope_* frontmatter through onto the entry
607
- applyScopeFrontmatter(entry, parsed.data);
608
838
  // Extract parameters from template placeholders ($1, $ARGUMENTS, {{named}})
609
839
  if (entry.type === "command") {
610
840
  const cmdParams = extractCommandParameters(parsed.content);
@@ -618,9 +848,11 @@ export async function generateMetadataFlat(stashRoot, files) {
618
848
  // and must never be parsed for @param or any other metadata that could
619
849
  // embed a value into the entry.
620
850
  if (ext !== ".md" && assetType !== "vault") {
621
- const scriptParams = extractScriptParameters(file, ctx.content());
851
+ const content = ctx.content();
852
+ const scriptParams = extractScriptParameters(file, content);
622
853
  if (scriptParams)
623
854
  entry.parameters = scriptParams;
855
+ applyCommentMetadata(entry, extractCommentMetadata(file, content));
624
856
  }
625
857
  // Renderer metadata extraction
626
858
  const renderer = await getRenderer(match.renderer);
@@ -644,7 +876,7 @@ export async function generateMetadataFlat(stashRoot, files) {
644
876
  entry.tags = extractTagsFromPath(file, dirPath);
645
877
  }
646
878
  entry.tags = normalizeTerms(entry.tags ?? []);
647
- entry.aliases = buildAliases(canonicalName, entry.tags);
879
+ entry.aliases = mergeAliases(entry.aliases, buildAliases(canonicalName, entry.tags));
648
880
  entry.filename = path.basename(file);
649
881
  entries.push(entry);
650
882
  }
@@ -8,7 +8,7 @@ import { resolveSourceProviderFactory } from "../sources/provider-factory";
8
8
  import "../sources/providers/index";
9
9
  import { warn } from "../core/warn";
10
10
  import { ensureGitMirror, getCachePaths, parseGitRepoUrl } from "../sources/providers/git";
11
- import { ensureWebsiteMirror } from "../sources/providers/website";
11
+ import { ensureWebsiteMirror } from "../sources/website-ingest";
12
12
  // Legacy "context-hub" / "github" type aliases are normalized to "git" at
13
13
  // config-load time (see src/config.ts), so this set only contains the canonical
14
14
  // type.
@@ -121,7 +121,8 @@ function resolveEntryContentDir(entry) {
121
121
  // that subdirectory. This is a content-layout convention, not a provider
122
122
  // capability — keep it here.
123
123
  if (GIT_STASH_TYPES.has(entry.type)) {
124
- return path.join(dir, "content");
124
+ const contentDir = path.join(dir, "content");
125
+ return isValidDirectory(contentDir) ? contentDir : dir;
125
126
  }
126
127
  return dir;
127
128
  }
@@ -222,8 +223,9 @@ function isValidDirectory(dir) {
222
223
  * `resolveSourceEntries()` so the content directories pass the
223
224
  * `isValidDirectory()` check.
224
225
  */
225
- export async function ensureSourceCaches(config) {
226
+ export async function ensureSourceCaches(config, options) {
226
227
  const cfg = config ?? loadConfig();
228
+ const force = options?.force === true;
227
229
  // Use sources[] (current key) with fallback to stashes[] (deprecated, one-release compat).
228
230
  const entries = cfg.sources ?? cfg.stashes ?? [];
229
231
  for (const entry of entries) {
@@ -232,7 +234,11 @@ export async function ensureSourceCaches(config) {
232
234
  try {
233
235
  const repo = parseGitRepoUrl(entry.url);
234
236
  const cachePaths = getCachePaths(repo.canonicalUrl);
235
- await ensureGitMirror(repo, cachePaths, { requireRepoDir: true, writable: entry.writable === true });
237
+ await ensureGitMirror(repo, cachePaths, {
238
+ requireRepoDir: true,
239
+ writable: entry.writable === true,
240
+ force,
241
+ });
236
242
  }
237
243
  catch (err) {
238
244
  warn(`Warning: failed to refresh git mirror for "${entry.url}": ${err instanceof Error ? err.message : String(err)}`);
@@ -242,7 +248,7 @@ export async function ensureSourceCaches(config) {
242
248
  if (entry.type !== "website" || !entry.url || entry.enabled === false)
243
249
  continue;
244
250
  try {
245
- await ensureWebsiteMirror(entry, { requireStashDir: true });
251
+ await ensureWebsiteMirror(entry, { requireStashDir: true, force });
246
252
  }
247
253
  catch (err) {
248
254
  warn(`Warning: failed to refresh website stash for "${entry.url}": ${err instanceof Error ? err.message : String(err)}`);