akm-cli 0.7.1 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +35 -0
- package/dist/cli.js +62 -16
- package/dist/commands/history.js +2 -7
- package/dist/commands/info.js +2 -2
- package/dist/commands/installed-stashes.js +45 -1
- package/dist/commands/search.js +2 -2
- package/dist/commands/show.js +4 -19
- package/dist/commands/source-add.js +1 -1
- package/dist/core/common.js +16 -1
- package/dist/core/config.js +18 -3
- package/dist/indexer/db-search.js +33 -39
- package/dist/indexer/db.js +51 -1
- package/dist/indexer/graph-extraction.js +5 -3
- package/dist/indexer/indexer.js +334 -121
- package/dist/indexer/manifest.js +18 -23
- package/dist/indexer/memory-inference.js +47 -58
- package/dist/indexer/metadata.js +253 -21
- package/dist/indexer/search-source.js +11 -5
- package/dist/llm/client.js +61 -1
- package/dist/llm/embedder.js +8 -5
- package/dist/llm/embedders/local.js +8 -2
- package/dist/llm/embedders/remote.js +4 -2
- package/dist/llm/graph-extract.js +4 -4
- package/dist/llm/memory-infer.js +61 -33
- package/dist/llm/metadata-enhance.js +2 -2
- package/dist/output/cli-hints.js +5 -2
- package/dist/output/renderers.js +22 -49
- package/dist/registry/build-index.js +13 -18
- package/dist/setup/setup.js +238 -96
- package/dist/sources/providers/git.js +14 -2
- package/dist/sources/providers/website.js +4 -460
- package/dist/sources/website-ingest.js +470 -0
- package/dist/wiki/wiki.js +11 -1
- package/dist/workflows/parser.js +19 -4
- package/dist/workflows/runs.js +3 -3
- package/docs/README.md +10 -3
- package/docs/migration/release-notes/0.7.0.md +22 -0
- package/package.json +5 -2
package/dist/indexer/manifest.js
CHANGED
|
@@ -14,7 +14,7 @@ import { resolveStashDir } from "../core/common";
|
|
|
14
14
|
import { loadConfig } from "../core/config";
|
|
15
15
|
import { getDbPath } from "../core/paths";
|
|
16
16
|
import { warn } from "../core/warn";
|
|
17
|
-
import { closeDatabase, getAllEntries, getEntryCount, getMeta,
|
|
17
|
+
import { closeDatabase, getAllEntries, getEntryCount, getMeta, openExistingDatabase } from "./db";
|
|
18
18
|
import { generateMetadataFlat, loadStashFile } from "./metadata";
|
|
19
19
|
import { resolveSourceEntries } from "./search-source";
|
|
20
20
|
import { walkStashFlat } from "./walker";
|
|
@@ -57,13 +57,12 @@ function toManifestEntry(entry, filePath, stashDir, registryId) {
|
|
|
57
57
|
/**
|
|
58
58
|
* Get the manifest from the database (fast path).
|
|
59
59
|
*/
|
|
60
|
-
function getManifestFromDb(stashDir,
|
|
60
|
+
function getManifestFromDb(stashDir, _config, sources, type) {
|
|
61
61
|
const dbPath = getDbPath();
|
|
62
62
|
try {
|
|
63
63
|
if (!fs.existsSync(dbPath))
|
|
64
64
|
return null;
|
|
65
|
-
const
|
|
66
|
-
const db = openDatabase(dbPath, embeddingDim ? { embeddingDim } : undefined);
|
|
65
|
+
const db = openExistingDatabase(dbPath);
|
|
67
66
|
try {
|
|
68
67
|
const entryCount = getEntryCount(db);
|
|
69
68
|
const storedStashDir = getMeta(db, "stashDir");
|
|
@@ -113,29 +112,21 @@ async function getManifestFromWalker(sources, type) {
|
|
|
113
112
|
dirGroups.set(ctx.parentDirAbs, [ctx.absPath]);
|
|
114
113
|
}
|
|
115
114
|
for (const [dirPath, files] of dirGroups) {
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
stash = { entries: [...stash.entries, ...generated.entries] };
|
|
125
|
-
}
|
|
126
|
-
}
|
|
127
|
-
}
|
|
128
|
-
else {
|
|
129
|
-
const generated = await generateMetadataFlat(currentStashDir, files);
|
|
130
|
-
if (generated.entries.length === 0)
|
|
131
|
-
continue;
|
|
132
|
-
stash = generated;
|
|
133
|
-
}
|
|
115
|
+
const generated = await generateMetadataFlat(currentStashDir, files);
|
|
116
|
+
const legacyOverrides = loadStashFile(dirPath, { requireFilename: true });
|
|
117
|
+
const mergedEntries = legacyOverrides
|
|
118
|
+
? generated.entries.map((entry) => mergeLegacyEntry(entry, legacyOverrides.entries))
|
|
119
|
+
: generated.entries;
|
|
120
|
+
const stash = mergedEntries.length > 0 ? { entries: mergedEntries } : legacyOverrides;
|
|
121
|
+
if (!stash || stash.entries.length === 0)
|
|
122
|
+
continue;
|
|
134
123
|
const source = sources.find((s) => dirPath.startsWith(path.resolve(s.path) + path.sep));
|
|
135
124
|
for (const stashEntry of stash.entries) {
|
|
136
125
|
if (type && type !== "any" && stashEntry.type !== type)
|
|
137
126
|
continue;
|
|
138
|
-
|
|
127
|
+
if (!stashEntry.filename)
|
|
128
|
+
continue;
|
|
129
|
+
const entryPath = path.join(dirPath, stashEntry.filename);
|
|
139
130
|
const manifestEntry = toManifestEntry(stashEntry, entryPath, currentStashDir, source?.registryId);
|
|
140
131
|
if (manifestEntry)
|
|
141
132
|
entries.push(manifestEntry);
|
|
@@ -144,6 +135,10 @@ async function getManifestFromWalker(sources, type) {
|
|
|
144
135
|
}
|
|
145
136
|
return entries;
|
|
146
137
|
}
|
|
138
|
+
function mergeLegacyEntry(entry, legacyEntries) {
|
|
139
|
+
const legacy = legacyEntries.find((candidate) => candidate.filename === entry.filename);
|
|
140
|
+
return legacy ? { ...entry, ...legacy, filename: entry.filename } : entry;
|
|
141
|
+
}
|
|
147
142
|
/**
|
|
148
143
|
* Generate a compact manifest of all assets in the stash.
|
|
149
144
|
*
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Memory inference pass for `akm index` (#201).
|
|
3
3
|
*
|
|
4
|
-
* Detects memories pending inference, asks the configured LLM to
|
|
5
|
-
* into
|
|
6
|
-
* frontmatter `inferred: true` + a `source:` backref to the
|
|
4
|
+
* Detects memories pending inference, asks the configured LLM to compress each
|
|
5
|
+
* into one higher-signal derived memory, and writes the result back as a new
|
|
6
|
+
* memory file with frontmatter `inferred: true` + a `source:` backref to the
|
|
7
|
+
* parent memory.
|
|
7
8
|
*
|
|
8
9
|
* Pending predicate (see {@link isPendingMemory}):
|
|
9
10
|
* - File lives under `<stashRoot>/memories/` and ends in `.md`.
|
|
@@ -36,7 +37,7 @@ import { parseFrontmatter, parseFrontmatterBlock } from "../core/frontmatter";
|
|
|
36
37
|
import { warn } from "../core/warn";
|
|
37
38
|
import { writeAssetToSource } from "../core/write-source";
|
|
38
39
|
import { resolveIndexPassLLM } from "../llm/index-passes";
|
|
39
|
-
import {
|
|
40
|
+
import { compressMemoryToDerivedMemory } from "../llm/memory-infer";
|
|
40
41
|
/**
|
|
41
42
|
* Frontmatter keys this pass cares about. Constants so a future rename only
|
|
42
43
|
* needs to touch one site.
|
|
@@ -59,8 +60,8 @@ const FM_SOURCE = "source";
|
|
|
59
60
|
* Both must allow the call for the pass to run. Either set to `false`
|
|
60
61
|
* short-circuits to a no-op result.
|
|
61
62
|
*/
|
|
62
|
-
export async function runMemoryInferencePass(config, sources) {
|
|
63
|
-
const
|
|
63
|
+
export async function runMemoryInferencePass(config, sources, signal) {
|
|
64
|
+
const result = {
|
|
64
65
|
considered: 0,
|
|
65
66
|
splitParents: 0,
|
|
66
67
|
writtenFacts: 0,
|
|
@@ -69,38 +70,40 @@ export async function runMemoryInferencePass(config, sources) {
|
|
|
69
70
|
// Gate 1 — locked feature flag (§14). Defaults to enabled; only an
|
|
70
71
|
// explicit `false` disables the pass entirely.
|
|
71
72
|
if (config.llm?.features?.memory_inference === false)
|
|
72
|
-
return
|
|
73
|
+
return result;
|
|
73
74
|
// Gate 2 — per-pass opt-out (#208). Returns the resolved llm config or
|
|
74
75
|
// `undefined` when the pass should not run.
|
|
75
76
|
const llmConfig = resolveIndexPassLLM("memory", config);
|
|
76
77
|
if (!llmConfig)
|
|
77
|
-
return
|
|
78
|
+
return result;
|
|
78
79
|
// The pass only writes to the primary (working) stash. Read-only caches
|
|
79
80
|
// (git, npm, website) are deliberately untouched — writing inferred
|
|
80
81
|
// children there would be clobbered by the next sync().
|
|
81
82
|
const primary = sources[0];
|
|
82
83
|
if (!primary)
|
|
83
|
-
return
|
|
84
|
+
return result;
|
|
84
85
|
const pending = collectPendingMemories(primary.path);
|
|
85
|
-
|
|
86
|
+
result.considered = pending.length;
|
|
86
87
|
if (pending.length === 0)
|
|
87
|
-
return
|
|
88
|
+
return result;
|
|
88
89
|
for (const record of pending) {
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
90
|
+
if (signal?.aborted)
|
|
91
|
+
return result;
|
|
92
|
+
const derived = await compressMemoryToDerivedMemory(llmConfig, record.body, signal);
|
|
93
|
+
if (!derived) {
|
|
94
|
+
result.skippedNoFacts += 1;
|
|
92
95
|
// Intentionally NOT marked processed — a transient LLM failure should
|
|
93
96
|
// be retried on the next index run.
|
|
94
97
|
continue;
|
|
95
98
|
}
|
|
96
|
-
const written = await
|
|
99
|
+
const written = await writeDerivedMemory(record, derived);
|
|
97
100
|
if (written > 0) {
|
|
98
101
|
markParentProcessed(record);
|
|
99
|
-
|
|
100
|
-
|
|
102
|
+
result.splitParents += 1;
|
|
103
|
+
result.writtenFacts += written;
|
|
101
104
|
}
|
|
102
105
|
}
|
|
103
|
-
return
|
|
106
|
+
return result;
|
|
104
107
|
}
|
|
105
108
|
// ── Pending detection ───────────────────────────────────────────────────────
|
|
106
109
|
/**
|
|
@@ -133,6 +136,7 @@ export function collectPendingMemories(stashRoot) {
|
|
|
133
136
|
ref: `memory:${relName}`,
|
|
134
137
|
data: parsed.data,
|
|
135
138
|
body: parsed.content,
|
|
139
|
+
name: relName,
|
|
136
140
|
});
|
|
137
141
|
}
|
|
138
142
|
return out;
|
|
@@ -177,19 +181,8 @@ function toMemoryName(memoriesDir, filePath) {
|
|
|
177
181
|
// user has organised under memories/.
|
|
178
182
|
return rel.replace(/\\/g, "/").replace(/\.md$/i, "");
|
|
179
183
|
}
|
|
180
|
-
// ── Writing
|
|
181
|
-
async function
|
|
182
|
-
const memoriesDir = path.join(parent.stashRoot, "memories");
|
|
183
|
-
// Sibling directory layout: <parentDir>/<parentBase>.facts/fact-N.md
|
|
184
|
-
// Keeps facts grouped near the parent without polluting the top level.
|
|
185
|
-
const parentRel = path.relative(memoriesDir, parent.filePath).replace(/\\/g, "/");
|
|
186
|
-
const parentBase = parentRel.replace(/\.md$/i, "");
|
|
187
|
-
const factsDirRel = `${parentBase}.facts`;
|
|
188
|
-
// Children are routed through writeAssetToSource — the single dispatch
|
|
189
|
-
// point for kind-branching writes (CLAUDE.md / spec §10 step 5). Memory
|
|
190
|
-
// assets resolve to `<source.path>/memories/<name>.md`, so a child name
|
|
191
|
-
// of `<parentBase>.facts/fact-N` lands at exactly the documented child
|
|
192
|
-
// path scheme.
|
|
184
|
+
// ── Writing derived memories + marking parent ───────────────────────────────
|
|
185
|
+
async function writeDerivedMemory(parent, derived) {
|
|
193
186
|
const writeTarget = {
|
|
194
187
|
kind: "filesystem",
|
|
195
188
|
name: "stash",
|
|
@@ -201,39 +194,35 @@ async function writeAtomicChildren(parent, facts) {
|
|
|
201
194
|
path: parent.stashRoot,
|
|
202
195
|
writable: true,
|
|
203
196
|
};
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
}
|
|
218
|
-
|
|
219
|
-
const content = renderChildMemory(fact, parent.ref);
|
|
220
|
-
const childRef = parseAssetRef(childRefStr);
|
|
221
|
-
await writeAssetToSource(writeTarget, writeConfig, childRef, content);
|
|
222
|
-
written += 1;
|
|
223
|
-
}
|
|
224
|
-
catch (err) {
|
|
225
|
-
warn(`memory inference: failed to write atomic child ${childName}: ${err instanceof Error ? err.message : String(err)}`);
|
|
226
|
-
}
|
|
197
|
+
const childName = `${parent.name}.derived`;
|
|
198
|
+
const childRefStr = `memory:${childName}`;
|
|
199
|
+
const childPath = path.join(parent.stashRoot, "memories", `${childName}.md`);
|
|
200
|
+
if (fs.existsSync(childPath)) {
|
|
201
|
+
return 0;
|
|
202
|
+
}
|
|
203
|
+
try {
|
|
204
|
+
const content = renderDerivedMemory(parent, derived);
|
|
205
|
+
const childRef = parseAssetRef(childRefStr);
|
|
206
|
+
await writeAssetToSource(writeTarget, writeConfig, childRef, content);
|
|
207
|
+
return 1;
|
|
208
|
+
}
|
|
209
|
+
catch (err) {
|
|
210
|
+
warn(`memory inference: failed to write derived memory ${childName}: ${err instanceof Error ? err.message : String(err)}`);
|
|
211
|
+
return 0;
|
|
227
212
|
}
|
|
228
|
-
return written;
|
|
229
213
|
}
|
|
230
|
-
function
|
|
214
|
+
function renderDerivedMemory(parent, derived) {
|
|
231
215
|
const fm = {
|
|
232
216
|
[FM_INFERRED]: true,
|
|
233
|
-
[FM_SOURCE]:
|
|
217
|
+
[FM_SOURCE]: parent.ref,
|
|
218
|
+
description: derived.description,
|
|
219
|
+
tags: derived.tags,
|
|
220
|
+
searchHints: derived.searchHints,
|
|
221
|
+
title: derived.title,
|
|
222
|
+
derivedFrom: parent.name,
|
|
234
223
|
};
|
|
235
224
|
const yaml = yamlStringify(fm).trimEnd();
|
|
236
|
-
return `---\n${yaml}\n---\n\n${
|
|
225
|
+
return `---\n${yaml}\n---\n\n# ${derived.title.trim()}\n\n${derived.content.trim()}\n`;
|
|
237
226
|
}
|
|
238
227
|
function markParentProcessed(parent) {
|
|
239
228
|
// Frontmatter-only rewrite of an existing asset: not a new asset write,
|
package/dist/indexer/metadata.js
CHANGED
|
@@ -49,7 +49,7 @@ export function isProposedQuality(quality) {
|
|
|
49
49
|
export function stashFilePath(dirPath) {
|
|
50
50
|
return path.join(dirPath, STASH_FILENAME);
|
|
51
51
|
}
|
|
52
|
-
export function loadStashFile(dirPath) {
|
|
52
|
+
export function loadStashFile(dirPath, options) {
|
|
53
53
|
const filePath = stashFilePath(dirPath);
|
|
54
54
|
if (!fs.existsSync(filePath))
|
|
55
55
|
return null;
|
|
@@ -61,6 +61,8 @@ export function loadStashFile(dirPath) {
|
|
|
61
61
|
for (const e of raw.entries) {
|
|
62
62
|
const validated = validateStashEntry(e);
|
|
63
63
|
if (validated) {
|
|
64
|
+
if (options?.requireFilename && !validated.filename)
|
|
65
|
+
continue;
|
|
64
66
|
entries.push(validated);
|
|
65
67
|
}
|
|
66
68
|
else {
|
|
@@ -268,6 +270,69 @@ export function applyScopeFrontmatter(entry, fmData) {
|
|
|
268
270
|
if (scope)
|
|
269
271
|
entry.scope = scope;
|
|
270
272
|
}
|
|
273
|
+
function normalizeIntent(value) {
|
|
274
|
+
if (typeof value !== "object" || value === null || Array.isArray(value))
|
|
275
|
+
return undefined;
|
|
276
|
+
const raw = value;
|
|
277
|
+
const intent = {};
|
|
278
|
+
const when = toStringOrUndefined(raw.when);
|
|
279
|
+
const input = toStringOrUndefined(raw.input);
|
|
280
|
+
const output = toStringOrUndefined(raw.output);
|
|
281
|
+
if (when)
|
|
282
|
+
intent.when = when;
|
|
283
|
+
if (input)
|
|
284
|
+
intent.input = input;
|
|
285
|
+
if (output)
|
|
286
|
+
intent.output = output;
|
|
287
|
+
return Object.keys(intent).length > 0 ? intent : undefined;
|
|
288
|
+
}
|
|
289
|
+
function normalizeStringListOrUndefined(value) {
|
|
290
|
+
return normalizeNonEmptyStringList(value);
|
|
291
|
+
}
|
|
292
|
+
export function applyCuratedFrontmatter(entry, fmData) {
|
|
293
|
+
const description = toStringOrUndefined(fmData.description);
|
|
294
|
+
if (description) {
|
|
295
|
+
entry.description = description;
|
|
296
|
+
entry.source = "frontmatter";
|
|
297
|
+
entry.confidence = 0.9;
|
|
298
|
+
}
|
|
299
|
+
const tags = normalizeStringListOrUndefined(fmData.tags);
|
|
300
|
+
if (tags)
|
|
301
|
+
entry.tags = normalizeTerms(tags);
|
|
302
|
+
const aliases = normalizeStringListOrUndefined(fmData.aliases);
|
|
303
|
+
if (aliases)
|
|
304
|
+
entry.aliases = normalizeTerms(aliases);
|
|
305
|
+
const searchHints = normalizeStringListOrUndefined(fmData.searchHints);
|
|
306
|
+
if (searchHints)
|
|
307
|
+
entry.searchHints = searchHints;
|
|
308
|
+
const usage = normalizeStringListOrUndefined(fmData.usage);
|
|
309
|
+
if (usage)
|
|
310
|
+
entry.usage = usage;
|
|
311
|
+
const examples = normalizeStringListOrUndefined(fmData.examples);
|
|
312
|
+
if (examples)
|
|
313
|
+
entry.examples = examples;
|
|
314
|
+
const run = toStringOrUndefined(fmData.run);
|
|
315
|
+
if (run)
|
|
316
|
+
entry.run = run;
|
|
317
|
+
const setup = toStringOrUndefined(fmData.setup);
|
|
318
|
+
if (setup)
|
|
319
|
+
entry.setup = setup;
|
|
320
|
+
const cwd = toStringOrUndefined(fmData.cwd);
|
|
321
|
+
if (cwd)
|
|
322
|
+
entry.cwd = cwd;
|
|
323
|
+
const quality = toStringOrUndefined(fmData.quality);
|
|
324
|
+
if (quality)
|
|
325
|
+
entry.quality = normalizeQuality(quality);
|
|
326
|
+
const intent = normalizeIntent(fmData.intent);
|
|
327
|
+
if (intent)
|
|
328
|
+
entry.intent = intent;
|
|
329
|
+
if (typeof fmData.scope === "object" && fmData.scope !== null && !Array.isArray(fmData.scope)) {
|
|
330
|
+
const normalizedScope = normalizeScopeObject(fmData.scope);
|
|
331
|
+
if (normalizedScope)
|
|
332
|
+
entry.scope = normalizedScope;
|
|
333
|
+
}
|
|
334
|
+
applyScopeFrontmatter(entry, fmData);
|
|
335
|
+
}
|
|
271
336
|
function normalizeNonEmptyStringList(value) {
|
|
272
337
|
if (typeof value === "string") {
|
|
273
338
|
const trimmed = value.trim();
|
|
@@ -439,6 +504,183 @@ function mergeParameters(existing, additional) {
|
|
|
439
504
|
}
|
|
440
505
|
return merged;
|
|
441
506
|
}
|
|
507
|
+
function splitCommentList(raw) {
|
|
508
|
+
return raw
|
|
509
|
+
.split(/[;,]/)
|
|
510
|
+
.map((item) => item.trim())
|
|
511
|
+
.filter((item) => item.length > 0);
|
|
512
|
+
}
|
|
513
|
+
function parseCommentScope(raw) {
|
|
514
|
+
const pairs = raw
|
|
515
|
+
.split(/[;,]/)
|
|
516
|
+
.map((item) => item.trim())
|
|
517
|
+
.filter((item) => item.length > 0);
|
|
518
|
+
if (pairs.length === 0)
|
|
519
|
+
return undefined;
|
|
520
|
+
const scopeRaw = {};
|
|
521
|
+
for (const pair of pairs) {
|
|
522
|
+
const [keyPart, ...valueParts] = pair.split("=");
|
|
523
|
+
const key = keyPart?.trim();
|
|
524
|
+
const value = valueParts.join("=").trim();
|
|
525
|
+
if (!key || !value)
|
|
526
|
+
continue;
|
|
527
|
+
if (SCOPE_KEYS.includes(key)) {
|
|
528
|
+
scopeRaw[key] = value;
|
|
529
|
+
}
|
|
530
|
+
}
|
|
531
|
+
return normalizeScopeObject(scopeRaw);
|
|
532
|
+
}
|
|
533
|
+
function parseIntentCommentLine(cleaned, metadata) {
|
|
534
|
+
const intentMatch = cleaned.match(/^@intent(?:\.(when|input|output))?\s+(.+)$/);
|
|
535
|
+
if (!intentMatch)
|
|
536
|
+
return false;
|
|
537
|
+
metadata.intent ??= {};
|
|
538
|
+
const value = intentMatch[2].trim();
|
|
539
|
+
const key = intentMatch[1];
|
|
540
|
+
if (key === "when")
|
|
541
|
+
metadata.intent.when = value;
|
|
542
|
+
else if (key === "input")
|
|
543
|
+
metadata.intent.input = value;
|
|
544
|
+
else if (key === "output")
|
|
545
|
+
metadata.intent.output = value;
|
|
546
|
+
else
|
|
547
|
+
metadata.intent.when ??= value;
|
|
548
|
+
return true;
|
|
549
|
+
}
|
|
550
|
+
export function extractCommentMetadata(filePath, content) {
|
|
551
|
+
if (content === undefined) {
|
|
552
|
+
try {
|
|
553
|
+
content = fs.readFileSync(filePath, "utf8");
|
|
554
|
+
}
|
|
555
|
+
catch {
|
|
556
|
+
return undefined;
|
|
557
|
+
}
|
|
558
|
+
}
|
|
559
|
+
const lines = content.split(/\r?\n/).slice(0, 50);
|
|
560
|
+
const metadata = {};
|
|
561
|
+
for (const line of lines) {
|
|
562
|
+
const trimmed = line.trim();
|
|
563
|
+
if (!/^(?:\/\/|#|\/?\*|;|--)/.test(trimmed) && !trimmed.startsWith("'"))
|
|
564
|
+
continue;
|
|
565
|
+
const cleaned = trimmed
|
|
566
|
+
.replace(/^(?:\/\/|##?|\/?\*\*?\/?|;|--|'?)\s*/, "")
|
|
567
|
+
.replace(/\*\/\s*$/, "")
|
|
568
|
+
.trim();
|
|
569
|
+
if (!cleaned)
|
|
570
|
+
continue;
|
|
571
|
+
if (parseIntentCommentLine(cleaned, metadata))
|
|
572
|
+
continue;
|
|
573
|
+
const descMatch = cleaned.match(/^@description\s+(.+)$/);
|
|
574
|
+
if (descMatch) {
|
|
575
|
+
metadata.description = descMatch[1].trim();
|
|
576
|
+
continue;
|
|
577
|
+
}
|
|
578
|
+
const tagsMatch = cleaned.match(/^@tags?\s+(.+)$/);
|
|
579
|
+
if (tagsMatch) {
|
|
580
|
+
metadata.tags = splitCommentList(tagsMatch[1]);
|
|
581
|
+
continue;
|
|
582
|
+
}
|
|
583
|
+
const aliasesMatch = cleaned.match(/^@aliases?\s+(.+)$/);
|
|
584
|
+
if (aliasesMatch) {
|
|
585
|
+
metadata.aliases = splitCommentList(aliasesMatch[1]);
|
|
586
|
+
continue;
|
|
587
|
+
}
|
|
588
|
+
const hintsMatch = cleaned.match(/^@searchHints?\s+(.+)$/);
|
|
589
|
+
if (hintsMatch) {
|
|
590
|
+
metadata.searchHints = splitCommentList(hintsMatch[1]);
|
|
591
|
+
continue;
|
|
592
|
+
}
|
|
593
|
+
const usageMatch = cleaned.match(/^@usage\s+(.+)$/);
|
|
594
|
+
if (usageMatch) {
|
|
595
|
+
metadata.usage = [...(metadata.usage ?? []), usageMatch[1].trim()];
|
|
596
|
+
continue;
|
|
597
|
+
}
|
|
598
|
+
const examplesMatch = cleaned.match(/^@examples?\s+(.+)$/);
|
|
599
|
+
if (examplesMatch) {
|
|
600
|
+
metadata.examples = [...(metadata.examples ?? []), examplesMatch[1].trim()];
|
|
601
|
+
continue;
|
|
602
|
+
}
|
|
603
|
+
const runMatch = cleaned.match(/^@run\s+(.+)$/);
|
|
604
|
+
if (runMatch) {
|
|
605
|
+
metadata.run = runMatch[1].trim();
|
|
606
|
+
continue;
|
|
607
|
+
}
|
|
608
|
+
const setupMatch = cleaned.match(/^@setup\s+(.+)$/);
|
|
609
|
+
if (setupMatch) {
|
|
610
|
+
metadata.setup = setupMatch[1].trim();
|
|
611
|
+
continue;
|
|
612
|
+
}
|
|
613
|
+
const cwdMatch = cleaned.match(/^@cwd\s+(.+)$/);
|
|
614
|
+
if (cwdMatch) {
|
|
615
|
+
metadata.cwd = cwdMatch[1].trim();
|
|
616
|
+
continue;
|
|
617
|
+
}
|
|
618
|
+
const scopeMatch = cleaned.match(/^@scope\s+(.+)$/);
|
|
619
|
+
if (scopeMatch) {
|
|
620
|
+
const scope = parseCommentScope(scopeMatch[1]);
|
|
621
|
+
if (scope)
|
|
622
|
+
metadata.scope = scope;
|
|
623
|
+
}
|
|
624
|
+
}
|
|
625
|
+
return Object.keys(metadata).length > 0 ? metadata : undefined;
|
|
626
|
+
}
|
|
627
|
+
export function applyCommentMetadata(entry, metadata) {
|
|
628
|
+
if (!metadata)
|
|
629
|
+
return;
|
|
630
|
+
let usedCommentMetadata = false;
|
|
631
|
+
if (metadata.description && !entry.description) {
|
|
632
|
+
entry.description = metadata.description;
|
|
633
|
+
usedCommentMetadata = true;
|
|
634
|
+
}
|
|
635
|
+
if (metadata.tags?.length && (!entry.tags || entry.tags.length === 0)) {
|
|
636
|
+
entry.tags = normalizeTerms(metadata.tags);
|
|
637
|
+
usedCommentMetadata = true;
|
|
638
|
+
}
|
|
639
|
+
if (metadata.aliases?.length) {
|
|
640
|
+
entry.aliases = normalizeTerms(metadata.aliases);
|
|
641
|
+
usedCommentMetadata = true;
|
|
642
|
+
}
|
|
643
|
+
if (metadata.searchHints?.length) {
|
|
644
|
+
entry.searchHints = metadata.searchHints;
|
|
645
|
+
usedCommentMetadata = true;
|
|
646
|
+
}
|
|
647
|
+
if (metadata.usage?.length) {
|
|
648
|
+
entry.usage = metadata.usage;
|
|
649
|
+
usedCommentMetadata = true;
|
|
650
|
+
}
|
|
651
|
+
if (metadata.examples?.length) {
|
|
652
|
+
entry.examples = metadata.examples;
|
|
653
|
+
usedCommentMetadata = true;
|
|
654
|
+
}
|
|
655
|
+
if (metadata.intent && Object.keys(metadata.intent).length > 0) {
|
|
656
|
+
entry.intent = metadata.intent;
|
|
657
|
+
usedCommentMetadata = true;
|
|
658
|
+
}
|
|
659
|
+
if (metadata.run) {
|
|
660
|
+
entry.run = metadata.run;
|
|
661
|
+
usedCommentMetadata = true;
|
|
662
|
+
}
|
|
663
|
+
if (metadata.setup) {
|
|
664
|
+
entry.setup = metadata.setup;
|
|
665
|
+
usedCommentMetadata = true;
|
|
666
|
+
}
|
|
667
|
+
if (metadata.cwd) {
|
|
668
|
+
entry.cwd = metadata.cwd;
|
|
669
|
+
usedCommentMetadata = true;
|
|
670
|
+
}
|
|
671
|
+
if (metadata.scope) {
|
|
672
|
+
entry.scope = metadata.scope;
|
|
673
|
+
usedCommentMetadata = true;
|
|
674
|
+
}
|
|
675
|
+
if (usedCommentMetadata && entry.source !== "frontmatter" && entry.source !== "manual") {
|
|
676
|
+
entry.source = "comments";
|
|
677
|
+
entry.confidence = Math.max(entry.confidence ?? 0, 0.7);
|
|
678
|
+
}
|
|
679
|
+
}
|
|
680
|
+
function mergeAliases(existing, generated) {
|
|
681
|
+
const merged = normalizeTerms([...(existing ?? []), ...generated]);
|
|
682
|
+
return merged.length > 0 ? merged : undefined;
|
|
683
|
+
}
|
|
442
684
|
// ── Metadata Generation ─────────────────────────────────────────────────────
|
|
443
685
|
export async function generateMetadata(dirPath, assetType, files, typeRoot = dirPath) {
|
|
444
686
|
const entries = [];
|
|
@@ -473,20 +715,13 @@ export async function generateMetadata(dirPath, assetType, files, typeRoot = dir
|
|
|
473
715
|
if (ext === ".md") {
|
|
474
716
|
const content = fs.readFileSync(file, "utf8");
|
|
475
717
|
const parsed = parseFrontmatter(content);
|
|
476
|
-
|
|
477
|
-
if (fm) {
|
|
478
|
-
entry.description = fm;
|
|
479
|
-
entry.source = "frontmatter";
|
|
480
|
-
entry.confidence = 0.9;
|
|
481
|
-
}
|
|
718
|
+
applyCuratedFrontmatter(entry, parsed.data);
|
|
482
719
|
// Extract parameters from frontmatter params: key
|
|
483
720
|
const fmParams = extractFrontmatterParameters(parsed.data);
|
|
484
721
|
if (fmParams)
|
|
485
722
|
entry.parameters = fmParams;
|
|
486
723
|
// Pass wiki-pattern frontmatter through onto the entry
|
|
487
724
|
applyWikiFrontmatter(entry, parsed.data);
|
|
488
|
-
// Pass canonical scope_* frontmatter through onto the entry
|
|
489
|
-
applyScopeFrontmatter(entry, parsed.data);
|
|
490
725
|
// Extract parameters from template placeholders ($1, $ARGUMENTS, {{named}})
|
|
491
726
|
if (entry.type === "command") {
|
|
492
727
|
const cmdParams = extractCommandParameters(parsed.content);
|
|
@@ -500,9 +735,11 @@ export async function generateMetadata(dirPath, assetType, files, typeRoot = dir
|
|
|
500
735
|
// and must never be parsed for @param or any other metadata that could
|
|
501
736
|
// embed a value into the entry.
|
|
502
737
|
if (ext !== ".md" && assetType !== "vault") {
|
|
503
|
-
const
|
|
738
|
+
const content = fs.readFileSync(file, "utf8");
|
|
739
|
+
const scriptParams = extractScriptParameters(file, content);
|
|
504
740
|
if (scriptParams)
|
|
505
741
|
entry.parameters = scriptParams;
|
|
742
|
+
applyCommentMetadata(entry, extractCommentMetadata(file, content));
|
|
506
743
|
}
|
|
507
744
|
// Priority 3: Type-specific metadata extraction (e.g. TOC for knowledge, comments for scripts)
|
|
508
745
|
const fileCtx = buildFileContext(typeRoot, file);
|
|
@@ -530,7 +767,7 @@ export async function generateMetadata(dirPath, assetType, files, typeRoot = dir
|
|
|
530
767
|
entry.tags = extractTagsFromPath(file, dirPath);
|
|
531
768
|
}
|
|
532
769
|
entry.tags = normalizeTerms(entry.tags ?? []);
|
|
533
|
-
entry.aliases = buildAliases(canonicalName, entry.tags);
|
|
770
|
+
entry.aliases = mergeAliases(entry.aliases, buildAliases(canonicalName, entry.tags));
|
|
534
771
|
// Search hints are only generated when LLM is configured (via enhanceStashWithLlm)
|
|
535
772
|
// Heuristic search hints are too noisy to be useful for search quality
|
|
536
773
|
entry.filename = path.basename(file);
|
|
@@ -591,20 +828,13 @@ export async function generateMetadataFlat(stashRoot, files) {
|
|
|
591
828
|
if (ext === ".md") {
|
|
592
829
|
const content = ctx.content();
|
|
593
830
|
const parsed = parseFrontmatter(content);
|
|
594
|
-
|
|
595
|
-
if (fm) {
|
|
596
|
-
entry.description = fm;
|
|
597
|
-
entry.source = "frontmatter";
|
|
598
|
-
entry.confidence = 0.9;
|
|
599
|
-
}
|
|
831
|
+
applyCuratedFrontmatter(entry, parsed.data);
|
|
600
832
|
// Extract parameters from frontmatter params: key
|
|
601
833
|
const fmParams = extractFrontmatterParameters(parsed.data);
|
|
602
834
|
if (fmParams)
|
|
603
835
|
entry.parameters = fmParams;
|
|
604
836
|
// Pass wiki-pattern frontmatter through onto the entry
|
|
605
837
|
applyWikiFrontmatter(entry, parsed.data);
|
|
606
|
-
// Pass canonical scope_* frontmatter through onto the entry
|
|
607
|
-
applyScopeFrontmatter(entry, parsed.data);
|
|
608
838
|
// Extract parameters from template placeholders ($1, $ARGUMENTS, {{named}})
|
|
609
839
|
if (entry.type === "command") {
|
|
610
840
|
const cmdParams = extractCommandParameters(parsed.content);
|
|
@@ -618,9 +848,11 @@ export async function generateMetadataFlat(stashRoot, files) {
|
|
|
618
848
|
// and must never be parsed for @param or any other metadata that could
|
|
619
849
|
// embed a value into the entry.
|
|
620
850
|
if (ext !== ".md" && assetType !== "vault") {
|
|
621
|
-
const
|
|
851
|
+
const content = ctx.content();
|
|
852
|
+
const scriptParams = extractScriptParameters(file, content);
|
|
622
853
|
if (scriptParams)
|
|
623
854
|
entry.parameters = scriptParams;
|
|
855
|
+
applyCommentMetadata(entry, extractCommentMetadata(file, content));
|
|
624
856
|
}
|
|
625
857
|
// Renderer metadata extraction
|
|
626
858
|
const renderer = await getRenderer(match.renderer);
|
|
@@ -644,7 +876,7 @@ export async function generateMetadataFlat(stashRoot, files) {
|
|
|
644
876
|
entry.tags = extractTagsFromPath(file, dirPath);
|
|
645
877
|
}
|
|
646
878
|
entry.tags = normalizeTerms(entry.tags ?? []);
|
|
647
|
-
entry.aliases = buildAliases(canonicalName, entry.tags);
|
|
879
|
+
entry.aliases = mergeAliases(entry.aliases, buildAliases(canonicalName, entry.tags));
|
|
648
880
|
entry.filename = path.basename(file);
|
|
649
881
|
entries.push(entry);
|
|
650
882
|
}
|
|
@@ -8,7 +8,7 @@ import { resolveSourceProviderFactory } from "../sources/provider-factory";
|
|
|
8
8
|
import "../sources/providers/index";
|
|
9
9
|
import { warn } from "../core/warn";
|
|
10
10
|
import { ensureGitMirror, getCachePaths, parseGitRepoUrl } from "../sources/providers/git";
|
|
11
|
-
import { ensureWebsiteMirror } from "../sources/
|
|
11
|
+
import { ensureWebsiteMirror } from "../sources/website-ingest";
|
|
12
12
|
// Legacy "context-hub" / "github" type aliases are normalized to "git" at
|
|
13
13
|
// config-load time (see src/config.ts), so this set only contains the canonical
|
|
14
14
|
// type.
|
|
@@ -121,7 +121,8 @@ function resolveEntryContentDir(entry) {
|
|
|
121
121
|
// that subdirectory. This is a content-layout convention, not a provider
|
|
122
122
|
// capability — keep it here.
|
|
123
123
|
if (GIT_STASH_TYPES.has(entry.type)) {
|
|
124
|
-
|
|
124
|
+
const contentDir = path.join(dir, "content");
|
|
125
|
+
return isValidDirectory(contentDir) ? contentDir : dir;
|
|
125
126
|
}
|
|
126
127
|
return dir;
|
|
127
128
|
}
|
|
@@ -222,8 +223,9 @@ function isValidDirectory(dir) {
|
|
|
222
223
|
* `resolveSourceEntries()` so the content directories pass the
|
|
223
224
|
* `isValidDirectory()` check.
|
|
224
225
|
*/
|
|
225
|
-
export async function ensureSourceCaches(config) {
|
|
226
|
+
export async function ensureSourceCaches(config, options) {
|
|
226
227
|
const cfg = config ?? loadConfig();
|
|
228
|
+
const force = options?.force === true;
|
|
227
229
|
// Use sources[] (current key) with fallback to stashes[] (deprecated, one-release compat).
|
|
228
230
|
const entries = cfg.sources ?? cfg.stashes ?? [];
|
|
229
231
|
for (const entry of entries) {
|
|
@@ -232,7 +234,11 @@ export async function ensureSourceCaches(config) {
|
|
|
232
234
|
try {
|
|
233
235
|
const repo = parseGitRepoUrl(entry.url);
|
|
234
236
|
const cachePaths = getCachePaths(repo.canonicalUrl);
|
|
235
|
-
await ensureGitMirror(repo, cachePaths, {
|
|
237
|
+
await ensureGitMirror(repo, cachePaths, {
|
|
238
|
+
requireRepoDir: true,
|
|
239
|
+
writable: entry.writable === true,
|
|
240
|
+
force,
|
|
241
|
+
});
|
|
236
242
|
}
|
|
237
243
|
catch (err) {
|
|
238
244
|
warn(`Warning: failed to refresh git mirror for "${entry.url}": ${err instanceof Error ? err.message : String(err)}`);
|
|
@@ -242,7 +248,7 @@ export async function ensureSourceCaches(config) {
|
|
|
242
248
|
if (entry.type !== "website" || !entry.url || entry.enabled === false)
|
|
243
249
|
continue;
|
|
244
250
|
try {
|
|
245
|
-
await ensureWebsiteMirror(entry, { requireStashDir: true });
|
|
251
|
+
await ensureWebsiteMirror(entry, { requireStashDir: true, force });
|
|
246
252
|
}
|
|
247
253
|
catch (err) {
|
|
248
254
|
warn(`Warning: failed to refresh website stash for "${entry.url}": ${err instanceof Error ? err.message : String(err)}`);
|