akm-cli 0.7.1 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/dist/cli.js +22 -8
- package/dist/commands/installed-stashes.js +1 -1
- package/dist/commands/source-add.js +1 -1
- package/dist/core/common.js +16 -1
- package/dist/core/config.js +5 -2
- package/dist/indexer/db-search.js +16 -1
- package/dist/indexer/graph-extraction.js +5 -3
- package/dist/indexer/indexer.js +27 -11
- package/dist/indexer/memory-inference.js +47 -58
- package/dist/indexer/search-source.js +1 -1
- package/dist/llm/client.js +61 -1
- package/dist/llm/embedder.js +8 -5
- package/dist/llm/embedders/local.js +8 -2
- package/dist/llm/embedders/remote.js +4 -2
- package/dist/llm/graph-extract.js +4 -4
- package/dist/llm/memory-infer.js +61 -33
- package/dist/llm/metadata-enhance.js +2 -2
- package/dist/output/cli-hints.js +2 -0
- package/dist/setup/setup.js +30 -20
- package/dist/sources/providers/website.js +4 -460
- package/dist/sources/website-ingest.js +470 -0
- package/docs/README.md +7 -0
- package/docs/migration/release-notes/0.7.0.md +14 -0
- package/package.json +4 -1
package/CHANGELOG.md
CHANGED

@@ -6,6 +6,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 
 ## [Unreleased]
 
+### Added
+
+- **One-shot URL ingest for `akm import` and `akm wiki stash`** — both commands now accept a single HTTP/HTTPS URL in addition to file paths and stdin. `akm import <url>` fetches the exact page, converts it to markdown, and writes it into `knowledge/` using a URL-path-derived default name. `akm wiki stash <wiki> <url>` fetches the exact page, converts it to markdown, and writes it into `wikis/<wiki>/raw/`. Neither command registers a persistent website source or crawls linked pages.
+
+### Changed
+
+- **Shared website ingest boundary** — website URL validation, single-page fetch/convert, and website mirror generation now live in a dedicated shared ingest module. The website source provider is a thin adapter, and `akm add`, `akm import`, and `akm wiki stash` all reuse the same core website-ingest path.
+
 ## [0.7.0]
 
 ### Added

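The one-shot URL ingest entry above is the user-facing half of the `readKnowledgeInput` helper added to `dist/cli.js` below: if the positional source parses as an HTTP/HTTPS URL, the page is fetched once, converted to markdown, and given a default name derived from the URL path; otherwise the existing file/stdin path is used. A minimal standalone sketch of that flow (illustration only, assuming Node 18+ global fetch; the real implementation is `fetchWebsiteMarkdownSnapshot` in the new `dist/sources/website-ingest.js`, and the exact naming rules are not shown in this diff):

```js
// Illustration only, not the akm-cli source. Shows the single-page fetch plus
// the URL-path-derived default name described in the changelog entry above.
async function sketchUrlIngest(input) {
  const url = new URL(input); // throws if `input` is not a URL at all
  if (url.protocol !== "http:" && url.protocol !== "https:") {
    throw new Error("Only http(s) URLs are ingested");
  }
  const res = await fetch(url); // the exact page only; linked pages are not crawled
  if (!res.ok) throw new Error(`Fetch failed with status ${res.status}`);
  const html = await res.text();
  // Default name from the last URL path segment,
  // e.g. /docs/getting-started.html → "getting-started" (naming rules assumed).
  const last = url.pathname.split("/").filter(Boolean).pop() ?? url.hostname;
  const preferredName = last.replace(/\.[a-z0-9]+$/i, "");
  return { preferredName, content: html /* real code converts HTML to markdown */ };
}
```
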
package/dist/cli.js
CHANGED

@@ -25,7 +25,7 @@ import { akmClone } from "./commands/source-clone";
 import { addStash } from "./commands/source-manage";
 import { parseAssetRef } from "./core/asset-ref";
 import { deriveCanonicalAssetName, resolveAssetPathFromName } from "./core/asset-spec";
-import { isWithin, resolveStashDir, tryReadStdinText } from "./core/common";
+import { isHttpUrl, isWithin, resolveStashDir, tryReadStdinText } from "./core/common";
 import { DEFAULT_CONFIG, getConfigPath, loadConfig, loadUserConfig, saveConfig } from "./core/config";
 import { ConfigError, NotFoundError, UsageError } from "./core/errors";
 import { appendEvent } from "./core/events";
@@ -44,6 +44,7 @@ import { buildRegistryIndex, writeRegistryIndex } from "./registry/build-index";
 import { resolveSourcesForOrigin } from "./registry/origin-resolve";
 import { saveGitStash } from "./sources/providers/git";
 import { resolveAssetPath } from "./sources/resolve";
+import { fetchWebsiteMarkdownSnapshot } from "./sources/website-ingest";
 import { pkgVersion } from "./version";
 import { createWorkflowAsset, formatWorkflowErrors, getWorkflowTemplate, validateWorkflowSource, } from "./workflows/authoring";
 import { hasWorkflowSubcommand, parseWorkflowJsonObject, parseWorkflowStepState, WORKFLOW_STEP_STATES, } from "./workflows/cli";
@@ -142,10 +143,17 @@ const indexCommand = defineCommand({
 },
 async run({ args }) {
 await runWithJsonErrors(async () => {
+const controller = new AbortController();
+const abort = () => controller.abort(new Error("index interrupted"));
+process.once("SIGINT", abort);
+process.once("SIGTERM", abort);
 const result = await akmIndex({
 full: args.full,
 onProgress: args.verbose ? ({ message }) => console.error(`[index] ${message}`) : undefined,
+signal: controller.signal,
 });
+process.off("SIGINT", abort);
+process.off("SIGTERM", abort);
 output("index", result);
 });
 },
@@ -1057,6 +1065,12 @@ function readKnowledgeContent(source) {
 preferredName: path.basename(resolvedSource, path.extname(resolvedSource)),
 };
 }
+async function readKnowledgeInput(source) {
+if (!isHttpUrl(source))
+return readKnowledgeContent(source);
+const snapshot = await fetchWebsiteMarkdownSnapshot(source);
+return { content: snapshot.content, preferredName: snapshot.preferredName };
+}
 async function writeMarkdownAsset(options) {
 // Resolve write target via the v1 precedence chain (`--target` →
 // `defaultWriteTarget` → working stash). Per spec §10 step 5, this is the
@@ -1609,12 +1623,12 @@ function wasRememberFlagValueConsumedAsContent(content, flagValue, flagName) {
 const importKnowledgeCommand = defineCommand({
 meta: {
 name: "import",
-description: "Import a knowledge document into the default stash",
+description: "Import a knowledge document or URL into the default stash",
 },
 args: {
 source: {
 type: "positional",
-description: 'Source file path, or "-" to read from stdin',
+description: 'Source file path, URL, or "-" to read from stdin',
 required: true,
 },
 name: {
@@ -1633,11 +1647,11 @@ const importKnowledgeCommand = defineCommand({
 },
 async run({ args }) {
 return runWithJsonErrors(async () => {
-const { content, preferredName } =
+const { content, preferredName } = await readKnowledgeInput(args.source);
 const result = await writeMarkdownAsset({
 type: "knowledge",
 content,
-name: args.name,
+name: args.name ?? (isHttpUrl(args.source) ? preferredName : undefined),
 fallbackPrefix: "knowledge",
 preferredName,
 force: args.force,
@@ -2227,17 +2241,17 @@ const wikiSearchCommand = defineCommand({
 const wikiStashCommand = defineCommand({
 meta: {
 name: "stash",
-description: "Copy a source into wikis/<name>/raw/<slug>.md with frontmatter. Source may be a file path or '-' for stdin.",
+description: "Copy a source into wikis/<name>/raw/<slug>.md with frontmatter. Source may be a file path, URL, or '-' for stdin.",
 },
 args: {
 name: { type: "positional", description: "Wiki name", required: true },
-source: { type: "positional", description: "Source file path, or '-' to read from stdin", required: true },
+source: { type: "positional", description: "Source file path, URL, or '-' to read from stdin", required: true },
 as: { type: "string", description: "Preferred slug base (defaults to source filename or first-line slug)" },
 },
 run({ args }) {
 return runWithJsonErrors(async () => {
 const { stashRaw } = await import("./wiki/wiki.js");
-const { content, preferredName } =
+const { content, preferredName } = await readKnowledgeInput(args.source);
 const stashDir = resolveStashDir();
 const result = stashRaw({
 stashDir,

package/dist/commands/installed-stashes.js
CHANGED

@@ -13,7 +13,7 @@ import { akmIndex } from "../indexer/indexer";
 import { removeLockEntry, upsertLockEntry } from "../integrations/lockfile";
 import { parseRegistryRef } from "../registry/resolve";
 import { syncFromRef } from "../sources/providers/sync-from-ref";
-import { ensureWebsiteMirror } from "../sources/
+import { ensureWebsiteMirror } from "../sources/website-ingest";
 import { listWikis, resolveWikisRoot } from "../wiki/wiki";
 import { auditInstallCandidate, deriveRegistryLabels, enforceRegistryInstallPolicy, formatInstallAuditFailure, } from "./install-audit";
 import { removeInstalledRegistryEntry, upsertInstalledRegistryEntry } from "./source-add";

package/dist/commands/source-add.js
CHANGED

@@ -9,7 +9,7 @@ import { upsertLockEntry } from "../integrations/lockfile";
 import { parseRegistryRef } from "../registry/resolve";
 import { detectStashRoot } from "../sources/providers/provider-utils";
 import { syncFromRef } from "../sources/providers/sync-from-ref";
-import { ensureWebsiteMirror, validateWebsiteInputUrl } from "../sources/
+import { ensureWebsiteMirror, validateWebsiteInputUrl } from "../sources/website-ingest";
 import { ensureWikiNameAvailable, validateWikiName } from "../wiki/wiki";
 import { auditInstallCandidate, deriveRegistryLabels, enforceRegistryInstallPolicy, formatInstallAuditFailure, } from "./install-audit";
 const VALID_OVERRIDE_TYPES = new Set(["wiki"]);

package/dist/core/common.js
CHANGED

@@ -146,19 +146,34 @@ function normalizeFsPathForComparison(value) {
  * Fetch with an AbortController timeout.
  * Defaults to 30 seconds if no timeout is specified.
  */
-export async function fetchWithTimeout(url, opts, timeoutMs = 30_000) {
+export async function fetchWithTimeout(url, opts, timeoutMs = 30_000, signal) {
 const controller = new AbortController();
 const timer = setTimeout(() => controller.abort(), timeoutMs);
+const abortExternal = () => controller.abort(signal?.reason);
+if (signal) {
+if (signal.aborted) {
+clearTimeout(timer);
+controller.abort(signal.reason);
+}
+else {
+signal.addEventListener("abort", abortExternal, { once: true });
+}
+}
 try {
 return await fetch(url, { ...opts, signal: controller.signal });
 }
 catch (err) {
 if (err instanceof DOMException && err.name === "AbortError") {
+if (signal?.aborted) {
+throw new Error(`Request aborted: ${url}`);
+}
 throw new Error(`Request timed out after ${timeoutMs}ms: ${url}`);
 }
 throw err;
 }
 finally {
+if (signal)
+signal.removeEventListener("abort", abortExternal);
 clearTimeout(timer);
 }
 }

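Caller-side view of the new optional `signal` parameter on `fetchWithTimeout`: an external AbortSignal is linked into the same AbortController that enforces the 30-second default timeout, which is how the SIGINT/SIGTERM wiring added to `akm index` in `dist/cli.js` can cancel in-flight requests. A minimal standalone sketch of the same pattern (not the akm-cli source; the real function also maps aborts to the `Request aborted` / `Request timed out` errors shown above):

```js
// Combine an external AbortSignal with a per-request timeout, as the patched
// fetchWithTimeout does. Standalone illustration only.
async function fetchWithTimeoutAndSignal(url, timeoutMs = 30_000, signal) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  const abortExternal = () => controller.abort(signal?.reason);
  if (signal?.aborted) controller.abort(signal.reason);
  else signal?.addEventListener("abort", abortExternal, { once: true });
  try {
    return await fetch(url, { signal: controller.signal });
  } finally {
    signal?.removeEventListener("abort", abortExternal);
    clearTimeout(timer);
  }
}

// Usage sketch: let Ctrl-C cancel an in-flight request instead of waiting out the timeout.
const interrupt = new AbortController();
process.once("SIGINT", () => interrupt.abort(new Error("interrupted")));
// await fetchWithTimeoutAndSignal("https://example.com", 10_000, interrupt.signal);
```
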
package/dist/core/config.js
CHANGED

@@ -10,8 +10,8 @@ import { warn } from "./warn";
 export const DEFAULT_CONFIG = {
 semanticSearchMode: "auto",
 registries: [
-{ url: "https://raw.githubusercontent.com/itlackey/akm-registry/main/index.json", name: "
-{ url: "https://skills.sh", name: "skills.sh", provider: "skills-sh" },
+{ url: "https://raw.githubusercontent.com/itlackey/akm-registry/main/index.json", name: "akm-registry" },
+{ url: "https://skills.sh", name: "skills.sh", provider: "skills-sh", enabled: false },
 ],
 output: {
 format: "json",
@@ -509,6 +509,9 @@ function parseLlmConfig(value) {
 if (Object.keys(features).length > 0)
 result.features = features;
 }
+if (typeof obj.extraParams === "object" && obj.extraParams !== null && !Array.isArray(obj.extraParams)) {
+result.extraParams = obj.extraParams;
+}
 return result;
 }
 /**

package/dist/indexer/db-search.js
CHANGED

@@ -269,7 +269,10 @@ async function searchDatabase(db, query, searchType, limit, stashDir, allSourceD
 // If the query IS the asset name (or very close), this is almost certainly
 // what the user wants. This is the single most important ranking signal.
 const nameLower = entry.name.toLowerCase();
-const
+const rawNameBase = nameLower.split("/").pop() ?? nameLower; // last segment for path-based names
+const nameBase = entry.type === "memory" && rawNameBase.endsWith(".derived")
+? rawNameBase.slice(0, -".derived".length)
+: rawNameBase;
 if (nameBase === queryLower || nameLower === queryLower) {
 // Exact match: massive boost
 boostSum += 2.0;
@@ -301,6 +304,18 @@ async function searchDatabase(db, query, searchType, limit, stashDir, allSourceD
 knowledge: 0,
 };
 boostSum += TYPE_BOOST[entry.type] ?? 0;
+// ── 2.5. Derived-vs-raw memory preference ──
+// Raw memories are user notes and may be incomplete or unvetted. Compressed
+// `.derived` memories are the higher-signal retrieval target, but the
+// preference should stay modest so stronger relevance signals still dominate.
+if (entry.type === "memory") {
+if (entry.name.toLowerCase().endsWith(".derived")) {
+boostSum += 0.18;
+}
+else {
+boostSum -= 0.08;
+}
+}
 // ── 3. Tag exact match ──
 // Exact tag equality is a strong signal — the author explicitly tagged
 // this asset with the user's search term.

package/dist/indexer/graph-extraction.js
CHANGED

@@ -70,7 +70,7 @@ const EMPTY_RESULT = {
  * to an empty no-op result, leaving any existing `graph.json` untouched on
  * disk.
  */
-export async function runGraphExtractionPass(config, sources) {
+export async function runGraphExtractionPass(config, sources, signal) {
 // Gate 1 — locked feature flag (§14). Defaults to enabled; only an
 // explicit `false` disables the pass entirely.
 if (config.llm?.features?.graph_extraction === false)
@@ -94,7 +94,9 @@ export async function runGraphExtractionPass(config, sources) {
 let totalEntities = 0;
 let totalRelations = 0;
 for (const candidate of eligible) {
-
+if (signal?.aborted)
+break;
+const extraction = await extractGraphFromBody(llmConfig, candidate.body, signal);
 if (extraction.entities.length === 0)
 continue;
 nodes.push({
@@ -134,7 +136,7 @@ export async function runGraphExtractionPass(config, sources) {
  * same one the rest of the indexer uses: `<stashRoot>/<type>/...`.
  *
  * Inferred-child memories (frontmatter `inferred: true`) are skipped — they
- * are
+ * are already derived summaries, with no additional internal graph structure worth
  * extracting.
  *
  * Exported for direct unit testing.

package/dist/indexer/indexer.js
CHANGED

@@ -13,10 +13,16 @@ import { buildSearchText } from "./search-fields";
 import { classifySemanticFailure, clearSemanticStatus, deriveSemanticProviderFingerprint, writeSemanticStatus, } from "./semantic-status";
 import { ensureUsageEventsSchema, purgeOldUsageEvents } from "./usage-events";
 import { walkStashFlat } from "./walker";
+function throwIfAborted(signal) {
+if (signal?.aborted) {
+throw signal.reason instanceof Error ? signal.reason : new Error("index interrupted");
+}
+}
 // ── Indexer ──────────────────────────────────────────────────────────────────
 export async function akmIndex(options) {
 const stashDir = options?.stashDir || resolveStashDir();
 const onProgress = options?.onProgress ?? (() => { });
+const signal = options?.signal;
 // Load config and resolve all stash sources
 const { loadConfig } = await import("../core/config.js");
 const config = loadConfig();
@@ -82,18 +88,19 @@ export async function akmIndex(options) {
 }
 }
 }
-
+throwIfAborted(signal);
+// Memory inference pass (#201). Runs before the walk so any derived-memory
 // children that get written are picked up by the walker in this same run
 // and don't have to wait for the next `akm index`. Gated entirely by
 // `resolveIndexPassLLM("memory", config)` — when the user has no
 // `akm.llm` block or has set `index.memory.llm = false`, this is a no-op
 // and existing inferred children are left in place.
 try {
-const inferenceResult = await runMemoryInferencePass(config, allSourceEntries);
+const inferenceResult = await runMemoryInferencePass(config, allSourceEntries, signal);
 if (inferenceResult.writtenFacts > 0) {
 onProgress({
 phase: "llm",
-message: `Memory inference wrote ${inferenceResult.writtenFacts}
+message: `Memory inference wrote ${inferenceResult.writtenFacts} derived memor${inferenceResult.writtenFacts === 1 ? "y" : "ies"} from ${inferenceResult.splitParents} parent memor${inferenceResult.splitParents === 1 ? "y" : "ies"}.`,
 });
 }
 }
@@ -114,7 +121,7 @@ export async function akmIndex(options) {
 // `index.graph.llm` toggle) is off; the existing graph file is
 // preserved on disk in that case.
 try {
-const graphResult = await runGraphExtractionPass(config, allSourceEntries);
+const graphResult = await runGraphExtractionPass(config, allSourceEntries, signal);
 if (graphResult.written) {
 onProgress({
 phase: "llm",
@@ -125,6 +132,7 @@ export async function akmIndex(options) {
 catch (err) {
 warn(`Graph extraction pass aborted: ${err instanceof Error ? err.message : String(err)}`);
 }
+throwIfAborted(signal);
 const tWalkStart = Date.now();
 // Walk stash dirs and index entries.
 // doFullDelete=true merges the wipe into the same transaction as the
@@ -150,8 +158,9 @@ export async function akmIndex(options) {
 }
 }
 const tWalkEnd = Date.now();
+throwIfAborted(signal);
 // Enhance entries with LLM if configured
-await enhanceDirsWithLlm(db, config, dirsNeedingLlm);
+await enhanceDirsWithLlm(db, config, dirsNeedingLlm, signal);
 onProgress({
 phase: "llm",
 message: resolveIndexPassLLM("enrichment", config)
@@ -159,6 +168,7 @@
 : "LLM enhancement disabled.",
 });
 const tLlmEnd = Date.now();
+throwIfAborted(signal);
 // Rebuild FTS after all inserts. Use incremental mode when this whole
 // index run is incremental — only entries touched by `upsertEntry`
 // since the last rebuild are re-indexed, instead of re-scanning every
@@ -200,6 +210,7 @@
 catch {
 /* best-effort */
 }
+throwIfAborted(signal);
 // Generate embeddings if semantic search is enabled
 const embeddingResult = await generateEmbeddingsForDb(db, config, onProgress);
 const tEmbedEnd = Date.now();
@@ -435,7 +446,7 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, doFu
 insertTransaction();
 return { scannedDirs, skippedDirs, generatedCount, warnings, dirsNeedingLlm };
 }
-async function enhanceDirsWithLlm(db, config, dirsNeedingLlm) {
+async function enhanceDirsWithLlm(db, config, dirsNeedingLlm, signal) {
 // Resolve per-pass LLM config via the unified shim. Returns undefined when
 // either no `akm.llm` is configured or the user opted this pass out via
 // `index.enrichment.llm = false`. (#208)
@@ -447,12 +458,13 @@ async function enhanceDirsWithLlm(db, config, dirsNeedingLlm) {
 // and leaving the user wondering why nothing got enhanced.
 const summary = { attempted: 0, succeeded: 0, failureSamples: [] };
 for (const { dirPath, files, currentStashDir, stash: originalStash } of dirsNeedingLlm) {
+throwIfAborted(signal);
 // Only enhance generated entries; user-provided overrides should not be overwritten
 const generatedEntries = originalStash.entries.filter((e) => e.quality === "generated");
 if (generatedEntries.length === 0)
 continue;
 const generatedStash = { entries: generatedEntries };
-const enhanced = await enhanceStashWithLlm(llmConfig, generatedStash, files, summary);
+const enhanced = await enhanceStashWithLlm(llmConfig, generatedStash, files, summary, signal);
 // Re-upsert the enhanced entries in a single transaction so a crash
 // cannot leave half the entries updated and the rest stale.
 db.transaction(() => {
@@ -475,7 +487,8 @@
 warn(`LLM enhancement failed for ${failed}/${summary.attempted} entries — they were left un-enhanced.${sample}`);
 }
 }
-async function generateEmbeddingsForDb(db, config, onProgress) {
+async function generateEmbeddingsForDb(db, config, onProgress, signal) {
+throwIfAborted(signal);
 if (config.semanticSearchMode === "off") {
 onProgress({ phase: "embeddings", message: "Semantic search disabled; skipping embeddings." });
 return { success: false, reason: "index-missing", message: "Semantic search is disabled." };
@@ -504,6 +517,7 @@
 try {
 const { embedBatch } = await import("../llm/embedder.js");
 const { estimateTokenCount } = await import("../llm/embedders/remote.js");
+throwIfAborted(signal);
 const allEntries = getAllEntriesForEmbedding(db);
 if (allEntries.length === 0) {
 onProgress({ phase: "embeddings", message: "Embeddings already up to date." });
@@ -528,7 +542,8 @@
 warnVerbose(`[embed] ${ref} (${chars} chars, est. ${tokens} tokens) → batch ${batchNum}/${totalBatches}`);
 }
 }
-const embeddings = await embedBatch(texts, config.embedding);
+const embeddings = await embedBatch(texts, config.embedding, signal);
+throwIfAborted(signal);
 // Wrap all embedding upserts in a single transaction so partial
 // state is rolled back on failure rather than leaving the table half-filled.
 db.transaction(() => {
@@ -699,10 +714,11 @@ function isDirStale(dirPath, currentFiles, previousEntries, builtAtMs) {
 }
 return false;
 }
-async function enhanceStashWithLlm(llmConfig, stash, files, summary) {
+async function enhanceStashWithLlm(llmConfig, stash, files, summary, signal) {
 const { enhanceMetadata } = await import("../llm/metadata-enhance");
 const enhanced = [];
 for (const entry of stash.entries) {
+throwIfAborted(signal);
 summary.attempted++;
 try {
 const entryFile = entry.filename
@@ -717,7 +733,7 @@ async function enhanceStashWithLlm(llmConfig, stash, files, summary) {
 /* ignore unreadable files */
 }
 }
-const improvements = await enhanceMetadata(llmConfig, entry, fileContent);
+const improvements = await enhanceMetadata(llmConfig, entry, fileContent, signal);
 const updated = { ...entry };
 if (improvements.description)
 updated.description = improvements.description;

package/dist/indexer/memory-inference.js
CHANGED

@@ -1,9 +1,10 @@
 /**
  * Memory inference pass for `akm index` (#201).
  *
- * Detects memories pending inference, asks the configured LLM to
- * into
- * frontmatter `inferred: true` + a `source:` backref to the
+ * Detects memories pending inference, asks the configured LLM to compress each
+ * into one higher-signal derived memory, and writes the result back as a new
+ * memory file with frontmatter `inferred: true` + a `source:` backref to the
+ * parent memory.
  *
  * Pending predicate (see {@link isPendingMemory}):
  * - File lives under `<stashRoot>/memories/` and ends in `.md`.
@@ -36,7 +37,7 @@ import { parseFrontmatter, parseFrontmatterBlock } from "../core/frontmatter";
 import { warn } from "../core/warn";
 import { writeAssetToSource } from "../core/write-source";
 import { resolveIndexPassLLM } from "../llm/index-passes";
-import {
+import { compressMemoryToDerivedMemory } from "../llm/memory-infer";
 /**
  * Frontmatter keys this pass cares about. Constants so a future rename only
  * needs to touch one site.
@@ -59,8 +60,8 @@ const FM_SOURCE = "source";
  * Both must allow the call for the pass to run. Either set to `false`
  * short-circuits to a no-op result.
  */
-export async function runMemoryInferencePass(config, sources) {
-const
+export async function runMemoryInferencePass(config, sources, signal) {
+const result = {
 considered: 0,
 splitParents: 0,
 writtenFacts: 0,
@@ -69,38 +70,40 @@ export async function runMemoryInferencePass(config, sources) {
 // Gate 1 — locked feature flag (§14). Defaults to enabled; only an
 // explicit `false` disables the pass entirely.
 if (config.llm?.features?.memory_inference === false)
-return
+return result;
 // Gate 2 — per-pass opt-out (#208). Returns the resolved llm config or
 // `undefined` when the pass should not run.
 const llmConfig = resolveIndexPassLLM("memory", config);
 if (!llmConfig)
-return
+return result;
 // The pass only writes to the primary (working) stash. Read-only caches
 // (git, npm, website) are deliberately untouched — writing inferred
 // children there would be clobbered by the next sync().
 const primary = sources[0];
 if (!primary)
-return
+return result;
 const pending = collectPendingMemories(primary.path);
-
+result.considered = pending.length;
 if (pending.length === 0)
-return
+return result;
 for (const record of pending) {
-
-
-
+if (signal?.aborted)
+return result;
+const derived = await compressMemoryToDerivedMemory(llmConfig, record.body, signal);
+if (!derived) {
+result.skippedNoFacts += 1;
 // Intentionally NOT marked processed — a transient LLM failure should
 // be retried on the next index run.
 continue;
 }
-const written = await
+const written = await writeDerivedMemory(record, derived);
 if (written > 0) {
 markParentProcessed(record);
-
-
+result.splitParents += 1;
+result.writtenFacts += written;
 }
 }
-return
+return result;
 }
 // ── Pending detection ───────────────────────────────────────────────────────
 /**
@@ -133,6 +136,7 @@ export function collectPendingMemories(stashRoot) {
 ref: `memory:${relName}`,
 data: parsed.data,
 body: parsed.content,
+name: relName,
 });
 }
 return out;
@@ -177,19 +181,8 @@ function toMemoryName(memoriesDir, filePath) {
 // user has organised under memories/.
 return rel.replace(/\\/g, "/").replace(/\.md$/i, "");
 }
-// ── Writing
-async function
-const memoriesDir = path.join(parent.stashRoot, "memories");
-// Sibling directory layout: <parentDir>/<parentBase>.facts/fact-N.md
-// Keeps facts grouped near the parent without polluting the top level.
-const parentRel = path.relative(memoriesDir, parent.filePath).replace(/\\/g, "/");
-const parentBase = parentRel.replace(/\.md$/i, "");
-const factsDirRel = `${parentBase}.facts`;
-// Children are routed through writeAssetToSource — the single dispatch
-// point for kind-branching writes (CLAUDE.md / spec §10 step 5). Memory
-// assets resolve to `<source.path>/memories/<name>.md`, so a child name
-// of `<parentBase>.facts/fact-N` lands at exactly the documented child
-// path scheme.
+// ── Writing derived memories + marking parent ───────────────────────────────
+async function writeDerivedMemory(parent, derived) {
 const writeTarget = {
 kind: "filesystem",
 name: "stash",
@@ -201,39 +194,35 @@ async function writeAtomicChildren(parent, facts) {
 path: parent.stashRoot,
 writable: true,
 };
-
-
-
-
-
-
-
-
-
-
-
-
-
-}
-
-const content = renderChildMemory(fact, parent.ref);
-const childRef = parseAssetRef(childRefStr);
-await writeAssetToSource(writeTarget, writeConfig, childRef, content);
-written += 1;
-}
-catch (err) {
-warn(`memory inference: failed to write atomic child ${childName}: ${err instanceof Error ? err.message : String(err)}`);
-}
+const childName = `${parent.name}.derived`;
+const childRefStr = `memory:${childName}`;
+const childPath = path.join(parent.stashRoot, "memories", `${childName}.md`);
+if (fs.existsSync(childPath)) {
+return 0;
+}
+try {
+const content = renderDerivedMemory(parent, derived);
+const childRef = parseAssetRef(childRefStr);
+await writeAssetToSource(writeTarget, writeConfig, childRef, content);
+return 1;
+}
+catch (err) {
+warn(`memory inference: failed to write derived memory ${childName}: ${err instanceof Error ? err.message : String(err)}`);
+return 0;
 }
-return written;
 }
-function
+function renderDerivedMemory(parent, derived) {
 const fm = {
 [FM_INFERRED]: true,
-[FM_SOURCE]:
+[FM_SOURCE]: parent.ref,
+description: derived.description,
+tags: derived.tags,
+searchHints: derived.searchHints,
+title: derived.title,
+derivedFrom: parent.name,
 };
 const yaml = yamlStringify(fm).trimEnd();
-return `---\n${yaml}\n---\n\n${
+return `---\n${yaml}\n---\n\n# ${derived.title.trim()}\n\n${derived.content.trim()}\n`;
 }
 function markParentProcessed(parent) {
 // Frontmatter-only rewrite of an existing asset: not a new asset write,
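Taken together, the memory-inference rewrite above replaces the old per-fact children (`<parent>.facts/fact-N.md`) with a single sibling derived memory per parent. A rough sketch of the file `writeDerivedMemory`/`renderDerivedMemory` appear to produce, using made-up parent and derived data and a hand-rolled stand-in for `yamlStringify` (illustration only; the real field values come from the LLM output):

```js
// Hypothetical parent memory and LLM-derived summary, for illustration.
const parent = { name: "team/conventions", ref: "memory:team/conventions" };
const derived = {
  title: "Team conventions (derived)",
  description: "Compressed summary of the raw notes",
  tags: ["conventions"],
  searchHints: ["style guide"],
  content: "Use conventional commits. Prefer squash merges.",
};
// Simplified frontmatter rendering; akm-cli uses yamlStringify instead.
const fm = [
  "inferred: true",
  `source: ${parent.ref}`,
  `description: ${derived.description}`,
  `tags: [${derived.tags.join(", ")}]`,
  `searchHints: [${derived.searchHints.join(", ")}]`,
  `title: ${derived.title}`,
  `derivedFrom: ${parent.name}`,
].join("\n");
// The file lands at <stashRoot>/memories/team/conventions.derived.md
console.log(`---\n${fm}\n---\n\n# ${derived.title.trim()}\n\n${derived.content.trim()}\n`);
```
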

package/dist/indexer/search-source.js
CHANGED

@@ -8,7 +8,7 @@ import { resolveSourceProviderFactory } from "../sources/provider-factory";
 import "../sources/providers/index";
 import { warn } from "../core/warn";
 import { ensureGitMirror, getCachePaths, parseGitRepoUrl } from "../sources/providers/git";
-import { ensureWebsiteMirror } from "../sources/
+import { ensureWebsiteMirror } from "../sources/website-ingest";
 // Legacy "context-hub" / "github" type aliases are normalized to "git" at
 // config-load time (see src/config.ts), so this set only contains the canonical
 // type.