akm-cli 0.7.1 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -6,6 +6,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ### Added
10
+
11
+ - **One-shot URL ingest for `akm import` and `akm wiki stash`** — both commands now accept a single HTTP/HTTPS URL in addition to file paths and stdin. `akm import <url>` fetches the exact page, converts it to markdown, and writes it into `knowledge/` using a URL-path-derived default name. `akm wiki stash <wiki> <url>` fetches the exact page, converts it to markdown, and writes it into `wikis/<wiki>/raw/`. Neither command registers a persistent website source or crawls linked pages.
12
+
13
+ ### Changed
14
+
15
+ - **Shared website ingest boundary** — website URL validation, single-page fetch/convert, and website mirror generation now live in a dedicated shared ingest module. The website source provider is a thin adapter, and `akm add`, `akm import`, and `akm wiki stash` all reuse the same core website-ingest path.
16
+
9
17
  ## [0.7.0]
10
18
 
11
19
  ### Added
package/dist/cli.js CHANGED
@@ -25,7 +25,7 @@ import { akmClone } from "./commands/source-clone";
25
25
  import { addStash } from "./commands/source-manage";
26
26
  import { parseAssetRef } from "./core/asset-ref";
27
27
  import { deriveCanonicalAssetName, resolveAssetPathFromName } from "./core/asset-spec";
28
- import { isWithin, resolveStashDir, tryReadStdinText } from "./core/common";
28
+ import { isHttpUrl, isWithin, resolveStashDir, tryReadStdinText } from "./core/common";
29
29
  import { DEFAULT_CONFIG, getConfigPath, loadConfig, loadUserConfig, saveConfig } from "./core/config";
30
30
  import { ConfigError, NotFoundError, UsageError } from "./core/errors";
31
31
  import { appendEvent } from "./core/events";
@@ -44,6 +44,7 @@ import { buildRegistryIndex, writeRegistryIndex } from "./registry/build-index";
44
44
  import { resolveSourcesForOrigin } from "./registry/origin-resolve";
45
45
  import { saveGitStash } from "./sources/providers/git";
46
46
  import { resolveAssetPath } from "./sources/resolve";
47
+ import { fetchWebsiteMarkdownSnapshot } from "./sources/website-ingest";
47
48
  import { pkgVersion } from "./version";
48
49
  import { createWorkflowAsset, formatWorkflowErrors, getWorkflowTemplate, validateWorkflowSource, } from "./workflows/authoring";
49
50
  import { hasWorkflowSubcommand, parseWorkflowJsonObject, parseWorkflowStepState, WORKFLOW_STEP_STATES, } from "./workflows/cli";
@@ -142,10 +143,17 @@ const indexCommand = defineCommand({
142
143
  },
143
144
  async run({ args }) {
144
145
  await runWithJsonErrors(async () => {
146
+ const controller = new AbortController();
147
+ const abort = () => controller.abort(new Error("index interrupted"));
148
+ process.once("SIGINT", abort);
149
+ process.once("SIGTERM", abort);
145
150
  const result = await akmIndex({
146
151
  full: args.full,
147
152
  onProgress: args.verbose ? ({ message }) => console.error(`[index] ${message}`) : undefined,
153
+ signal: controller.signal,
148
154
  });
155
+ process.off("SIGINT", abort);
156
+ process.off("SIGTERM", abort);
149
157
  output("index", result);
150
158
  });
151
159
  },
@@ -1057,6 +1065,12 @@ function readKnowledgeContent(source) {
1057
1065
  preferredName: path.basename(resolvedSource, path.extname(resolvedSource)),
1058
1066
  };
1059
1067
  }
1068
/**
 * Resolve the text to import from either an HTTP(S) URL or a local source.
 *
 * URLs are fetched once through `fetchWebsiteMarkdownSnapshot`; every other
 * value is handed unchanged to `readKnowledgeContent`.
 *
 * @param {string} source - The positional `source` argument from the CLI.
 * @returns {Promise<{content: string, preferredName: string}>} Content plus a
 *   suggested asset name (for non-URL input, whatever `readKnowledgeContent`
 *   returns).
 */
async function readKnowledgeInput(source) {
    if (isHttpUrl(source)) {
        const { content, preferredName } = await fetchWebsiteMarkdownSnapshot(source);
        return { content, preferredName };
    }
    return readKnowledgeContent(source);
}
1060
1074
  async function writeMarkdownAsset(options) {
1061
1075
  // Resolve write target via the v1 precedence chain (`--target` →
1062
1076
  // `defaultWriteTarget` → working stash). Per spec §10 step 5, this is the
@@ -1609,12 +1623,12 @@ function wasRememberFlagValueConsumedAsContent(content, flagValue, flagName) {
1609
1623
  const importKnowledgeCommand = defineCommand({
1610
1624
  meta: {
1611
1625
  name: "import",
1612
- description: "Import a knowledge document into the default stash",
1626
+ description: "Import a knowledge document or URL into the default stash",
1613
1627
  },
1614
1628
  args: {
1615
1629
  source: {
1616
1630
  type: "positional",
1617
- description: 'Source file path, or "-" to read from stdin',
1631
+ description: 'Source file path, URL, or "-" to read from stdin',
1618
1632
  required: true,
1619
1633
  },
1620
1634
  name: {
@@ -1633,11 +1647,11 @@ const importKnowledgeCommand = defineCommand({
1633
1647
  },
1634
1648
  async run({ args }) {
1635
1649
  return runWithJsonErrors(async () => {
1636
- const { content, preferredName } = readKnowledgeContent(args.source);
1650
+ const { content, preferredName } = await readKnowledgeInput(args.source);
1637
1651
  const result = await writeMarkdownAsset({
1638
1652
  type: "knowledge",
1639
1653
  content,
1640
- name: args.name,
1654
+ name: args.name ?? (isHttpUrl(args.source) ? preferredName : undefined),
1641
1655
  fallbackPrefix: "knowledge",
1642
1656
  preferredName,
1643
1657
  force: args.force,
@@ -2227,17 +2241,17 @@ const wikiSearchCommand = defineCommand({
2227
2241
  const wikiStashCommand = defineCommand({
2228
2242
  meta: {
2229
2243
  name: "stash",
2230
- description: "Copy a source into wikis/<name>/raw/<slug>.md with frontmatter. Source may be a file path or '-' for stdin.",
2244
+ description: "Copy a source into wikis/<name>/raw/<slug>.md with frontmatter. Source may be a file path, URL, or '-' for stdin.",
2231
2245
  },
2232
2246
  args: {
2233
2247
  name: { type: "positional", description: "Wiki name", required: true },
2234
- source: { type: "positional", description: "Source file path, or '-' to read from stdin", required: true },
2248
+ source: { type: "positional", description: "Source file path, URL, or '-' to read from stdin", required: true },
2235
2249
  as: { type: "string", description: "Preferred slug base (defaults to source filename or first-line slug)" },
2236
2250
  },
2237
2251
  run({ args }) {
2238
2252
  return runWithJsonErrors(async () => {
2239
2253
  const { stashRaw } = await import("./wiki/wiki.js");
2240
- const { content, preferredName } = readKnowledgeContent(args.source);
2254
+ const { content, preferredName } = await readKnowledgeInput(args.source);
2241
2255
  const stashDir = resolveStashDir();
2242
2256
  const result = stashRaw({
2243
2257
  stashDir,
@@ -13,7 +13,7 @@ import { akmIndex } from "../indexer/indexer";
13
13
  import { removeLockEntry, upsertLockEntry } from "../integrations/lockfile";
14
14
  import { parseRegistryRef } from "../registry/resolve";
15
15
  import { syncFromRef } from "../sources/providers/sync-from-ref";
16
- import { ensureWebsiteMirror } from "../sources/providers/website";
16
+ import { ensureWebsiteMirror } from "../sources/website-ingest";
17
17
  import { listWikis, resolveWikisRoot } from "../wiki/wiki";
18
18
  import { auditInstallCandidate, deriveRegistryLabels, enforceRegistryInstallPolicy, formatInstallAuditFailure, } from "./install-audit";
19
19
  import { removeInstalledRegistryEntry, upsertInstalledRegistryEntry } from "./source-add";
@@ -9,7 +9,7 @@ import { upsertLockEntry } from "../integrations/lockfile";
9
9
  import { parseRegistryRef } from "../registry/resolve";
10
10
  import { detectStashRoot } from "../sources/providers/provider-utils";
11
11
  import { syncFromRef } from "../sources/providers/sync-from-ref";
12
- import { ensureWebsiteMirror, validateWebsiteInputUrl } from "../sources/providers/website";
12
+ import { ensureWebsiteMirror, validateWebsiteInputUrl } from "../sources/website-ingest";
13
13
  import { ensureWikiNameAvailable, validateWikiName } from "../wiki/wiki";
14
14
  import { auditInstallCandidate, deriveRegistryLabels, enforceRegistryInstallPolicy, formatInstallAuditFailure, } from "./install-audit";
15
15
  const VALID_OVERRIDE_TYPES = new Set(["wiki"]);
@@ -146,19 +146,34 @@ function normalizeFsPathForComparison(value) {
146
146
  * Fetch with an AbortController timeout.
147
147
  * Defaults to 30 seconds if no timeout is specified.
148
148
  */
149
export async function fetchWithTimeout(url, opts, timeoutMs = 30_000, signal) {
    // Parameters:
    //   url       - request target, passed straight to fetch().
    //   opts      - fetch() init options; any `signal` in here is replaced by
    //               the internal controller's signal.
    //   timeoutMs - milliseconds before the request is aborted (default 30s).
    //   signal    - optional caller-owned AbortSignal for early cancellation.
    // Rejects with:
    //   Error("Request aborted: <url>")                 when `signal` fired,
    //   Error("Request timed out after <n>ms: <url>")   when the timer fired,
    //   the original fetch error                        otherwise.
    // The underlying rejection is attached as `cause` on the wrapped errors.
    const controller = new AbortController();
    // Track the timeout explicitly instead of inferring it from the error
    // type: fetch rejects with whatever reason the controller was aborted
    // with, which is not necessarily a DOMException named "AbortError".
    let timedOut = false;
    const timer = setTimeout(() => {
        timedOut = true;
        controller.abort();
    }, timeoutMs);
    const abortExternal = () => controller.abort(signal?.reason);
    if (signal) {
        if (signal.aborted) {
            // Already cancelled: abort immediately so fetch rejects right away.
            abortExternal();
        }
        else {
            signal.addEventListener("abort", abortExternal, { once: true });
        }
    }
    try {
        return await fetch(url, { ...opts, signal: controller.signal });
    }
    catch (err) {
        // Check the caller's signal first so a custom abort reason (for example
        // a plain Error) is still reported as a cancellation, not a timeout.
        if (signal?.aborted) {
            throw new Error(`Request aborted: ${url}`, { cause: err });
        }
        if (timedOut || (err instanceof DOMException && err.name === "AbortError")) {
            throw new Error(`Request timed out after ${timeoutMs}ms: ${url}`, { cause: err });
        }
        throw err;
    }
    finally {
        // Always detach from the caller's signal and stop the timer so neither
        // outlives the request.
        signal?.removeEventListener("abort", abortExternal);
        clearTimeout(timer);
    }
}
@@ -10,8 +10,8 @@ import { warn } from "./warn";
10
10
  export const DEFAULT_CONFIG = {
11
11
  semanticSearchMode: "auto",
12
12
  registries: [
13
- { url: "https://raw.githubusercontent.com/itlackey/akm-registry/main/index.json", name: "official" },
14
- { url: "https://skills.sh", name: "skills.sh", provider: "skills-sh" },
13
+ { url: "https://raw.githubusercontent.com/itlackey/akm-registry/main/index.json", name: "akm-registry" },
14
+ { url: "https://skills.sh", name: "skills.sh", provider: "skills-sh", enabled: false },
15
15
  ],
16
16
  output: {
17
17
  format: "json",
@@ -509,6 +509,9 @@ function parseLlmConfig(value) {
509
509
  if (Object.keys(features).length > 0)
510
510
  result.features = features;
511
511
  }
512
+ if (typeof obj.extraParams === "object" && obj.extraParams !== null && !Array.isArray(obj.extraParams)) {
513
+ result.extraParams = obj.extraParams;
514
+ }
512
515
  return result;
513
516
  }
514
517
  /**
@@ -269,7 +269,10 @@ async function searchDatabase(db, query, searchType, limit, stashDir, allSourceD
269
269
  // If the query IS the asset name (or very close), this is almost certainly
270
270
  // what the user wants. This is the single most important ranking signal.
271
271
  const nameLower = entry.name.toLowerCase();
272
- const nameBase = nameLower.split("/").pop() ?? nameLower; // last segment for path-based names
272
+ const rawNameBase = nameLower.split("/").pop() ?? nameLower; // last segment for path-based names
273
+ const nameBase = entry.type === "memory" && rawNameBase.endsWith(".derived")
274
+ ? rawNameBase.slice(0, -".derived".length)
275
+ : rawNameBase;
273
276
  if (nameBase === queryLower || nameLower === queryLower) {
274
277
  // Exact match: massive boost
275
278
  boostSum += 2.0;
@@ -301,6 +304,18 @@ async function searchDatabase(db, query, searchType, limit, stashDir, allSourceD
301
304
  knowledge: 0,
302
305
  };
303
306
  boostSum += TYPE_BOOST[entry.type] ?? 0;
307
+ // ── 2.5. Derived-vs-raw memory preference ──
308
+ // Raw memories are user notes and may be incomplete or unvetted. Compressed
309
+ // `.derived` memories are the higher-signal retrieval target, but the
310
+ // preference should stay modest so stronger relevance signals still dominate.
311
+ if (entry.type === "memory") {
312
+ if (entry.name.toLowerCase().endsWith(".derived")) {
313
+ boostSum += 0.18;
314
+ }
315
+ else {
316
+ boostSum -= 0.08;
317
+ }
318
+ }
304
319
  // ── 3. Tag exact match ──
305
320
  // Exact tag equality is a strong signal — the author explicitly tagged
306
321
  // this asset with the user's search term.
@@ -70,7 +70,7 @@ const EMPTY_RESULT = {
70
70
  * to an empty no-op result, leaving any existing `graph.json` untouched on
71
71
  * disk.
72
72
  */
73
- export async function runGraphExtractionPass(config, sources) {
73
+ export async function runGraphExtractionPass(config, sources, signal) {
74
74
  // Gate 1 — locked feature flag (§14). Defaults to enabled; only an
75
75
  // explicit `false` disables the pass entirely.
76
76
  if (config.llm?.features?.graph_extraction === false)
@@ -94,7 +94,9 @@ export async function runGraphExtractionPass(config, sources) {
94
94
  let totalEntities = 0;
95
95
  let totalRelations = 0;
96
96
  for (const candidate of eligible) {
97
- const extraction = await extractGraphFromBody(llmConfig, candidate.body);
97
+ if (signal?.aborted)
98
+ break;
99
+ const extraction = await extractGraphFromBody(llmConfig, candidate.body, signal);
98
100
  if (extraction.entities.length === 0)
99
101
  continue;
100
102
  nodes.push({
@@ -134,7 +136,7 @@ export async function runGraphExtractionPass(config, sources) {
134
136
  * same one the rest of the indexer uses: `<stashRoot>/<type>/...`.
135
137
  *
136
138
  * Inferred-child memories (frontmatter `inferred: true`) are skipped — they
137
- * are atomic facts already, with no internal graph structure worth
139
+ * are already derived summaries, with no additional internal graph structure worth
138
140
  * extracting.
139
141
  *
140
142
  * Exported for direct unit testing.
@@ -13,10 +13,16 @@ import { buildSearchText } from "./search-fields";
13
13
  import { classifySemanticFailure, clearSemanticStatus, deriveSemanticProviderFingerprint, writeSemanticStatus, } from "./semantic-status";
14
14
  import { ensureUsageEventsSchema, purgeOldUsageEvents } from "./usage-events";
15
15
  import { walkStashFlat } from "./walker";
16
/**
 * Throw if the given abort signal has already fired; otherwise do nothing.
 *
 * @param {AbortSignal | undefined} signal - Optional cancellation signal.
 * @throws {Error} The signal's own reason when it is an Error; otherwise a
 *   generic "index interrupted" error carrying the original reason as `cause`.
 */
function throwIfAborted(signal) {
    if (!signal?.aborted)
        return;
    const { reason } = signal;
    if (reason instanceof Error)
        throw reason;
    // A non-Error reason (string, plain object, ...) is wrapped so callers
    // always receive an Error, while the original value stays reachable.
    throw new Error("index interrupted", { cause: reason });
}
16
21
  // ── Indexer ──────────────────────────────────────────────────────────────────
17
22
  export async function akmIndex(options) {
18
23
  const stashDir = options?.stashDir || resolveStashDir();
19
24
  const onProgress = options?.onProgress ?? (() => { });
25
+ const signal = options?.signal;
20
26
  // Load config and resolve all stash sources
21
27
  const { loadConfig } = await import("../core/config.js");
22
28
  const config = loadConfig();
@@ -82,18 +88,19 @@ export async function akmIndex(options) {
82
88
  }
83
89
  }
84
90
  }
85
- // Memory inference pass (#201). Runs before the walk so any atomic-fact
91
+ throwIfAborted(signal);
92
+ // Memory inference pass (#201). Runs before the walk so any derived-memory
86
93
  // children that get written are picked up by the walker in this same run
87
94
  // and don't have to wait for the next `akm index`. Gated entirely by
88
95
  // `resolveIndexPassLLM("memory", config)` — when the user has no
89
96
  // `akm.llm` block or has set `index.memory.llm = false`, this is a no-op
90
97
  // and existing inferred children are left in place.
91
98
  try {
92
- const inferenceResult = await runMemoryInferencePass(config, allSourceEntries);
99
+ const inferenceResult = await runMemoryInferencePass(config, allSourceEntries, signal);
93
100
  if (inferenceResult.writtenFacts > 0) {
94
101
  onProgress({
95
102
  phase: "llm",
96
- message: `Memory inference wrote ${inferenceResult.writtenFacts} atomic fact${inferenceResult.writtenFacts === 1 ? "" : "s"} from ${inferenceResult.splitParents} parent memor${inferenceResult.splitParents === 1 ? "y" : "ies"}.`,
103
+ message: `Memory inference wrote ${inferenceResult.writtenFacts} derived memor${inferenceResult.writtenFacts === 1 ? "y" : "ies"} from ${inferenceResult.splitParents} parent memor${inferenceResult.splitParents === 1 ? "y" : "ies"}.`,
97
104
  });
98
105
  }
99
106
  }
@@ -114,7 +121,7 @@ export async function akmIndex(options) {
114
121
  // `index.graph.llm` toggle) is off; the existing graph file is
115
122
  // preserved on disk in that case.
116
123
  try {
117
- const graphResult = await runGraphExtractionPass(config, allSourceEntries);
124
+ const graphResult = await runGraphExtractionPass(config, allSourceEntries, signal);
118
125
  if (graphResult.written) {
119
126
  onProgress({
120
127
  phase: "llm",
@@ -125,6 +132,7 @@ export async function akmIndex(options) {
125
132
  catch (err) {
126
133
  warn(`Graph extraction pass aborted: ${err instanceof Error ? err.message : String(err)}`);
127
134
  }
135
+ throwIfAborted(signal);
128
136
  const tWalkStart = Date.now();
129
137
  // Walk stash dirs and index entries.
130
138
  // doFullDelete=true merges the wipe into the same transaction as the
@@ -150,8 +158,9 @@ export async function akmIndex(options) {
150
158
  }
151
159
  }
152
160
  const tWalkEnd = Date.now();
161
+ throwIfAborted(signal);
153
162
  // Enhance entries with LLM if configured
154
- await enhanceDirsWithLlm(db, config, dirsNeedingLlm);
163
+ await enhanceDirsWithLlm(db, config, dirsNeedingLlm, signal);
155
164
  onProgress({
156
165
  phase: "llm",
157
166
  message: resolveIndexPassLLM("enrichment", config)
@@ -159,6 +168,7 @@ export async function akmIndex(options) {
159
168
  : "LLM enhancement disabled.",
160
169
  });
161
170
  const tLlmEnd = Date.now();
171
+ throwIfAborted(signal);
162
172
  // Rebuild FTS after all inserts. Use incremental mode when this whole
163
173
  // index run is incremental — only entries touched by `upsertEntry`
164
174
  // since the last rebuild are re-indexed, instead of re-scanning every
@@ -200,6 +210,7 @@ export async function akmIndex(options) {
200
210
  catch {
201
211
  /* best-effort */
202
212
  }
213
+ throwIfAborted(signal);
203
214
  // Generate embeddings if semantic search is enabled
204
215
  const embeddingResult = await generateEmbeddingsForDb(db, config, onProgress);
205
216
  const tEmbedEnd = Date.now();
@@ -435,7 +446,7 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, doFu
435
446
  insertTransaction();
436
447
  return { scannedDirs, skippedDirs, generatedCount, warnings, dirsNeedingLlm };
437
448
  }
438
- async function enhanceDirsWithLlm(db, config, dirsNeedingLlm) {
449
+ async function enhanceDirsWithLlm(db, config, dirsNeedingLlm, signal) {
439
450
  // Resolve per-pass LLM config via the unified shim. Returns undefined when
440
451
  // either no `akm.llm` is configured or the user opted this pass out via
441
452
  // `index.enrichment.llm = false`. (#208)
@@ -447,12 +458,13 @@ async function enhanceDirsWithLlm(db, config, dirsNeedingLlm) {
447
458
  // and leaving the user wondering why nothing got enhanced.
448
459
  const summary = { attempted: 0, succeeded: 0, failureSamples: [] };
449
460
  for (const { dirPath, files, currentStashDir, stash: originalStash } of dirsNeedingLlm) {
461
+ throwIfAborted(signal);
450
462
  // Only enhance generated entries; user-provided overrides should not be overwritten
451
463
  const generatedEntries = originalStash.entries.filter((e) => e.quality === "generated");
452
464
  if (generatedEntries.length === 0)
453
465
  continue;
454
466
  const generatedStash = { entries: generatedEntries };
455
- const enhanced = await enhanceStashWithLlm(llmConfig, generatedStash, files, summary);
467
+ const enhanced = await enhanceStashWithLlm(llmConfig, generatedStash, files, summary, signal);
456
468
  // Re-upsert the enhanced entries in a single transaction so a crash
457
469
  // cannot leave half the entries updated and the rest stale.
458
470
  db.transaction(() => {
@@ -475,7 +487,8 @@ async function enhanceDirsWithLlm(db, config, dirsNeedingLlm) {
475
487
  warn(`LLM enhancement failed for ${failed}/${summary.attempted} entries — they were left un-enhanced.${sample}`);
476
488
  }
477
489
  }
478
- async function generateEmbeddingsForDb(db, config, onProgress) {
490
+ async function generateEmbeddingsForDb(db, config, onProgress, signal) {
491
+ throwIfAborted(signal);
479
492
  if (config.semanticSearchMode === "off") {
480
493
  onProgress({ phase: "embeddings", message: "Semantic search disabled; skipping embeddings." });
481
494
  return { success: false, reason: "index-missing", message: "Semantic search is disabled." };
@@ -504,6 +517,7 @@ async function generateEmbeddingsForDb(db, config, onProgress) {
504
517
  try {
505
518
  const { embedBatch } = await import("../llm/embedder.js");
506
519
  const { estimateTokenCount } = await import("../llm/embedders/remote.js");
520
+ throwIfAborted(signal);
507
521
  const allEntries = getAllEntriesForEmbedding(db);
508
522
  if (allEntries.length === 0) {
509
523
  onProgress({ phase: "embeddings", message: "Embeddings already up to date." });
@@ -528,7 +542,8 @@ async function generateEmbeddingsForDb(db, config, onProgress) {
528
542
  warnVerbose(`[embed] ${ref} (${chars} chars, est. ${tokens} tokens) → batch ${batchNum}/${totalBatches}`);
529
543
  }
530
544
  }
531
- const embeddings = await embedBatch(texts, config.embedding);
545
+ const embeddings = await embedBatch(texts, config.embedding, signal);
546
+ throwIfAborted(signal);
532
547
  // Wrap all embedding upserts in a single transaction so partial
533
548
  // state is rolled back on failure rather than leaving the table half-filled.
534
549
  db.transaction(() => {
@@ -699,10 +714,11 @@ function isDirStale(dirPath, currentFiles, previousEntries, builtAtMs) {
699
714
  }
700
715
  return false;
701
716
  }
702
- async function enhanceStashWithLlm(llmConfig, stash, files, summary) {
717
+ async function enhanceStashWithLlm(llmConfig, stash, files, summary, signal) {
703
718
  const { enhanceMetadata } = await import("../llm/metadata-enhance");
704
719
  const enhanced = [];
705
720
  for (const entry of stash.entries) {
721
+ throwIfAborted(signal);
706
722
  summary.attempted++;
707
723
  try {
708
724
  const entryFile = entry.filename
@@ -717,7 +733,7 @@ async function enhanceStashWithLlm(llmConfig, stash, files, summary) {
717
733
  /* ignore unreadable files */
718
734
  }
719
735
  }
720
- const improvements = await enhanceMetadata(llmConfig, entry, fileContent);
736
+ const improvements = await enhanceMetadata(llmConfig, entry, fileContent, signal);
721
737
  const updated = { ...entry };
722
738
  if (improvements.description)
723
739
  updated.description = improvements.description;
@@ -1,9 +1,10 @@
1
1
  /**
2
2
  * Memory inference pass for `akm index` (#201).
3
3
  *
4
- * Detects memories pending inference, asks the configured LLM to split each
5
- * into atomic facts, and writes the results back as new memory files with
6
- * frontmatter `inferred: true` + a `source:` backref to the parent memory.
4
+ * Detects memories pending inference, asks the configured LLM to compress each
5
+ * into one higher-signal derived memory, and writes the result back as a new
6
+ * memory file with frontmatter `inferred: true` + a `source:` backref to the
7
+ * parent memory.
7
8
  *
8
9
  * Pending predicate (see {@link isPendingMemory}):
9
10
  * - File lives under `<stashRoot>/memories/` and ends in `.md`.
@@ -36,7 +37,7 @@ import { parseFrontmatter, parseFrontmatterBlock } from "../core/frontmatter";
36
37
  import { warn } from "../core/warn";
37
38
  import { writeAssetToSource } from "../core/write-source";
38
39
  import { resolveIndexPassLLM } from "../llm/index-passes";
39
- import { splitMemoryIntoAtomicFacts } from "../llm/memory-infer";
40
+ import { compressMemoryToDerivedMemory } from "../llm/memory-infer";
40
41
  /**
41
42
  * Frontmatter keys this pass cares about. Constants so a future rename only
42
43
  * needs to touch one site.
@@ -59,8 +60,8 @@ const FM_SOURCE = "source";
59
60
  * Both must allow the call for the pass to run. Either set to `false`
60
61
  * short-circuits to a no-op result.
61
62
  */
62
- export async function runMemoryInferencePass(config, sources) {
63
- const empty = {
63
+ export async function runMemoryInferencePass(config, sources, signal) {
64
+ const result = {
64
65
  considered: 0,
65
66
  splitParents: 0,
66
67
  writtenFacts: 0,
@@ -69,38 +70,40 @@ export async function runMemoryInferencePass(config, sources) {
69
70
  // Gate 1 — locked feature flag (§14). Defaults to enabled; only an
70
71
  // explicit `false` disables the pass entirely.
71
72
  if (config.llm?.features?.memory_inference === false)
72
- return empty;
73
+ return result;
73
74
  // Gate 2 — per-pass opt-out (#208). Returns the resolved llm config or
74
75
  // `undefined` when the pass should not run.
75
76
  const llmConfig = resolveIndexPassLLM("memory", config);
76
77
  if (!llmConfig)
77
- return empty;
78
+ return result;
78
79
  // The pass only writes to the primary (working) stash. Read-only caches
79
80
  // (git, npm, website) are deliberately untouched — writing inferred
80
81
  // children there would be clobbered by the next sync().
81
82
  const primary = sources[0];
82
83
  if (!primary)
83
- return empty;
84
+ return result;
84
85
  const pending = collectPendingMemories(primary.path);
85
- empty.considered = pending.length;
86
+ result.considered = pending.length;
86
87
  if (pending.length === 0)
87
- return empty;
88
+ return result;
88
89
  for (const record of pending) {
89
- const facts = await splitMemoryIntoAtomicFacts(llmConfig, record.body);
90
- if (facts.length === 0) {
91
- empty.skippedNoFacts += 1;
90
+ if (signal?.aborted)
91
+ return result;
92
+ const derived = await compressMemoryToDerivedMemory(llmConfig, record.body, signal);
93
+ if (!derived) {
94
+ result.skippedNoFacts += 1;
92
95
  // Intentionally NOT marked processed — a transient LLM failure should
93
96
  // be retried on the next index run.
94
97
  continue;
95
98
  }
96
- const written = await writeAtomicChildren(record, facts);
99
+ const written = await writeDerivedMemory(record, derived);
97
100
  if (written > 0) {
98
101
  markParentProcessed(record);
99
- empty.splitParents += 1;
100
- empty.writtenFacts += written;
102
+ result.splitParents += 1;
103
+ result.writtenFacts += written;
101
104
  }
102
105
  }
103
- return empty;
106
+ return result;
104
107
  }
105
108
  // ── Pending detection ───────────────────────────────────────────────────────
106
109
  /**
@@ -133,6 +136,7 @@ export function collectPendingMemories(stashRoot) {
133
136
  ref: `memory:${relName}`,
134
137
  data: parsed.data,
135
138
  body: parsed.content,
139
+ name: relName,
136
140
  });
137
141
  }
138
142
  return out;
@@ -177,19 +181,8 @@ function toMemoryName(memoriesDir, filePath) {
177
181
  // user has organised under memories/.
178
182
  return rel.replace(/\\/g, "/").replace(/\.md$/i, "");
179
183
  }
180
- // ── Writing children + marking parent ───────────────────────────────────────
181
- async function writeAtomicChildren(parent, facts) {
182
- const memoriesDir = path.join(parent.stashRoot, "memories");
183
- // Sibling directory layout: <parentDir>/<parentBase>.facts/fact-N.md
184
- // Keeps facts grouped near the parent without polluting the top level.
185
- const parentRel = path.relative(memoriesDir, parent.filePath).replace(/\\/g, "/");
186
- const parentBase = parentRel.replace(/\.md$/i, "");
187
- const factsDirRel = `${parentBase}.facts`;
188
- // Children are routed through writeAssetToSource — the single dispatch
189
- // point for kind-branching writes (CLAUDE.md / spec §10 step 5). Memory
190
- // assets resolve to `<source.path>/memories/<name>.md`, so a child name
191
- // of `<parentBase>.facts/fact-N` lands at exactly the documented child
192
- // path scheme.
184
+ // ── Writing derived memories + marking parent ───────────────────────────────
185
+ async function writeDerivedMemory(parent, derived) {
193
186
  const writeTarget = {
194
187
  kind: "filesystem",
195
188
  name: "stash",
@@ -201,39 +194,35 @@ async function writeAtomicChildren(parent, facts) {
201
194
  path: parent.stashRoot,
202
195
  writable: true,
203
196
  };
204
- let written = 0;
205
- for (let i = 0; i < facts.length; i++) {
206
- const fact = facts[i];
207
- const childName = `${factsDirRel}/fact-${i + 1}`;
208
- const childRefStr = `memory:${childName}`;
209
- const childPath = path.join(memoriesDir, `${childName}.md`);
210
- // Idempotent re-writes: if a child already exists at this slot we skip
211
- // it. The parent's `inferenceProcessed` marker is the primary idempotency
212
- // guard (we never re-enter the splitter for a processed parent), but a
213
- // partial previous run that crashed before the marker landed should not
214
- // duplicate facts.
215
- if (fs.existsSync(childPath)) {
216
- continue;
217
- }
218
- try {
219
- const content = renderChildMemory(fact, parent.ref);
220
- const childRef = parseAssetRef(childRefStr);
221
- await writeAssetToSource(writeTarget, writeConfig, childRef, content);
222
- written += 1;
223
- }
224
- catch (err) {
225
- warn(`memory inference: failed to write atomic child ${childName}: ${err instanceof Error ? err.message : String(err)}`);
226
- }
197
+ const childName = `${parent.name}.derived`;
198
+ const childRefStr = `memory:${childName}`;
199
+ const childPath = path.join(parent.stashRoot, "memories", `${childName}.md`);
200
+ if (fs.existsSync(childPath)) {
201
+ return 0;
202
+ }
203
+ try {
204
+ const content = renderDerivedMemory(parent, derived);
205
+ const childRef = parseAssetRef(childRefStr);
206
+ await writeAssetToSource(writeTarget, writeConfig, childRef, content);
207
+ return 1;
208
+ }
209
+ catch (err) {
210
+ warn(`memory inference: failed to write derived memory ${childName}: ${err instanceof Error ? err.message : String(err)}`);
211
+ return 0;
227
212
  }
228
- return written;
229
213
  }
230
/**
 * Render the markdown file for a derived memory.
 *
 * The output is a YAML frontmatter block (inferred marker, backref to the
 * parent, and the derived metadata), followed by a level-one heading with the
 * trimmed title and then the trimmed derived content.
 *
 * @param {{ref: string, name: string}} parent - Parent memory record.
 * @param {{description: *, tags: *, searchHints: *, title: string, content: string}} derived
 *   Derived memory fields to serialise.
 * @returns {string} Full file contents, ending in a single newline.
 */
function renderDerivedMemory(parent, derived) {
    const { description, tags, searchHints, title, content } = derived;
    // Key order is kept stable so the serialised frontmatter does not churn.
    const frontmatter = yamlStringify({
        [FM_INFERRED]: true,
        [FM_SOURCE]: parent.ref,
        description,
        tags,
        searchHints,
        title,
        derivedFrom: parent.name,
    }).trimEnd();
    const lines = ["---", frontmatter, "---", "", `# ${title.trim()}`, "", content.trim(), ""];
    return lines.join("\n");
}
238
227
  function markParentProcessed(parent) {
239
228
  // Frontmatter-only rewrite of an existing asset: not a new asset write,
@@ -8,7 +8,7 @@ import { resolveSourceProviderFactory } from "../sources/provider-factory";
8
8
  import "../sources/providers/index";
9
9
  import { warn } from "../core/warn";
10
10
  import { ensureGitMirror, getCachePaths, parseGitRepoUrl } from "../sources/providers/git";
11
- import { ensureWebsiteMirror } from "../sources/providers/website";
11
+ import { ensureWebsiteMirror } from "../sources/website-ingest";
12
12
  // Legacy "context-hub" / "github" type aliases are normalized to "git" at
13
13
  // config-load time (see src/config.ts), so this set only contains the canonical
14
14
  // type.