akm-cli 0.7.1 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/dist/cli.js +22 -8
- package/dist/commands/installed-stashes.js +1 -1
- package/dist/commands/source-add.js +1 -1
- package/dist/core/common.js +16 -1
- package/dist/core/config.js +5 -2
- package/dist/indexer/db-search.js +16 -1
- package/dist/indexer/graph-extraction.js +5 -3
- package/dist/indexer/indexer.js +27 -11
- package/dist/indexer/memory-inference.js +47 -58
- package/dist/indexer/search-source.js +1 -1
- package/dist/llm/client.js +61 -1
- package/dist/llm/embedder.js +8 -5
- package/dist/llm/embedders/local.js +8 -2
- package/dist/llm/embedders/remote.js +4 -2
- package/dist/llm/graph-extract.js +4 -4
- package/dist/llm/memory-infer.js +61 -33
- package/dist/llm/metadata-enhance.js +2 -2
- package/dist/output/cli-hints.js +2 -0
- package/dist/setup/setup.js +30 -20
- package/dist/sources/providers/website.js +4 -460
- package/dist/sources/website-ingest.js +470 -0
- package/docs/README.md +7 -0
- package/docs/migration/release-notes/0.7.0.md +14 -0
- package/package.json +4 -1
package/CHANGELOG.md
CHANGED

@@ -6,6 +6,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 
 ## [Unreleased]
 
+### Added
+
+- **One-shot URL ingest for `akm import` and `akm wiki stash`** — both commands now accept a single HTTP/HTTPS URL in addition to file paths and stdin. `akm import <url>` fetches the exact page, converts it to markdown, and writes it into `knowledge/` using a URL-path-derived default name. `akm wiki stash <wiki> <url>` fetches the exact page, converts it to markdown, and writes it into `wikis/<wiki>/raw/`. Neither command registers a persistent website source or crawls linked pages.
+
+### Changed
+
+- **Shared website ingest boundary** — website URL validation, single-page fetch/convert, and website mirror generation now live in a dedicated shared ingest module. The website source provider is a thin adapter, and `akm add`, `akm import`, and `akm wiki stash` all reuse the same core website-ingest path.
+
 ## [0.7.0]
 
 ### Added

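The one-shot URL ingest entry above is the user-facing half of the `readKnowledgeInput` helper added to `dist/cli.js` below: if the positional source parses as an HTTP/HTTPS URL, the page is fetched once, converted to markdown, and given a default name derived from the URL path; otherwise the existing file/stdin path is used. A minimal standalone sketch of that flow (illustration only, assuming Node 18+ global fetch; the real implementation is `fetchWebsiteMarkdownSnapshot` in the new `dist/sources/website-ingest.js`, and the exact naming rules are not shown in this diff):

```js
// Illustration only, not the akm-cli source. Shows the single-page fetch plus
// the URL-path-derived default name described in the changelog entry above.
async function sketchUrlIngest(input) {
  const url = new URL(input); // throws if `input` is not a URL at all
  if (url.protocol !== "http:" && url.protocol !== "https:") {
    throw new Error("Only http(s) URLs are ingested");
  }
  const res = await fetch(url); // the exact page only; linked pages are not crawled
  if (!res.ok) throw new Error(`Fetch failed with status ${res.status}`);
  const html = await res.text();
  // Default name from the last URL path segment,
  // e.g. /docs/getting-started.html → "getting-started" (naming rules assumed).
  const last = url.pathname.split("/").filter(Boolean).pop() ?? url.hostname;
  const preferredName = last.replace(/\.[a-z0-9]+$/i, "");
  return { preferredName, content: html /* real code converts HTML to markdown */ };
}
```
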
package/dist/cli.js
CHANGED

@@ -25,7 +25,7 @@ import { akmClone } from "./commands/source-clone";
 import { addStash } from "./commands/source-manage";
 import { parseAssetRef } from "./core/asset-ref";
 import { deriveCanonicalAssetName, resolveAssetPathFromName } from "./core/asset-spec";
-import { isWithin, resolveStashDir, tryReadStdinText } from "./core/common";
+import { isHttpUrl, isWithin, resolveStashDir, tryReadStdinText } from "./core/common";
 import { DEFAULT_CONFIG, getConfigPath, loadConfig, loadUserConfig, saveConfig } from "./core/config";
 import { ConfigError, NotFoundError, UsageError } from "./core/errors";
 import { appendEvent } from "./core/events";
@@ -44,6 +44,7 @@ import { buildRegistryIndex, writeRegistryIndex } from "./registry/build-index";
 import { resolveSourcesForOrigin } from "./registry/origin-resolve";
 import { saveGitStash } from "./sources/providers/git";
 import { resolveAssetPath } from "./sources/resolve";
+import { fetchWebsiteMarkdownSnapshot } from "./sources/website-ingest";
 import { pkgVersion } from "./version";
 import { createWorkflowAsset, formatWorkflowErrors, getWorkflowTemplate, validateWorkflowSource, } from "./workflows/authoring";
 import { hasWorkflowSubcommand, parseWorkflowJsonObject, parseWorkflowStepState, WORKFLOW_STEP_STATES, } from "./workflows/cli";
@@ -142,10 +143,17 @@ const indexCommand = defineCommand({
 },
 async run({ args }) {
 await runWithJsonErrors(async () => {
+const controller = new AbortController();
+const abort = () => controller.abort(new Error("index interrupted"));
+process.once("SIGINT", abort);
+process.once("SIGTERM", abort);
 const result = await akmIndex({
 full: args.full,
 onProgress: args.verbose ? ({ message }) => console.error(`[index] ${message}`) : undefined,
+signal: controller.signal,
 });
+process.off("SIGINT", abort);
+process.off("SIGTERM", abort);
 output("index", result);
 });
 },
@@ -1057,6 +1065,12 @@ function readKnowledgeContent(source) {
 preferredName: path.basename(resolvedSource, path.extname(resolvedSource)),
 };
 }
+async function readKnowledgeInput(source) {
+if (!isHttpUrl(source))
+return readKnowledgeContent(source);
+const snapshot = await fetchWebsiteMarkdownSnapshot(source);
+return { content: snapshot.content, preferredName: snapshot.preferredName };
+}
 async function writeMarkdownAsset(options) {
 // Resolve write target via the v1 precedence chain (`--target` →
 // `defaultWriteTarget` → working stash). Per spec §10 step 5, this is the
@@ -1609,12 +1623,12 @@ function wasRememberFlagValueConsumedAsContent(content, flagValue, flagName) {
 const importKnowledgeCommand = defineCommand({
 meta: {
 name: "import",
-description: "Import a knowledge document into the default stash",
+description: "Import a knowledge document or URL into the default stash",
 },
 args: {
 source: {
 type: "positional",
-description: 'Source file path, or "-" to read from stdin',
+description: 'Source file path, URL, or "-" to read from stdin',
 required: true,
 },
 name: {
@@ -1633,11 +1647,11 @@ const importKnowledgeCommand = defineCommand({
 },
 async run({ args }) {
 return runWithJsonErrors(async () => {
-const { content, preferredName } =
+const { content, preferredName } = await readKnowledgeInput(args.source);
 const result = await writeMarkdownAsset({
 type: "knowledge",
 content,
-name: args.name,
+name: args.name ?? (isHttpUrl(args.source) ? preferredName : undefined),
 fallbackPrefix: "knowledge",
 preferredName,
 force: args.force,
@@ -2227,17 +2241,17 @@ const wikiSearchCommand = defineCommand({
 const wikiStashCommand = defineCommand({
 meta: {
 name: "stash",
-description: "Copy a source into wikis/<name>/raw/<slug>.md with frontmatter. Source may be a file path or '-' for stdin.",
+description: "Copy a source into wikis/<name>/raw/<slug>.md with frontmatter. Source may be a file path, URL, or '-' for stdin.",
 },
 args: {
 name: { type: "positional", description: "Wiki name", required: true },
-source: { type: "positional", description: "Source file path, or '-' to read from stdin", required: true },
+source: { type: "positional", description: "Source file path, URL, or '-' to read from stdin", required: true },
 as: { type: "string", description: "Preferred slug base (defaults to source filename or first-line slug)" },
 },
 run({ args }) {
 return runWithJsonErrors(async () => {
 const { stashRaw } = await import("./wiki/wiki.js");
-const { content, preferredName } =
+const { content, preferredName } = await readKnowledgeInput(args.source);
 const stashDir = resolveStashDir();
 const result = stashRaw({
 stashDir,

package/dist/commands/installed-stashes.js
CHANGED

@@ -13,7 +13,7 @@ import { akmIndex } from "../indexer/indexer";
 import { removeLockEntry, upsertLockEntry } from "../integrations/lockfile";
 import { parseRegistryRef } from "../registry/resolve";
 import { syncFromRef } from "../sources/providers/sync-from-ref";
-import { ensureWebsiteMirror } from "../sources/
+import { ensureWebsiteMirror } from "../sources/website-ingest";
 import { listWikis, resolveWikisRoot } from "../wiki/wiki";
 import { auditInstallCandidate, deriveRegistryLabels, enforceRegistryInstallPolicy, formatInstallAuditFailure, } from "./install-audit";
 import { removeInstalledRegistryEntry, upsertInstalledRegistryEntry } from "./source-add";

package/dist/commands/source-add.js
CHANGED

@@ -9,7 +9,7 @@ import { upsertLockEntry } from "../integrations/lockfile";
 import { parseRegistryRef } from "../registry/resolve";
 import { detectStashRoot } from "../sources/providers/provider-utils";
 import { syncFromRef } from "../sources/providers/sync-from-ref";
-import { ensureWebsiteMirror, validateWebsiteInputUrl } from "../sources/
+import { ensureWebsiteMirror, validateWebsiteInputUrl } from "../sources/website-ingest";
 import { ensureWikiNameAvailable, validateWikiName } from "../wiki/wiki";
 import { auditInstallCandidate, deriveRegistryLabels, enforceRegistryInstallPolicy, formatInstallAuditFailure, } from "./install-audit";
 const VALID_OVERRIDE_TYPES = new Set(["wiki"]);

package/dist/core/common.js
CHANGED

@@ -146,19 +146,34 @@ function normalizeFsPathForComparison(value) {
  * Fetch with an AbortController timeout.
  * Defaults to 30 seconds if no timeout is specified.
  */
-export async function fetchWithTimeout(url, opts, timeoutMs = 30_000) {
+export async function fetchWithTimeout(url, opts, timeoutMs = 30_000, signal) {
 const controller = new AbortController();
 const timer = setTimeout(() => controller.abort(), timeoutMs);
+const abortExternal = () => controller.abort(signal?.reason);
+if (signal) {
+if (signal.aborted) {
+clearTimeout(timer);
+controller.abort(signal.reason);
+}
+else {
+signal.addEventListener("abort", abortExternal, { once: true });
+}
+}
 try {
 return await fetch(url, { ...opts, signal: controller.signal });
 }
 catch (err) {
 if (err instanceof DOMException && err.name === "AbortError") {
+if (signal?.aborted) {
+throw new Error(`Request aborted: ${url}`);
+}
 throw new Error(`Request timed out after ${timeoutMs}ms: ${url}`);
 }
 throw err;
 }
 finally {
+if (signal)
+signal.removeEventListener("abort", abortExternal);
 clearTimeout(timer);
 }
 }

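Caller-side view of the new optional `signal` parameter on `fetchWithTimeout`: an external AbortSignal is linked into the same AbortController that enforces the 30-second default timeout, which is how the SIGINT/SIGTERM wiring added to `akm index` in `dist/cli.js` can cancel in-flight requests. A minimal standalone sketch of the same pattern (not the akm-cli source; the real function also maps aborts to the `Request aborted` / `Request timed out` errors shown above):

```js
// Combine an external AbortSignal with a per-request timeout, as the patched
// fetchWithTimeout does. Standalone illustration only.
async function fetchWithTimeoutAndSignal(url, timeoutMs = 30_000, signal) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  const abortExternal = () => controller.abort(signal?.reason);
  if (signal?.aborted) controller.abort(signal.reason);
  else signal?.addEventListener("abort", abortExternal, { once: true });
  try {
    return await fetch(url, { signal: controller.signal });
  } finally {
    signal?.removeEventListener("abort", abortExternal);
    clearTimeout(timer);
  }
}

// Usage sketch: let Ctrl-C cancel an in-flight request instead of waiting out the timeout.
const interrupt = new AbortController();
process.once("SIGINT", () => interrupt.abort(new Error("interrupted")));
// await fetchWithTimeoutAndSignal("https://example.com", 10_000, interrupt.signal);
```
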
package/dist/core/config.js
CHANGED

@@ -10,8 +10,8 @@ import { warn } from "./warn";
 export const DEFAULT_CONFIG = {
 semanticSearchMode: "auto",
 registries: [
-{ url: "https://raw.githubusercontent.com/itlackey/akm-registry/main/index.json", name: "
-{ url: "https://skills.sh", name: "skills.sh", provider: "skills-sh" },
+{ url: "https://raw.githubusercontent.com/itlackey/akm-registry/main/index.json", name: "akm-registry" },
+{ url: "https://skills.sh", name: "skills.sh", provider: "skills-sh", enabled: false },
 ],
 output: {
 format: "json",
@@ -509,6 +509,9 @@ function parseLlmConfig(value) {
 if (Object.keys(features).length > 0)
 result.features = features;
 }
+if (typeof obj.extraParams === "object" && obj.extraParams !== null && !Array.isArray(obj.extraParams)) {
+result.extraParams = obj.extraParams;
+}
 return result;
 }
 /**

package/dist/indexer/db-search.js
CHANGED

@@ -269,7 +269,10 @@ async function searchDatabase(db, query, searchType, limit, stashDir, allSourceD
 // If the query IS the asset name (or very close), this is almost certainly
 // what the user wants. This is the single most important ranking signal.
 const nameLower = entry.name.toLowerCase();
-const
+const rawNameBase = nameLower.split("/").pop() ?? nameLower; // last segment for path-based names
+const nameBase = entry.type === "memory" && rawNameBase.endsWith(".derived")
+? rawNameBase.slice(0, -".derived".length)
+: rawNameBase;
 if (nameBase === queryLower || nameLower === queryLower) {
 // Exact match: massive boost
 boostSum += 2.0;
@@ -301,6 +304,18 @@ async function searchDatabase(db, query, searchType, limit, stashDir, allSourceD
 knowledge: 0,
 };
 boostSum += TYPE_BOOST[entry.type] ?? 0;
+// ── 2.5. Derived-vs-raw memory preference ──
+// Raw memories are user notes and may be incomplete or unvetted. Compressed
+// `.derived` memories are the higher-signal retrieval target, but the
+// preference should stay modest so stronger relevance signals still dominate.
+if (entry.type === "memory") {
+if (entry.name.toLowerCase().endsWith(".derived")) {
+boostSum += 0.18;
+}
+else {
+boostSum -= 0.08;
+}
+}
 // ── 3. Tag exact match ──
 // Exact tag equality is a strong signal — the author explicitly tagged
 // this asset with the user's search term.

package/dist/indexer/graph-extraction.js
CHANGED

@@ -70,7 +70,7 @@ const EMPTY_RESULT = {
  * to an empty no-op result, leaving any existing `graph.json` untouched on
  * disk.
  */
-export async function runGraphExtractionPass(config, sources) {
+export async function runGraphExtractionPass(config, sources, signal) {
 // Gate 1 — locked feature flag (§14). Defaults to enabled; only an
 // explicit `false` disables the pass entirely.
 if (config.llm?.features?.graph_extraction === false)
@@ -94,7 +94,9 @@ export async function runGraphExtractionPass(config, sources) {
 let totalEntities = 0;
 let totalRelations = 0;
 for (const candidate of eligible) {
-
+if (signal?.aborted)
+break;
+const extraction = await extractGraphFromBody(llmConfig, candidate.body, signal);
 if (extraction.entities.length === 0)
 continue;
 nodes.push({
@@ -134,7 +136,7 @@ export async function runGraphExtractionPass(config, sources) {
  * same one the rest of the indexer uses: `<stashRoot>/<type>/...`.
  *
  * Inferred-child memories (frontmatter `inferred: true`) are skipped — they
- * are
+ * are already derived summaries, with no additional internal graph structure worth
  * extracting.
  *
  * Exported for direct unit testing.

package/dist/indexer/indexer.js
CHANGED

@@ -13,10 +13,16 @@ import { buildSearchText } from "./search-fields";
 import { classifySemanticFailure, clearSemanticStatus, deriveSemanticProviderFingerprint, writeSemanticStatus, } from "./semantic-status";
 import { ensureUsageEventsSchema, purgeOldUsageEvents } from "./usage-events";
 import { walkStashFlat } from "./walker";
+function throwIfAborted(signal) {
+if (signal?.aborted) {
+throw signal.reason instanceof Error ? signal.reason : new Error("index interrupted");
+}
+}
 // ── Indexer ──────────────────────────────────────────────────────────────────
 export async function akmIndex(options) {
 const stashDir = options?.stashDir || resolveStashDir();
 const onProgress = options?.onProgress ?? (() => { });
+const signal = options?.signal;
 // Load config and resolve all stash sources
 const { loadConfig } = await import("../core/config.js");
 const config = loadConfig();
@@ -82,18 +88,19 @@ export async function akmIndex(options) {
 }
 }
 }
-
+throwIfAborted(signal);
+// Memory inference pass (#201). Runs before the walk so any derived-memory
 // children that get written are picked up by the walker in this same run
 // and don't have to wait for the next `akm index`. Gated entirely by
 // `resolveIndexPassLLM("memory", config)` — when the user has no
 // `akm.llm` block or has set `index.memory.llm = false`, this is a no-op
 // and existing inferred children are left in place.
 try {
-const inferenceResult = await runMemoryInferencePass(config, allSourceEntries);
+const inferenceResult = await runMemoryInferencePass(config, allSourceEntries, signal);
 if (inferenceResult.writtenFacts > 0) {
 onProgress({
 phase: "llm",
-message: `Memory inference wrote ${inferenceResult.writtenFacts}
+message: `Memory inference wrote ${inferenceResult.writtenFacts} derived memor${inferenceResult.writtenFacts === 1 ? "y" : "ies"} from ${inferenceResult.splitParents} parent memor${inferenceResult.splitParents === 1 ? "y" : "ies"}.`,
 });
 }
 }
@@ -114,7 +121,7 @@ export async function akmIndex(options) {
 // `index.graph.llm` toggle) is off; the existing graph file is
 // preserved on disk in that case.
 try {
-const graphResult = await runGraphExtractionPass(config, allSourceEntries);
+const graphResult = await runGraphExtractionPass(config, allSourceEntries, signal);
 if (graphResult.written) {
 onProgress({
 phase: "llm",
@@ -125,6 +132,7 @@ export async function akmIndex(options) {
 catch (err) {
 warn(`Graph extraction pass aborted: ${err instanceof Error ? err.message : String(err)}`);
 }
+throwIfAborted(signal);
 const tWalkStart = Date.now();
 // Walk stash dirs and index entries.
 // doFullDelete=true merges the wipe into the same transaction as the
@@ -150,8 +158,9 @@ export async function akmIndex(options) {
 }
 }
 const tWalkEnd = Date.now();
+throwIfAborted(signal);
 // Enhance entries with LLM if configured
-await enhanceDirsWithLlm(db, config, dirsNeedingLlm);
+await enhanceDirsWithLlm(db, config, dirsNeedingLlm, signal);
 onProgress({
 phase: "llm",
 message: resolveIndexPassLLM("enrichment", config)
@@ -159,6 +168,7 @@
 : "LLM enhancement disabled.",
 });
 const tLlmEnd = Date.now();
+throwIfAborted(signal);
 // Rebuild FTS after all inserts. Use incremental mode when this whole
 // index run is incremental — only entries touched by `upsertEntry`
 // since the last rebuild are re-indexed, instead of re-scanning every
@@ -200,6 +210,7 @@
 catch {
 /* best-effort */
 }
+throwIfAborted(signal);
 // Generate embeddings if semantic search is enabled
 const embeddingResult = await generateEmbeddingsForDb(db, config, onProgress);
 const tEmbedEnd = Date.now();
@@ -435,7 +446,7 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, doFu
 insertTransaction();
 return { scannedDirs, skippedDirs, generatedCount, warnings, dirsNeedingLlm };
 }
-async function enhanceDirsWithLlm(db, config, dirsNeedingLlm) {
+async function enhanceDirsWithLlm(db, config, dirsNeedingLlm, signal) {
 // Resolve per-pass LLM config via the unified shim. Returns undefined when
 // either no `akm.llm` is configured or the user opted this pass out via
 // `index.enrichment.llm = false`. (#208)
@@ -447,12 +458,13 @@ async function enhanceDirsWithLlm(db, config, dirsNeedingLlm) {
 // and leaving the user wondering why nothing got enhanced.
 const summary = { attempted: 0, succeeded: 0, failureSamples: [] };
 for (const { dirPath, files, currentStashDir, stash: originalStash } of dirsNeedingLlm) {
+throwIfAborted(signal);
 // Only enhance generated entries; user-provided overrides should not be overwritten
 const generatedEntries = originalStash.entries.filter((e) => e.quality === "generated");
 if (generatedEntries.length === 0)
 continue;
 const generatedStash = { entries: generatedEntries };
-const enhanced = await enhanceStashWithLlm(llmConfig, generatedStash, files, summary);
+const enhanced = await enhanceStashWithLlm(llmConfig, generatedStash, files, summary, signal);
 // Re-upsert the enhanced entries in a single transaction so a crash
 // cannot leave half the entries updated and the rest stale.
 db.transaction(() => {
@@ -475,7 +487,8 @@
 warn(`LLM enhancement failed for ${failed}/${summary.attempted} entries — they were left un-enhanced.${sample}`);
 }
 }
-async function generateEmbeddingsForDb(db, config, onProgress) {
+async function generateEmbeddingsForDb(db, config, onProgress, signal) {
+throwIfAborted(signal);
 if (config.semanticSearchMode === "off") {
 onProgress({ phase: "embeddings", message: "Semantic search disabled; skipping embeddings." });
 return { success: false, reason: "index-missing", message: "Semantic search is disabled." };
@@ -504,6 +517,7 @@
 try {
 const { embedBatch } = await import("../llm/embedder.js");
 const { estimateTokenCount } = await import("../llm/embedders/remote.js");
+throwIfAborted(signal);
 const allEntries = getAllEntriesForEmbedding(db);
 if (allEntries.length === 0) {
 onProgress({ phase: "embeddings", message: "Embeddings already up to date." });
@@ -528,7 +542,8 @@
 warnVerbose(`[embed] ${ref} (${chars} chars, est. ${tokens} tokens) → batch ${batchNum}/${totalBatches}`);
 }
 }
-const embeddings = await embedBatch(texts, config.embedding);
+const embeddings = await embedBatch(texts, config.embedding, signal);
+throwIfAborted(signal);
 // Wrap all embedding upserts in a single transaction so partial
 // state is rolled back on failure rather than leaving the table half-filled.
 db.transaction(() => {
@@ -699,10 +714,11 @@ function isDirStale(dirPath, currentFiles, previousEntries, builtAtMs) {
 }
 return false;
 }
-async function enhanceStashWithLlm(llmConfig, stash, files, summary) {
+async function enhanceStashWithLlm(llmConfig, stash, files, summary, signal) {
 const { enhanceMetadata } = await import("../llm/metadata-enhance");
 const enhanced = [];
 for (const entry of stash.entries) {
+throwIfAborted(signal);
 summary.attempted++;
 try {
 const entryFile = entry.filename
@@ -717,7 +733,7 @@ async function enhanceStashWithLlm(llmConfig, stash, files, summary) {
 /* ignore unreadable files */
 }
 }
-const improvements = await enhanceMetadata(llmConfig, entry, fileContent);
+const improvements = await enhanceMetadata(llmConfig, entry, fileContent, signal);
 const updated = { ...entry };
 if (improvements.description)
 updated.description = improvements.description;

package/dist/indexer/memory-inference.js
CHANGED

@@ -1,9 +1,10 @@
 /**
  * Memory inference pass for `akm index` (#201).
  *
- * Detects memories pending inference, asks the configured LLM to
- * into
- * frontmatter `inferred: true` + a `source:` backref to the
+ * Detects memories pending inference, asks the configured LLM to compress each
+ * into one higher-signal derived memory, and writes the result back as a new
+ * memory file with frontmatter `inferred: true` + a `source:` backref to the
+ * parent memory.
  *
  * Pending predicate (see {@link isPendingMemory}):
  * - File lives under `<stashRoot>/memories/` and ends in `.md`.
@@ -36,7 +37,7 @@ import { parseFrontmatter, parseFrontmatterBlock } from "../core/frontmatter";
 import { warn } from "../core/warn";
 import { writeAssetToSource } from "../core/write-source";
 import { resolveIndexPassLLM } from "../llm/index-passes";
-import {
+import { compressMemoryToDerivedMemory } from "../llm/memory-infer";
 /**
  * Frontmatter keys this pass cares about. Constants so a future rename only
  * needs to touch one site.
@@ -59,8 +60,8 @@ const FM_SOURCE = "source";
  * Both must allow the call for the pass to run. Either set to `false`
  * short-circuits to a no-op result.
  */
-export async function runMemoryInferencePass(config, sources) {
-const
+export async function runMemoryInferencePass(config, sources, signal) {
+const result = {
 considered: 0,
 splitParents: 0,
 writtenFacts: 0,
@@ -69,38 +70,40 @@ export async function runMemoryInferencePass(config, sources) {
 // Gate 1 — locked feature flag (§14). Defaults to enabled; only an
 // explicit `false` disables the pass entirely.
 if (config.llm?.features?.memory_inference === false)
-return
+return result;
 // Gate 2 — per-pass opt-out (#208). Returns the resolved llm config or
 // `undefined` when the pass should not run.
 const llmConfig = resolveIndexPassLLM("memory", config);
 if (!llmConfig)
-return
+return result;
 // The pass only writes to the primary (working) stash. Read-only caches
 // (git, npm, website) are deliberately untouched — writing inferred
 // children there would be clobbered by the next sync().
 const primary = sources[0];
 if (!primary)
-return
+return result;
 const pending = collectPendingMemories(primary.path);
-
+result.considered = pending.length;
 if (pending.length === 0)
-return
+return result;
 for (const record of pending) {
-
-
-
+if (signal?.aborted)
+return result;
+const derived = await compressMemoryToDerivedMemory(llmConfig, record.body, signal);
+if (!derived) {
+result.skippedNoFacts += 1;
 // Intentionally NOT marked processed — a transient LLM failure should
 // be retried on the next index run.
 continue;
 }
-const written = await
+const written = await writeDerivedMemory(record, derived);
 if (written > 0) {
 markParentProcessed(record);
-
-
+result.splitParents += 1;
+result.writtenFacts += written;
 }
 }
-return
+return result;
 }
 // ── Pending detection ───────────────────────────────────────────────────────
 /**
@@ -133,6 +136,7 @@ export function collectPendingMemories(stashRoot) {
 ref: `memory:${relName}`,
 data: parsed.data,
 body: parsed.content,
+name: relName,
 });
 }
 return out;
@@ -177,19 +181,8 @@ function toMemoryName(memoriesDir, filePath) {
 // user has organised under memories/.
 return rel.replace(/\\/g, "/").replace(/\.md$/i, "");
 }
-// ── Writing
-async function
-const memoriesDir = path.join(parent.stashRoot, "memories");
-// Sibling directory layout: <parentDir>/<parentBase>.facts/fact-N.md
-// Keeps facts grouped near the parent without polluting the top level.
-const parentRel = path.relative(memoriesDir, parent.filePath).replace(/\\/g, "/");
-const parentBase = parentRel.replace(/\.md$/i, "");
-const factsDirRel = `${parentBase}.facts`;
-// Children are routed through writeAssetToSource — the single dispatch
-// point for kind-branching writes (CLAUDE.md / spec §10 step 5). Memory
-// assets resolve to `<source.path>/memories/<name>.md`, so a child name
-// of `<parentBase>.facts/fact-N` lands at exactly the documented child
-// path scheme.
+// ── Writing derived memories + marking parent ───────────────────────────────
+async function writeDerivedMemory(parent, derived) {
 const writeTarget = {
 kind: "filesystem",
 name: "stash",
@@ -201,39 +194,35 @@ async function writeAtomicChildren(parent, facts) {
 path: parent.stashRoot,
 writable: true,
 };
-
-
-
-
-
-
-
-
-
-
-
-
-
-}
-
-const content = renderChildMemory(fact, parent.ref);
-const childRef = parseAssetRef(childRefStr);
-await writeAssetToSource(writeTarget, writeConfig, childRef, content);
-written += 1;
-}
-catch (err) {
-warn(`memory inference: failed to write atomic child ${childName}: ${err instanceof Error ? err.message : String(err)}`);
-}
+const childName = `${parent.name}.derived`;
+const childRefStr = `memory:${childName}`;
+const childPath = path.join(parent.stashRoot, "memories", `${childName}.md`);
+if (fs.existsSync(childPath)) {
+return 0;
+}
+try {
+const content = renderDerivedMemory(parent, derived);
+const childRef = parseAssetRef(childRefStr);
+await writeAssetToSource(writeTarget, writeConfig, childRef, content);
+return 1;
+}
+catch (err) {
+warn(`memory inference: failed to write derived memory ${childName}: ${err instanceof Error ? err.message : String(err)}`);
+return 0;
 }
-return written;
 }
-function
+function renderDerivedMemory(parent, derived) {
 const fm = {
 [FM_INFERRED]: true,
-[FM_SOURCE]:
+[FM_SOURCE]: parent.ref,
+description: derived.description,
+tags: derived.tags,
+searchHints: derived.searchHints,
+title: derived.title,
+derivedFrom: parent.name,
 };
 const yaml = yamlStringify(fm).trimEnd();
-return `---\n${yaml}\n---\n\n${
+return `---\n${yaml}\n---\n\n# ${derived.title.trim()}\n\n${derived.content.trim()}\n`;
 }
 function markParentProcessed(parent) {
 // Frontmatter-only rewrite of an existing asset: not a new asset write,
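Taken together, the memory-inference rewrite above replaces the old per-fact children (`<parent>.facts/fact-N.md`) with a single sibling derived memory per parent. A rough sketch of the file `writeDerivedMemory`/`renderDerivedMemory` appear to produce, using made-up parent and derived data and a hand-rolled stand-in for `yamlStringify` (illustration only; the real field values come from the LLM output):

```js
// Hypothetical parent memory and LLM-derived summary, for illustration.
const parent = { name: "team/conventions", ref: "memory:team/conventions" };
const derived = {
  title: "Team conventions (derived)",
  description: "Compressed summary of the raw notes",
  tags: ["conventions"],
  searchHints: ["style guide"],
  content: "Use conventional commits. Prefer squash merges.",
};
// Simplified frontmatter rendering; akm-cli uses yamlStringify instead.
const fm = [
  "inferred: true",
  `source: ${parent.ref}`,
  `description: ${derived.description}`,
  `tags: [${derived.tags.join(", ")}]`,
  `searchHints: [${derived.searchHints.join(", ")}]`,
  `title: ${derived.title}`,
  `derivedFrom: ${parent.name}`,
].join("\n");
// The file lands at <stashRoot>/memories/team/conventions.derived.md
console.log(`---\n${fm}\n---\n\n# ${derived.title.trim()}\n\n${derived.content.trim()}\n`);
```
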

package/dist/indexer/search-source.js
CHANGED

@@ -8,7 +8,7 @@ import { resolveSourceProviderFactory } from "../sources/provider-factory";
 import "../sources/providers/index";
 import { warn } from "../core/warn";
 import { ensureGitMirror, getCachePaths, parseGitRepoUrl } from "../sources/providers/git";
-import { ensureWebsiteMirror } from "../sources/
+import { ensureWebsiteMirror } from "../sources/website-ingest";
 // Legacy "context-hub" / "github" type aliases are normalized to "git" at
 // config-load time (see src/config.ts), so this set only contains the canonical
 // type.