npm - akm-cli - Versions diffs - 0.8.0-rc2 → 0.8.0 - Mend

akm-cli 0.8.0-rc2 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (295)

package/{.github/CHANGELOG.md → CHANGELOG.md} +191 -3
package/README.md +22 -6
package/SECURITY.md +93 -0
package/dist/cli/config-migrate.js +144 -0
package/dist/cli/config-validate.js +39 -0
package/dist/cli/confirm.js +73 -0
package/dist/cli/parse-args.js +93 -3
package/dist/cli/shared.js +129 -0
package/dist/cli.js +2141 -1268
package/dist/commands/add-cli.js +279 -0
package/dist/commands/agent-dispatch.js +20 -12
package/dist/commands/agent-support.js +11 -5
package/dist/commands/completions.js +3 -0
package/dist/commands/config-cli.js +129 -517
package/dist/commands/consolidate.js +1533 -144
package/dist/commands/curate.js +44 -3
package/dist/commands/db-cli.js +23 -0
package/dist/commands/distill-promotion-policy.js +5 -3
package/dist/commands/distill.js +906 -100
package/dist/commands/env.js +213 -0
package/dist/commands/eval-cases.js +3 -0
package/dist/commands/events.js +3 -0
package/dist/commands/extract-cli.js +127 -0
package/dist/commands/extract-prompt.js +204 -0
package/dist/commands/extract.js +477 -0
package/dist/commands/feedback-cli.js +331 -0
package/dist/commands/graph.js +260 -5
package/dist/commands/health.js +977 -51
package/dist/commands/help/help-accept.md +6 -3
package/dist/commands/help/help-improve.md +36 -8
package/dist/commands/help/help-proposals.md +7 -4
package/dist/commands/help/help-reject.md +5 -2
package/dist/commands/history.js +51 -16
package/dist/commands/improve-auto-accept.js +97 -0
package/dist/commands/improve-cli.js +236 -0
package/dist/commands/improve-profiles.js +184 -0
package/dist/commands/improve-result-file.js +167 -0
package/dist/commands/improve.js +1725 -332
package/dist/commands/info.js +3 -0
package/dist/commands/init.js +49 -1
package/dist/commands/installed-stashes.js +6 -23
package/dist/commands/knowledge.js +3 -0
package/dist/commands/lint/agent-linter.js +3 -0
package/dist/commands/lint/base-linter.js +199 -5
package/dist/commands/lint/command-linter.js +3 -0
package/dist/commands/lint/default-linter.js +3 -0
package/dist/commands/lint/env-key-rules.js +154 -0
package/dist/commands/lint/index.js +92 -3
package/dist/commands/lint/knowledge-linter.js +3 -0
package/dist/commands/lint/markdown-insertion.js +343 -0
package/dist/commands/lint/memory-linter.js +3 -0
package/dist/commands/lint/registry.js +3 -0
package/dist/commands/lint/skill-linter.js +3 -0
package/dist/commands/lint/task-linter.js +15 -12
package/dist/commands/lint/types.js +3 -0
package/dist/commands/lint/workflow-linter.js +3 -0
package/dist/commands/lint.js +3 -0
package/dist/commands/migration-help.js +5 -2
package/dist/commands/proposal-drain-policies.js +128 -0
package/dist/commands/proposal-drain.js +477 -0
package/dist/commands/proposal.js +60 -6
package/dist/commands/propose.js +24 -19
package/dist/commands/reflect.js +1004 -94
package/dist/commands/registry-cli.js +150 -0
package/dist/commands/registry-search.js +3 -0
package/dist/commands/remember-cli.js +257 -0
package/dist/commands/remember.js +15 -6
package/dist/commands/schema-repair.js +88 -15
package/dist/commands/search.js +99 -14
package/dist/commands/secret.js +173 -0
package/dist/commands/self-update.js +3 -0
package/dist/commands/show.js +32 -13
package/dist/commands/source-add.js +7 -35
package/dist/commands/source-clone.js +3 -0
package/dist/commands/source-manage.js +3 -0
package/dist/commands/tasks.js +161 -95
package/dist/commands/url-checker.js +3 -0
package/dist/core/action-contributors.js +3 -0
package/dist/core/asset-ref.js +13 -2
package/dist/core/asset-registry.js +9 -2
package/dist/core/asset-serialize.js +88 -0
package/dist/core/asset-spec.js +61 -5
package/dist/core/common.js +93 -5
package/dist/core/concurrent.js +3 -0
package/dist/core/config-io.js +347 -0
package/dist/core/config-migration.js +622 -0
package/dist/core/config-schema.js +558 -0
package/dist/core/config-sources.js +108 -0
package/dist/core/config-types.js +4 -0
package/dist/core/config-walker.js +337 -0
package/dist/core/config.js +366 -1077
package/dist/core/errors.js +42 -20
package/dist/core/events.js +31 -25
package/dist/core/file-lock.js +104 -0
package/dist/core/frontmatter.js +75 -10
package/dist/core/lesson-lint.js +3 -0
package/dist/core/markdown.js +3 -0
package/dist/core/memory-belief.js +62 -0
package/dist/core/memory-contradiction-detect.js +274 -0
package/dist/core/memory-improve.js +142 -14
package/dist/core/parse.js +3 -0
package/dist/core/paths.js +218 -50
package/dist/core/proposal-quality-validators.js +380 -0
package/dist/core/proposal-validators.js +11 -3
package/dist/core/proposals.js +464 -5
package/dist/core/state-db.js +349 -56
package/dist/core/text-truncation.js +107 -0
package/dist/core/time.js +3 -0
package/dist/core/tty.js +59 -0
package/dist/core/warn.js +7 -2
package/dist/core/write-source.js +12 -0
package/dist/indexer/db-backup.js +391 -0
package/dist/indexer/db-search.js +136 -28
package/dist/indexer/db.js +661 -166
package/dist/indexer/ensure-index.js +3 -0
package/dist/indexer/file-context.js +3 -0
package/dist/indexer/graph-boost.js +162 -40
package/dist/indexer/graph-db.js +241 -51
package/dist/indexer/graph-dedup.js +3 -7
package/dist/indexer/graph-extraction.js +242 -149
package/dist/indexer/index-context.js +3 -9
package/dist/indexer/indexer.js +84 -14
package/dist/indexer/llm-cache.js +24 -19
package/dist/indexer/manifest.js +3 -0
package/dist/indexer/matchers.js +184 -11
package/dist/indexer/memory-inference.js +94 -50
package/dist/indexer/metadata-contributors.js +3 -0
package/dist/indexer/metadata.js +110 -50
package/dist/indexer/path-resolver.js +3 -0
package/dist/indexer/project-context.js +192 -0
package/dist/indexer/ranking-contributors.js +134 -7
package/dist/indexer/ranking.js +8 -1
package/dist/indexer/search-fields.js +5 -9
package/dist/indexer/search-hit-enrichers.js +91 -2
package/dist/indexer/search-source.js +20 -1
package/dist/indexer/semantic-status.js +4 -1
package/dist/indexer/staleness-detect.js +447 -0
package/dist/indexer/usage-events.js +12 -9
package/dist/indexer/walker.js +3 -0
package/dist/integrations/agent/builders.js +135 -0
package/dist/integrations/agent/config.js +121 -401
package/dist/integrations/agent/detect.js +3 -0
package/dist/integrations/agent/index.js +6 -14
package/dist/integrations/agent/model-aliases.js +55 -0
package/dist/integrations/agent/profiles.js +3 -0
package/dist/integrations/agent/prompts.js +137 -8
package/dist/integrations/agent/runner.js +208 -0
package/dist/integrations/agent/sdk-runner.js +8 -2
package/dist/integrations/agent/spawn.js +54 -14
package/dist/integrations/github.js +3 -0
package/dist/integrations/lockfile.js +22 -51
package/dist/integrations/session-logs/index.js +4 -0
package/dist/integrations/session-logs/inline-refs.js +35 -0
package/dist/integrations/session-logs/pre-filter.js +152 -0
package/dist/integrations/session-logs/providers/claude-code.js +226 -0
package/dist/integrations/session-logs/providers/opencode.js +231 -25
package/dist/integrations/session-logs/types.js +3 -0
package/dist/llm/call-ai.js +14 -26
package/dist/llm/client.js +16 -2
package/dist/llm/embedder.js +20 -29
package/dist/llm/embedders/cache.js +3 -7
package/dist/llm/embedders/local.js +42 -1
package/dist/llm/embedders/remote.js +20 -8
package/dist/llm/embedders/types.js +3 -7
package/dist/llm/feature-gate.js +92 -56
package/dist/llm/graph-extract.js +401 -30
package/dist/llm/index-passes.js +44 -29
package/dist/llm/memory-infer.js +30 -2
package/dist/llm/metadata-enhance.js +3 -7
package/dist/llm/prompts/extract-session.md +80 -0
package/dist/llm/prompts/graph-extract-user-prompt.md +24 -1
package/dist/output/cli-hints-full.md +60 -32
package/dist/output/cli-hints-short.md +10 -7
package/dist/output/cli-hints.js +5 -2
package/dist/output/context.js +60 -8
package/dist/output/renderers.js +170 -194
package/dist/output/shapes/curate.js +56 -0
package/dist/output/shapes/distill.js +10 -0
package/dist/output/shapes/env-list.js +19 -0
package/dist/output/shapes/events.js +11 -0
package/dist/output/shapes/helpers.js +424 -0
package/dist/output/shapes/history.js +7 -0
package/dist/output/shapes/passthrough.js +105 -0
package/dist/output/shapes/proposal-accept.js +7 -0
package/dist/output/shapes/proposal-diff.js +7 -0
package/dist/output/shapes/proposal-list.js +7 -0
package/dist/output/shapes/proposal-producer.js +11 -0
package/dist/output/shapes/proposal-reject.js +7 -0
package/dist/output/shapes/proposal-show.js +7 -0
package/dist/output/shapes/registry-search.js +6 -0
package/dist/output/shapes/registry.js +30 -0
package/dist/output/shapes/search.js +6 -0
package/dist/output/shapes/secret-list.js +19 -0
package/dist/output/shapes/show.js +6 -0
package/dist/output/shapes/vault-list.js +19 -0
package/dist/output/shapes.js +51 -549
package/dist/output/text/add.js +6 -0
package/dist/output/text/clone.js +6 -0
package/dist/output/text/config.js +6 -0
package/dist/output/text/curate.js +6 -0
package/dist/output/text/distill.js +7 -0
package/dist/output/text/enable-disable.js +7 -0
package/dist/output/text/events.js +10 -0
package/dist/output/text/feedback.js +6 -0
package/dist/output/text/helpers.js +1059 -0
package/dist/output/text/history.js +7 -0
package/dist/output/text/import.js +6 -0
package/dist/output/text/index.js +6 -0
package/dist/output/text/info.js +6 -0
package/dist/output/text/init.js +6 -0
package/dist/output/text/list.js +6 -0
package/dist/output/text/proposal-producer.js +8 -0
package/dist/output/text/proposal.js +12 -0
package/dist/output/text/registry-commands.js +11 -0
package/dist/output/text/registry.js +30 -0
package/dist/output/text/remember.js +6 -0
package/dist/output/text/remove.js +6 -0
package/dist/output/text/save.js +6 -0
package/dist/output/text/search.js +6 -0
package/dist/output/text/show.js +6 -0
package/dist/output/text/update.js +6 -0
package/dist/output/text/upgrade.js +6 -0
package/dist/output/text/vault.js +16 -0
package/dist/output/text/wiki.js +15 -0
package/dist/output/text/workflow.js +14 -0
package/dist/output/text.js +44 -1329
package/dist/registry/build-index.js +3 -0
package/dist/registry/create-provider-registry.js +3 -0
package/dist/registry/factory.js +4 -1
package/dist/registry/origin-resolve.js +3 -0
package/dist/registry/providers/index.js +3 -0
package/dist/registry/providers/skills-sh.js +11 -2
package/dist/registry/providers/static-index.js +10 -1
package/dist/registry/providers/types.js +3 -24
package/dist/registry/resolve.js +11 -16
package/dist/registry/types.js +3 -0
package/dist/scripts/migrate-storage.js +17767 -0
package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +9031 -0
package/dist/scripts/migrations/v16-to-v17.js +141 -0
package/dist/setup/detect.js +3 -0
package/dist/setup/ripgrep-install.js +3 -0
package/dist/setup/ripgrep-resolve.js +3 -0
package/dist/setup/setup.js +306 -67
package/dist/setup/steps.js +3 -15
package/dist/sources/include.js +3 -0
package/dist/sources/provider-factory.js +3 -11
package/dist/sources/provider.js +3 -20
package/dist/sources/providers/filesystem.js +19 -23
package/dist/sources/providers/git.js +171 -21
package/dist/sources/providers/index.js +3 -0
package/dist/sources/providers/install-types.js +3 -13
package/dist/sources/providers/npm.js +3 -4
package/dist/sources/providers/provider-utils.js +3 -0
package/dist/sources/providers/sync-from-ref.js +3 -11
package/dist/sources/providers/tar-utils.js +3 -0
package/dist/sources/providers/website.js +18 -22
package/dist/sources/resolve.js +3 -0
package/dist/sources/types.js +3 -0
package/dist/sources/website-ingest.js +3 -0
package/dist/tasks/backends/cron.js +3 -0
package/dist/tasks/backends/exec-utils.js +3 -0
package/dist/tasks/backends/index.js +3 -11
package/dist/tasks/backends/launchd.js +3 -0
package/dist/tasks/backends/schtasks.js +3 -0
package/dist/tasks/parser.js +51 -38
package/dist/tasks/resolveAkmBin.js +3 -0
package/dist/tasks/runner.js +35 -9
package/dist/tasks/schedule.js +20 -1
package/dist/tasks/schema.js +5 -3
package/dist/tasks/validator.js +6 -3
package/dist/version.js +3 -0
package/dist/wiki/wiki-templates.js +3 -0
package/dist/wiki/wiki.js +3 -0
package/dist/workflows/authoring.js +3 -0
package/dist/workflows/cli.js +3 -0
package/dist/workflows/db.js +140 -10
package/dist/workflows/document-cache.js +3 -10
package/dist/workflows/parser.js +3 -0
package/dist/workflows/renderer.js +3 -0
package/dist/workflows/runs.js +18 -1
package/dist/workflows/schema.js +3 -0
package/dist/workflows/scope-key.js +3 -0
package/dist/workflows/validator.js +5 -9
package/docs/README.md +7 -2
package/docs/data-and-telemetry.md +225 -0
package/docs/migration/release-notes/0.7.5.md +2 -2
package/docs/migration/release-notes/0.8.0.md +57 -5
package/docs/migration/v0.7-to-v0.8.md +1378 -0
package/package.json +28 -11
package/.github/LICENSE +0 -374
package/dist/commands/install-audit.js +0 -385
package/dist/commands/vault.js +0 -310
package/dist/indexer/match-contributors.js +0 -141
package/dist/integrations/agent/pipeline.js +0 -39
package/dist/integrations/agent/runners.js +0 -31

package/dist/llm/graph-extract.js CHANGED

@@ -1,3 +1,6 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
 /**
  * LLM helper for the `akm index` graph-extraction pass (#207).
  *
@@ -18,7 +21,7 @@
  * straight through.
  */
 import { toErrorMessage } from "../core/common";
-import { warn } from "../core/warn";
+import { warn, warnVerbose } from "../core/warn";
 import { chatCompletion, parseEmbeddedJsonResponse } from "./client";
 import { tryLlmFeature } from "./feature-gate";
 import userPromptTemplate from "./prompts/graph-extract-user-prompt.md" with { type: "text" };
@@ -27,8 +30,13 @@ import userPromptTemplate from "./prompts/graph-extract-user-prompt.md" with { t
  * Chosen to be visually clear and unlikely to appear verbatim in asset bodies.
  */
 const BATCH_ASSET_SEPARATOR = "=== ASSET";
-/** Hard cap on body chars sent to the model. */
-const MAX_BODY_CHARS = 4000;
+export const GRAPH_EXTRACT_PROMPT_VERSION = "v2";
+/** Asset bodies longer than this are chunked instead of truncated. */
+const MAX_CHUNK_BODY_CHARS = 1600;
+/** Bodies longer than this are excluded from multi-asset batch prompts and extracted individually instead. */
+const MAX_BATCH_BODY_CHARS = 1600;
+const MIN_RELATION_CONFIDENCE = 0.5;
+const NON_ARRAY_BATCH_DISABLE_THRESHOLD = 2;
 /** Hard cap on entities returned per asset — guards against runaway LLM output. */
 const MAX_ENTITIES_PER_ASSET = 32;
 /** Hard cap on relations returned per asset. */
@@ -37,6 +45,42 @@ const SYSTEM_PROMPT = "You extract a knowledge graph from developer notes. Retur
 const USER_PROMPT_PREFIX = userPromptTemplate
     .replace("{{MAX_ENTITIES}}", String(MAX_ENTITIES_PER_ASSET))
     .replace("{{MAX_RELATIONS}}", String(MAX_RELATIONS_PER_ASSET));
+/**
+ * Detect whether an error message indicates that the context size was exceeded.
+ * Covers common patterns from OpenAI-compatible APIs (LM Studio, Ollama, etc.).
+ */
+function isContextSizeError(message) {
+    const lower = message.toLowerCase();
+    return (lower.includes("context size") ||
+        lower.includes("context length") ||
+        lower.includes("context_window") ||
+        lower.includes("prompt too long") ||
+        (lower.includes("exceeds") && lower.includes("context")));
+}
+const GENERIC_ENTITIES = new Set([
+    "agent",
+    "application",
+    "assistant",
+    "code",
+    "content",
+    "data",
+    "developer",
+    "document",
+    "file",
+    "knowledge",
+    "memory",
+    "note",
+    "notes",
+    "project",
+    "service",
+    "system",
+    "task",
+    "team",
+    "text",
+    "thing",
+    "user",
+]);
+const GENERIC_RELATION_TYPES = new Set(["has", "is", "mentions", "references", "related to"]);
 function parseConfidence(raw) {
     if (typeof raw !== "number" || !Number.isFinite(raw))
         return undefined;
@@ -68,12 +112,205 @@ function normalizeRelationType(raw) {
         return "integrates with";
     return normalized;
 }
+function normalizeEntityKey(raw) {
+    return normalizeEntityName(raw).toLowerCase();
+}
+function bumpTelemetry(telemetry, key, amount = 1) {
+    if (!telemetry)
+        return;
+    telemetry[key] = (telemetry[key] ?? 0) + amount;
+}
+function normalizeBatchState(state) {
+    if (!state)
+        return undefined;
+    state.batchingDisabled = state.batchingDisabled === true;
+    state.nonArrayBatchFailures = Math.max(0, state.nonArrayBatchFailures ?? 0);
+    return state;
+}
+function splitParagraph(text, maxChars) {
+    if (text.length <= maxChars)
+        return { chunks: [text], truncationCount: 0 };
+    const chunks = [];
+    let truncationCount = 0;
+    let remaining = text;
+    while (remaining.length > maxChars) {
+        let splitAt = remaining.lastIndexOf(" ", maxChars);
+        if (splitAt < Math.floor(maxChars * 0.6))
+            splitAt = maxChars;
+        const piece = remaining.slice(0, splitAt).trim();
+        if (piece)
+            chunks.push(piece);
+        remaining = remaining.slice(splitAt).trim();
+        truncationCount += 1;
+    }
+    if (remaining)
+        chunks.push(remaining);
+    return { chunks, truncationCount };
+}
+function splitBodyIntoChunks(body, maxChars = MAX_CHUNK_BODY_CHARS) {
+    const sections = body
+        .split(/\n(?=#{1,6}\s)/)
+        .map((section) => section.trim())
+        .filter(Boolean);
+    if (sections.length === 0)
+        return { chunks: [body.trim()].filter(Boolean), truncationCount: 0 };
+    const chunks = [];
+    let current = "";
+    let truncationCount = 0;
+    const flush = () => {
+        const trimmed = current.trim();
+        if (trimmed)
+            chunks.push(trimmed);
+        current = "";
+    };
+    for (const section of sections) {
+        if (section.length <= maxChars) {
+            const candidate = current ? `${current}\n\n${section}` : section;
+            if (candidate.length <= maxChars)
+                current = candidate;
+            else {
+                flush();
+                current = section;
+            }
+            continue;
+        }
+        const paragraphs = section
+            .split(/\n\s*\n/)
+            .map((part) => part.trim())
+            .filter(Boolean);
+        for (const paragraph of paragraphs) {
+            if (paragraph.length <= maxChars) {
+                const candidate = current ? `${current}\n\n${paragraph}` : paragraph;
+                if (candidate.length <= maxChars)
+                    current = candidate;
+                else {
+                    flush();
+                    current = paragraph;
+                }
+                continue;
+            }
+            flush();
+            const split = splitParagraph(paragraph, maxChars);
+            truncationCount += split.truncationCount;
+            for (const piece of split.chunks) {
+                if (piece.length <= maxChars)
+                    chunks.push(piece);
+            }
+        }
+    }
+    flush();
+    return { chunks, truncationCount };
+}
+/** Consistency weight for blending chunk-agreement with LLM confidence. */
+const CONSISTENCY_WEIGHT = 0.4;
+function mergeGraphExtractions(extractions) {
+    const totalChunks = extractions.length;
+    const entityCanonical = new Map();
+    const entityChunkCounts = new Map();
+    const relationByKey = new Map();
+    const relationChunkCounts = new Map();
+    let confidence;
+    let truncationCount = 0;
+    let filteredGenericEntities = 0;
+    let filteredInvalidRelations = 0;
+    let filteredLowConfidenceRelations = 0;
+    let firstFailureReason;
+    for (const extraction of extractions) {
+        truncationCount += extraction.truncationCount ?? 0;
+        filteredGenericEntities += extraction.filteredGenericEntities ?? 0;
+        filteredInvalidRelations += extraction.filteredInvalidRelations ?? 0;
+        filteredLowConfidenceRelations += extraction.filteredLowConfidenceRelations ?? 0;
+        if (extraction.status === "failed" && !firstFailureReason)
+            firstFailureReason = extraction.reason;
+        const nextConfidence = parseConfidence(extraction.confidence);
+        if (nextConfidence !== undefined)
+            confidence = confidence === undefined ? nextConfidence : Math.max(confidence, nextConfidence);
+        for (const entity of extraction.entities) {
+            const key = normalizeEntityKey(entity);
+            if (!key)
+                continue;
+            if (!entityCanonical.has(key))
+                entityCanonical.set(key, entity);
+            entityChunkCounts.set(key, (entityChunkCounts.get(key) ?? 0) + 1);
+        }
+    }
+    for (const extraction of extractions) {
+        for (const relation of extraction.relations) {
+            const fromKey = normalizeEntityKey(relation.from);
+            const toKey = normalizeEntityKey(relation.to);
+            const type = normalizeRelationType(relation.type ?? "");
+            if (!fromKey || !toKey || !type)
+                continue;
+            const from = entityCanonical.get(fromKey);
+            const to = entityCanonical.get(toKey);
+            if (!from || !to)
+                continue;
+            const key = `${fromKey}\u0000${toKey}\u0000${type}`;
+            if (!relationByKey.has(key)) {
+                relationByKey.set(key, {
+                    from,
+                    to,
+                    type,
+                });
+                relationChunkCounts.set(key, 0);
+            }
+            relationChunkCounts.set(key, (relationChunkCounts.get(key) ?? 0) + 1);
+            const nextConfidence = parseConfidence(relation.confidence);
+            const existing = relationByKey.get(key);
+            if (existing && nextConfidence !== undefined) {
+                const current = parseConfidence(existing.confidence) ?? 0;
+                if (nextConfidence > current)
+                    existing.confidence = nextConfidence;
+            }
+        }
+    }
+    function blendConsistency(llmConfidence, chunkCount) {
+        const consistency = totalChunks > 1 ? chunkCount / totalChunks : 1;
+        if (llmConfidence === undefined)
+            return consistency;
+        return (1 - CONSISTENCY_WEIGHT) * llmConfidence + CONSISTENCY_WEIGHT * consistency;
+    }
+    const entities = [...entityCanonical.values()].slice(0, MAX_ENTITIES_PER_ASSET);
+    const relations = [...relationByKey.values()].slice(0, MAX_RELATIONS_PER_ASSET);
+    for (const relation of relations) {
+        const fromKey = normalizeEntityKey(relation.from);
+        const toKey = normalizeEntityKey(relation.to);
+        const type = normalizeRelationType(relation.type ?? "");
+        if (!fromKey || !toKey || !type)
+            continue;
+        const key = `${fromKey}\u0000${toKey}\u0000${type}`;
+        const chunkCount = relationChunkCounts.get(key) ?? 1;
+        relation.confidence = blendConsistency(relation.confidence, chunkCount);
+    }
+    const status = entities.length > 0 ? "extracted" : firstFailureReason ? "failed" : "empty";
+    const reason = status === "extracted" ? "none" : (firstFailureReason ?? "no_graph_content");
+    const mergedConfidence = confidence !== undefined ? blendConsistency(confidence, totalChunks) : totalChunks > 1 ? 1 : undefined;
+    return {
+        entities,
+        relations,
+        ...(mergedConfidence !== undefined ? { confidence: mergedConfidence } : {}),
+        status,
+        reason,
+        chunkCount: extractions.length,
+        truncationCount,
+        filteredGenericEntities,
+        filteredInvalidRelations,
+        filteredLowConfidenceRelations,
+    };
+}
 function parseGraphExtraction(raw) {
-    const empty = { entities: [], relations: [] };
+    const empty = (reason = "no_graph_content") => ({
+        entities: [],
+        relations: [],
+        status: reason === "llm_error" || reason === "invalid_json" || reason === "context_limit" ? "failed" : "empty",
+        reason,
+    });
     if (typeof raw !== "object" || raw === null || Array.isArray(raw))
-        return empty;
+        return empty();
     const item = raw;
+    const extractionConfidence = parseConfidence(item.confidence);
     const entityCanonical = new Map();
+    let filteredGenericEntities = 0;
     if (Array.isArray(item.entities)) {
         for (const value of item.entities) {
             if (typeof value !== "string")
@@ -81,6 +318,11 @@ function parseGraphExtraction(raw) {
             const normalized = normalizeEntityName(value);
             if (!normalized)
                 continue;
+            const normalizedKey = normalized.toLowerCase();
+            if (!/[a-z0-9]/i.test(normalized) || GENERIC_ENTITIES.has(normalizedKey)) {
+                filteredGenericEntities += 1;
+                continue;
+            }
             const key = normalized.toLowerCase();
             if (!entityCanonical.has(key))
                 entityCanonical.set(key, normalized);
@@ -90,21 +332,37 @@ function parseGraphExtraction(raw) {
     }
     const entities = Array.from(entityCanonical.values());
     const relations = [];
+    let filteredInvalidRelations = 0;
+    let filteredLowConfidenceRelations = 0;
     if (Array.isArray(item.relations)) {
         for (const relation of item.relations) {
-            if (typeof relation !== "object" || relation === null || Array.isArray(relation))
+            if (typeof relation !== "object" || relation === null || Array.isArray(relation)) {
+                filteredInvalidRelations += 1;
                 continue;
+            }
             const rel = relation;
             const fromRaw = typeof rel.from === "string" ? normalizeEntityName(rel.from) : "";
             const toRaw = typeof rel.to === "string" ? normalizeEntityName(rel.to) : "";
-            if (!fromRaw || !toRaw)
+            if (!fromRaw || !toRaw) {
+                filteredInvalidRelations += 1;
                 continue;
+            }
             const from = entityCanonical.get(fromRaw.toLowerCase());
             const to = entityCanonical.get(toRaw.toLowerCase());
-            if (!from || !to)
+            if (!from || !to || from.toLowerCase() === to.toLowerCase()) {
+                filteredInvalidRelations += 1;
                 continue;
+            }
             const type = typeof rel.type === "string" ? normalizeRelationType(rel.type) : undefined;
+            if (type !== undefined && GENERIC_RELATION_TYPES.has(type)) {
+                filteredInvalidRelations += 1;
+                continue;
+            }
             const confidence = parseConfidence(rel.confidence);
+            if (confidence !== undefined && confidence < MIN_RELATION_CONFIDENCE) {
+                filteredLowConfidenceRelations += 1;
+                continue;
+            }
             relations.push({
                 from,
                 to,
@@ -115,10 +373,17 @@ function parseGraphExtraction(raw) {
                 break;
         }
     }
-    const confidence = parseConfidence(item.confidence);
+    const confidence = extractionConfidence;
+    const status = entities.length > 0 ? "extracted" : "empty";
+    const reason = entities.length > 0 ? "none" : filteredGenericEntities > 0 ? "generic_entities_only" : "no_graph_content";
     return {
         entities,
         relations,
+        status,
+        reason,
+        filteredGenericEntities,
+        filteredInvalidRelations,
+        filteredLowConfidenceRelations,
         ...(confidence !== undefined ? { confidence } : {}),
     };
 }
@@ -161,9 +426,7 @@ function buildBatchSystemPrompt() {
 }
 function buildBatchUserPrompt(bodies) {
     const count = bodies.length;
-    const assetBlocks = bodies
-        .map((body, i) => `${BATCH_ASSET_SEPARATOR} ${i + 1} ===\n${body.trim().slice(0, MAX_BODY_CHARS)}`)
-        .join("\n\n");
+    const assetBlocks = bodies.map((body, i) => `${BATCH_ASSET_SEPARATOR} ${i + 1} ===\n${body.trim()}`).join("\n\n");
     return (`Extract entities and relations from the N=${count} assets below.\n\n` +
         `Rules:\n` +
         `- Output ONLY a JSON array of exactly ${count} objects, one per asset, preserving input order.\n` +
@@ -177,6 +440,9 @@ function buildBatchUserPrompt(bodies) {
         `- The array MUST have exactly ${count} elements — one placeholder per asset even if empty.\n\n` +
         assetBlocks);
 }
+function formatContextHint(llmConfig) {
+    return llmConfig.contextLength ? `, configured contextLength=${llmConfig.contextLength}` : "";
+}
 /**
  * Parse and validate a single item from the batch response array.
  * Mirrors the validation logic in `extractGraphFromBody`.
@@ -207,14 +473,15 @@ function parseBatchItem(raw) {
  * @param akmConfig - Full AKM config (for feature-gate checks).
  * @param onFallback - Optional fallback event sink.
  */
-export async function extractGraphFromBodies(llmConfig, bodies, signal, akmConfig, onFallback) {
+export async function extractGraphFromBodies(llmConfig, bodies, signal, akmConfig, onFallback, options = {}) {
     const empty = () => ({ entities: [], relations: [] });
+    const batchState = normalizeBatchState(options.batchState);
     // Degenerate case: no bodies → empty array (not an error).
     if (bodies.length === 0)
         return [];
     // Single body: delegate to the single-asset path for identical behaviour.
     if (bodies.length === 1) {
-        const result = await extractGraphFromBody(llmConfig, bodies[0] ?? "", signal, akmConfig, onFallback);
+        const result = await extractGraphFromBody(llmConfig, bodies[0] ?? "", signal, akmConfig, onFallback, options);
         return [result];
     }
     // Filter out bodies that are empty so we don't waste tokens, but keep
@@ -222,17 +489,37 @@ export async function extractGraphFromBodies(llmConfig, bodies, signal, akmConfi
     const results = bodies.map(empty);
     const nonEmptyIndices = [];
     const nonEmptyBodies = [];
+    const oversizedIndices = [];
     for (let i = 0; i < bodies.length; i++) {
         const trimmed = (bodies[i] ?? "").trim();
         if (trimmed) {
-            nonEmptyIndices.push(i);
-            nonEmptyBodies.push(trimmed);
+            if (trimmed.length > MAX_BATCH_BODY_CHARS) {
+                oversizedIndices.push(i);
+            }
+            else {
+                nonEmptyIndices.push(i);
+                nonEmptyBodies.push(trimmed);
+            }
         }
     }
+    if (oversizedIndices.length > 0) {
+        await Promise.all(oversizedIndices.map(async (index) => {
+            results[index] = await extractGraphFromBody(llmConfig, bodies[index] ?? "", signal, akmConfig, onFallback, options);
+        }));
+    }
     if (nonEmptyBodies.length === 0)
         return results;
+    if (batchState?.batchingDisabled) {
+        return Promise.all(bodies.map((body) => extractGraphFromBody(llmConfig, body, signal, akmConfig, onFallback, options)));
+    }
     const systemPrompt = buildBatchSystemPrompt();
     const userPrompt = buildBatchUserPrompt(nonEmptyBodies);
+    const truncatedBodies = nonEmptyBodies.filter((body) => body.length > MAX_BATCH_BODY_CHARS).length;
+    if (truncatedBodies > 0) {
+        warnVerbose(`graph extraction (batch): ${truncatedBodies}/${nonEmptyBodies.length} asset body/bodies exceed the batch body threshold of ${MAX_BATCH_BODY_CHARS} chars.`);
+    }
+    let batchContextError = false;
+    let nonArrayResponse = false;
     const batchResult = await tryLlmFeature("graph_extraction", akmConfig, async () => {
         try {
             const raw = await chatCompletion(llmConfig, [
@@ -247,13 +534,32 @@ export async function extractGraphFromBodies(llmConfig, bodies, signal, akmConfi
                 return null;
             const parsed = parseEmbeddedJsonResponse(raw);
             if (!Array.isArray(parsed)) {
-                warn("graph extraction (batch): LLM response was not a JSON array; will fall back per-asset.");
+                nonArrayResponse = true;
+                bumpTelemetry(options.telemetry, "nonArrayBatchFailures");
+                if (batchState) {
+                    batchState.nonArrayBatchFailures += 1;
+                    if (batchState.nonArrayBatchFailures >= NON_ARRAY_BATCH_DISABLE_THRESHOLD) {
+                        batchState.batchingDisabled = true;
+                    }
+                }
+                warn(`graph extraction (batch): LLM response was not a JSON array for ${nonEmptyBodies.length} asset(s); ` +
+                    `will fall back per-asset. promptChars=${userPrompt.length}${formatContextHint(llmConfig)}`);
                 return null;
             }
             return parsed;
         }
         catch (err) {
-            warn(`graph extraction (batch) failed: ${toErrorMessage(err)}`);
+            const errMsg = toErrorMessage(err);
+            if (isContextSizeError(errMsg)) {
+                batchContextError = true;
+                bumpTelemetry(options.telemetry, "contextBatchRetries");
+                warn(`graph extraction (batch): context size exceeded for ${nonEmptyBodies.length} asset(s); ` +
+                    `skipping batch. promptChars=${userPrompt.length}${formatContextHint(llmConfig)}`);
+            }
+            else {
+                warn(`graph extraction (batch) failed for ${nonEmptyBodies.length} asset(s); ` +
+                    `promptChars=${userPrompt.length}${formatContextHint(llmConfig)}: ${errMsg}`);
+            }
             return null;
         }
     }, null, {
@@ -262,6 +568,12 @@ export async function extractGraphFromBodies(llmConfig, bodies, signal, akmConfi
     });
     // Map successful batch results back to their original indices.
     if (batchResult !== null) {
+        if (batchState)
+            batchState.nonArrayBatchFailures = 0;
+        if (batchResult.length > nonEmptyBodies.length) {
+            warn(`graph extraction (batch): response had ${batchResult.length} items for ${nonEmptyBodies.length} assets; ` +
+                `ignoring ${batchResult.length - nonEmptyBodies.length} extra item(s).`);
+        }
         for (let j = 0; j < nonEmptyBodies.length; j++) {
             const originalIndex = nonEmptyIndices[j];
             if (originalIndex === undefined)
@@ -272,10 +584,26 @@ export async function extractGraphFromBodies(llmConfig, bodies, signal, akmConfi
             // j >= batchResult.length → partial failure; handled below.
         }
     }
+    if (batchContextError && nonEmptyBodies.length > 1) {
+        const splitAt = Math.ceil(nonEmptyBodies.length / 2);
+        const left = await extractGraphFromBodies(llmConfig, nonEmptyBodies.slice(0, splitAt), signal, akmConfig, onFallback, options);
+        const right = await extractGraphFromBodies(llmConfig, nonEmptyBodies.slice(splitAt), signal, akmConfig, onFallback, options);
+        const combined = [...left, ...right];
+        for (let j = 0; j < nonEmptyIndices.length; j++) {
+            const origIdx = nonEmptyIndices[j];
+            if (origIdx === undefined)
+                continue;
+            results[origIdx] = combined[j] ?? empty();
+        }
+        return results;
+    }
     // Partial-failure fallback: any non-empty body whose result is still the
     // empty placeholder (either because batchResult was null or the array was
-    // shorter than expected) gets an individual retry.
+    // shorter than expected) gets an individual retry — unless the batch failed
+    // due to context size, in which case individual calls would also fail.
     const fallbackIndices = nonEmptyIndices.filter((_origIdx, j) => {
+        if (batchContextError)
+            return false; // skip individual retries on context error
         // Result is still empty → needs a fallback call.
         if (batchResult === null)
             return true;
@@ -291,9 +619,16 @@ export async function extractGraphFromBodies(llmConfig, bodies, signal, akmConfi
         }
         await Promise.all(fallbackIndices.map(async (origIdx) => {
             const body = bodies[origIdx] ?? "";
-            results[origIdx] = await extractGraphFromBody(llmConfig, body, signal, akmConfig, onFallback);
+            results[origIdx] = await extractGraphFromBody(llmConfig, body, signal, akmConfig, onFallback, options);
         }));
     }
+    else if (batchContextError) {
+        warn(`graph extraction (batch): skipped ${nonEmptyBodies.length} asset(s) due to context size error; ` +
+            `consider increasing llm.contextLength or reducing index.graph.graphExtractionBatchSize to 1.`);
+    }
+    else if (nonArrayResponse && batchState?.batchingDisabled) {
+        warn("graph extraction (batch): disabling batching for the rest of this run after repeated non-array responses.");
+    }
     return results;
 }
 /**
@@ -306,12 +641,31 @@ export async function extractGraphFromBodies(llmConfig, bodies, signal, akmConfi
  * Routes through `tryLlmFeature("graph_extraction", ...)` so the feature gate
  * and onFallback hook are honoured uniformly (Fix C5).
  */
-export async function extractGraphFromBody(llmConfig, body, signal, akmConfig, onFallback) {
-    const empty = { entities: [], relations: [] };
+export async function extractGraphFromBody(llmConfig, body, signal, akmConfig, onFallback, options = {}) {
+    const empty = (reason, status) => ({
+        entities: [],
+        relations: [],
+        ...(status ? { status } : {}),
+        ...(reason ? { reason } : {}),
+    });
     const trimmedBody = body.trim();
     if (!trimmedBody)
-        return empty;
-    const userPrompt = `${USER_PROMPT_PREFIX}${trimmedBody.slice(0, MAX_BODY_CHARS)}`;
+        return empty();
+    const chunked = splitBodyIntoChunks(trimmedBody, MAX_CHUNK_BODY_CHARS);
+    if (chunked.truncationCount > 0) {
+        bumpTelemetry(options.telemetry, "truncationCount", chunked.truncationCount);
+        warnVerbose(`graph extraction: split a long asset into ${chunked.chunks.length} chunk(s) with ${chunked.truncationCount} hard split(s).`);
+    }
+    if (chunked.chunks.length > 1) {
+        const chunkResults = [];
+        for (const chunk of chunked.chunks) {
+            chunkResults.push(await extractGraphFromBody(llmConfig, chunk, signal, akmConfig, onFallback, options));
+        }
+        const merged = mergeGraphExtractions(chunkResults);
+        merged.truncationCount = (merged.truncationCount ?? 0) + chunked.truncationCount;
+        return merged;
+    }
+    const userPrompt = `${USER_PROMPT_PREFIX}${trimmedBody}`;
     return tryLlmFeature("graph_extraction", akmConfig, async () => {
         try {
             const raw = await chatCompletion(llmConfig, [
@@ -319,19 +673,36 @@ export async function extractGraphFromBody(llmConfig, body, signal, akmConfig, o
                 { role: "user", content: userPrompt },
             ], { temperature: 0.1, timeoutMs: llmConfig.timeoutMs, signal });
             if (!raw)
-                return empty;
+                return empty();
             const parsed = parseEmbeddedJsonResponse(raw);
             if (!parsed) {
                 warn("graph extraction: invalid JSON response from LLM; skipping asset.");
-                return empty;
+                bumpTelemetry(options.telemetry, "failureCount");
+                return empty("invalid_json", "failed");
             }
-            return parseGraphExtraction(parsed);
+            const extraction = parseGraphExtraction(parsed);
+            bumpTelemetry(options.telemetry, "filteredGenericEntities", extraction.filteredGenericEntities ?? 0);
+            bumpTelemetry(options.telemetry, "filteredInvalidRelations", extraction.filteredInvalidRelations ?? 0);
+            bumpTelemetry(options.telemetry, "filteredLowConfidenceRelations", extraction.filteredLowConfidenceRelations ?? 0);
+            if (extraction.status === "failed")
+                bumpTelemetry(options.telemetry, "failureCount");
+            return extraction;
         }
         catch (err) {
-            warn(`graph extraction failed: ${toErrorMessage(err)}`);
-            return empty;
+            const errMsg = toErrorMessage(err);
+            if (isContextSizeError(errMsg)) {
+                bumpTelemetry(options.telemetry, "failureCount");
+                warn(`graph extraction: context size exceeded for asset; promptChars=${userPrompt.length}${formatContextHint(llmConfig)}. ` +
+                    `Consider increasing llm.contextLength in config.json.`);
+                return empty("context_limit", "failed");
+            }
+            else {
+                bumpTelemetry(options.telemetry, "failureCount");
+                warn(`graph extraction failed for asset; promptChars=${userPrompt.length}${formatContextHint(llmConfig)}: ${errMsg}`);
+                return empty("llm_error", "failed");
+            }
         }
-    }, empty, {
+    }, empty(), {
         timeoutMs: llmConfig.timeoutMs,
         onFallback,
     });

package/dist/llm/index-passes.js CHANGED Viewed

@@ -1,35 +1,50 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+import { getDefaultLlmConfig, getIndexPassConfig } from "../core/config";
 /**
- * Per-pass LLM config resolution for `akm index`.
- *
- * Locked v1 contract (#208):
- * - There is exactly one provider/model configuration: `akm.llm`.
- * - Every LLM-using pass inside `akm index` defaults to that block.
- * - A pass can be opted out individually with `index.<passName>.llm = false`.
- * - Any attempt to supply provider/model fields under `index.<passName>` is
- *   rejected at config-load time by `parseIndexConfig` in
- *   {@link ../core/config.ts} (`ConfigError("INVALID_CONFIG_FILE")`).
- *
- * Passes plug in by calling {@link resolveIndexPassLLM} with their pass
- * name (e.g. `"memory"` for #201's memory-inference pass, `"graph"` for
- * #207's graph-extraction pass). They do not read `config.llm` directly.
- * This keeps the config surface small and the wiring uniform.
- */
-/**
- * Resolve the {@link LlmConnectionConfig} a single index pass should use, or
- * `undefined` when the pass should run without an LLM.
- *
- * Returns `undefined` if any of:
- * - No top-level `akm.llm` block is configured.
- * - The pass is explicitly opted out (`index.<passName>.llm === false`).
- *
- * Otherwise returns the shared `akm.llm` config. There is no per-pass
- * provider override; that decision is locked by §9 of the v1 spec.
+ * Map a pass name (as used by callers — "memory", "graph", etc.) to the
+ * matching key under `profiles.improve.default.processes`. Pass names with
+ * no improve-profile counterpart return undefined and resolve via the
+ * default LLM only.
  */
+function improveProcessKeyForPass(passName) {
+    switch (passName) {
+        case "memory":
+            return "memoryInference";
+        case "graph":
+            return "graphExtraction";
+        default:
+            return undefined;
+    }
+}
 export function resolveIndexPassLLM(passName, config) {
-    if (!config.llm)
-        return undefined;
-    const passConfig = config.index?.[passName];
+    // Gate 1 — explicit opt-out via the index-config block stays authoritative.
+    const passConfig = getIndexPassConfig(config.index, passName);
     if (passConfig?.llm === false)
         return undefined;
-    return config.llm;
+    // Gate 2 — per-process profile from the improve profile, when present.
+    // This is the path that lets
+    //   profiles.improve.default.processes.graphExtraction.profile = "ministral-3b"
+    // actually take effect on the graph pass instead of being silently
+    // ignored.
+    const processKey = improveProcessKeyForPass(passName);
+    if (processKey) {
+        const processConfig = config.profiles?.improve?.default?.processes?.[processKey];
+        // Honor enabled === false here too — an explicit disable wins.
+        if (processConfig?.enabled === false)
+            return undefined;
+        const profileName = processConfig?.profile;
+        if (profileName) {
+            const profile = config.profiles?.llm?.[profileName];
+            if (profile)
+                return profile;
+            // A named-but-missing profile is a configuration error in spirit, but
+            // we fall through to default rather than throwing — callers gracefully
+            // treat `undefined` as "pass disabled" and emitting a hard throw here
+            // would take the whole improve run down on a typo.
+        }
+    }
+    // Gate 3 — fall back to the default LLM profile.
+    return getDefaultLlmConfig(config);
 }