agentfootprint 6.44.0 → 6.45.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ai-instructions/setup.sh +0 -0
- package/dist/core/agent/buildAgentChart.js +2 -1
- package/dist/core/agent/buildAgentChart.js.map +1 -1
- package/dist/core/agent/buildDynamicAgentChart.js +3 -1
- package/dist/core/agent/buildDynamicAgentChart.js.map +1 -1
- package/dist/core/agent/stages/pickEntry.js +3 -1
- package/dist/core/agent/stages/pickEntry.js.map +1 -1
- package/dist/esm/adapters/identity/agentcore.d.ts +100 -0
- package/dist/esm/adapters/llm/AnthropicProvider.d.ts +130 -0
- package/dist/esm/adapters/llm/BedrockProvider.d.ts +100 -0
- package/dist/esm/adapters/llm/BrowserAnthropicProvider.d.ts +41 -0
- package/dist/esm/adapters/llm/BrowserOpenAIProvider.d.ts +95 -0
- package/dist/esm/adapters/llm/MockProvider.d.ts +153 -0
- package/dist/esm/adapters/llm/OpenAIProvider.d.ts +220 -0
- package/dist/esm/adapters/llm/createProvider.d.ts +85 -0
- package/dist/esm/adapters/memory/agentcore.d.ts +146 -0
- package/dist/esm/adapters/memory/bedrockAgentMemory.d.ts +95 -0
- package/dist/esm/adapters/memory/redis.d.ts +127 -0
- package/dist/esm/adapters/observability/agentcore.d.ts +67 -0
- package/dist/esm/adapters/observability/audit.d.ts +254 -0
- package/dist/esm/adapters/observability/cloudwatch.d.ts +96 -0
- package/dist/esm/adapters/observability/otel.d.ts +237 -0
- package/dist/esm/adapters/observability/xray.d.ts +88 -0
- package/dist/esm/adapters/types.d.ts +378 -0
- package/dist/esm/bridge/eventMeta.d.ts +59 -0
- package/dist/esm/cache/CacheDecisionSubflow.d.ts +85 -0
- package/dist/esm/cache/CacheGateDecider.d.ts +127 -0
- package/dist/esm/cache/applyCachePolicy.d.ts +37 -0
- package/dist/esm/cache/cacheRecorder.d.ts +85 -0
- package/dist/esm/cache/index.d.ts +33 -0
- package/dist/esm/cache/strategies/AnthropicCacheStrategy.d.ts +38 -0
- package/dist/esm/cache/strategies/BedrockCacheStrategy.d.ts +33 -0
- package/dist/esm/cache/strategies/NoOpCacheStrategy.d.ts +29 -0
- package/dist/esm/cache/strategies/OpenAICacheStrategy.d.ts +36 -0
- package/dist/esm/cache/strategyRegistry.d.ts +45 -0
- package/dist/esm/cache/types.d.ts +243 -0
- package/dist/esm/conventions.d.ts +203 -0
- package/dist/esm/core/Agent.d.ts +355 -0
- package/dist/esm/core/LLMCall.d.ts +139 -0
- package/dist/esm/core/RunnerBase.d.ts +267 -0
- package/dist/esm/core/agent/AgentBuilder.d.ts +565 -0
- package/dist/esm/core/agent/buildAgentChart.d.ts +109 -0
- package/dist/esm/core/agent/buildAgentChart.js +2 -1
- package/dist/esm/core/agent/buildAgentChart.js.map +1 -1
- package/dist/esm/core/agent/buildAgentMessageApiChart.d.ts +40 -0
- package/dist/esm/core/agent/buildCacheSubflow.d.ts +35 -0
- package/dist/esm/core/agent/buildDynamicAgentChart.d.ts +56 -0
- package/dist/esm/core/agent/buildDynamicAgentChart.js +3 -1
- package/dist/esm/core/agent/buildDynamicAgentChart.js.map +1 -1
- package/dist/esm/core/agent/buildMessageApiChart.d.ts +47 -0
- package/dist/esm/core/agent/buildToolRegistry.d.ts +61 -0
- package/dist/esm/core/agent/memoryRecallInjections.d.ts +8 -0
- package/dist/esm/core/agent/stages/breakFinal.d.ts +22 -0
- package/dist/esm/core/agent/stages/callLLM.d.ts +74 -0
- package/dist/esm/core/agent/stages/pickEntry.d.ts +19 -0
- package/dist/esm/core/agent/stages/pickEntry.js +3 -1
- package/dist/esm/core/agent/stages/pickEntry.js.map +1 -1
- package/dist/esm/core/agent/stages/prepareFinal.d.ts +19 -0
- package/dist/esm/core/agent/stages/reliabilityExecution.d.ts +135 -0
- package/dist/esm/core/agent/stages/route.d.ts +18 -0
- package/dist/esm/core/agent/stages/seed.d.ts +53 -0
- package/dist/esm/core/agent/stages/toolCalls.d.ts +76 -0
- package/dist/esm/core/agent/toolArgsValidation.d.ts +62 -0
- package/dist/esm/core/agent/types.d.ts +382 -0
- package/dist/esm/core/agent/validators.d.ts +52 -0
- package/dist/esm/core/cost.d.ts +38 -0
- package/dist/esm/core/flowchartAsTool.d.ts +200 -0
- package/dist/esm/core/humanizeLLMError.d.ts +23 -0
- package/dist/esm/core/outputFallback.d.ts +139 -0
- package/dist/esm/core/outputSchema.d.ts +127 -0
- package/dist/esm/core/pause.d.ts +74 -0
- package/dist/esm/core/runCheckpoint.d.ts +179 -0
- package/dist/esm/core/runner.d.ts +203 -0
- package/dist/esm/core/slots/buildMessagesSlot.d.ts +40 -0
- package/dist/esm/core/slots/buildSystemPromptSlot.d.ts +41 -0
- package/dist/esm/core/slots/buildThinkingSubflow.d.ts +40 -0
- package/dist/esm/core/slots/buildToolsSlot.d.ts +60 -0
- package/dist/esm/core/slots/helpers.d.ts +27 -0
- package/dist/esm/core/toolContract.d.ts +43 -0
- package/dist/esm/core/tools.d.ts +90 -0
- package/dist/esm/core/translator.d.ts +94 -0
- package/dist/esm/core-flow/Conditional.d.ts +119 -0
- package/dist/esm/core-flow/Loop.d.ts +160 -0
- package/dist/esm/core-flow/Parallel.d.ts +360 -0
- package/dist/esm/core-flow/Sequence.d.ts +133 -0
- package/dist/esm/events/dispatcher.d.ts +134 -0
- package/dist/esm/events/payloads.d.ts +761 -0
- package/dist/esm/events/registry.d.ts +198 -0
- package/dist/esm/events/types.d.ts +70 -0
- package/dist/esm/identity/kinds.d.ts +36 -0
- package/dist/esm/identity/staticTokens.d.ts +28 -0
- package/dist/esm/identity/types.d.ts +113 -0
- package/dist/esm/identity/withCredentialRetry.d.ts +64 -0
- package/dist/esm/identity.d.ts +31 -0
- package/dist/esm/index.d.ts +64 -0
- package/dist/esm/index.js +2 -0
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/injection-engine.d.ts +4 -0
- package/dist/esm/lib/canonicalJson.d.ts +56 -0
- package/dist/esm/lib/context-bisect/ablation.d.ts +109 -0
- package/dist/esm/lib/context-bisect/bisect.d.ts +75 -0
- package/dist/esm/lib/context-bisect/cost.d.ts +37 -0
- package/dist/esm/lib/context-bisect/index.d.ts +31 -0
- package/dist/esm/lib/context-bisect/index.js +3 -0
- package/dist/esm/lib/context-bisect/index.js.map +1 -1
- package/dist/esm/lib/context-bisect/llmEdgeWeigher.d.ts +124 -0
- package/dist/esm/lib/context-bisect/localize.d.ts +152 -0
- package/dist/esm/lib/context-bisect/localize.js +2 -0
- package/dist/esm/lib/context-bisect/localize.js.map +1 -1
- package/dist/esm/lib/context-bisect/loop-recall.d.ts +97 -0
- package/dist/esm/lib/context-bisect/missingContext.d.ts +71 -0
- package/dist/esm/lib/context-bisect/restoration.d.ts +39 -0
- package/dist/esm/lib/context-bisect/toBacktrackTrace.d.ts +138 -0
- package/dist/esm/lib/context-bisect/trajectory.d.ts +168 -0
- package/dist/esm/lib/context-bisect/types.d.ts +448 -0
- package/dist/esm/lib/context-bisect/walk-to-root.d.ts +103 -0
- package/dist/esm/lib/influence-core/attributability.d.ts +72 -0
- package/dist/esm/lib/influence-core/cache.d.ts +94 -0
- package/dist/esm/lib/influence-core/contrastive.d.ts +26 -0
- package/dist/esm/lib/influence-core/index.d.ts +34 -0
- package/dist/esm/lib/influence-core/margin.d.ts +33 -0
- package/dist/esm/lib/influence-core/signals.d.ts +128 -0
- package/dist/esm/lib/influence-core/similarity.d.ts +25 -0
- package/dist/esm/lib/influence-core/types.d.ts +252 -0
- package/dist/esm/lib/injection-engine/SkillRegistry.d.ts +147 -0
- package/dist/esm/lib/injection-engine/buildInjectionEngineSubflow.d.ts +110 -0
- package/dist/esm/lib/injection-engine/buildInjectionEngineSubflow.js +1 -0
- package/dist/esm/lib/injection-engine/buildInjectionEngineSubflow.js.map +1 -1
- package/dist/esm/lib/injection-engine/entryScorer.d.ts +93 -0
- package/dist/esm/lib/injection-engine/entryScorer.js +172 -0
- package/dist/esm/lib/injection-engine/entryScorer.js.map +1 -0
- package/dist/esm/lib/injection-engine/evaluator.d.ts +24 -0
- package/dist/esm/lib/injection-engine/factories/defineFact.d.ts +60 -0
- package/dist/esm/lib/injection-engine/factories/defineInjection.d.ts +41 -0
- package/dist/esm/lib/injection-engine/factories/defineInstruction.d.ts +78 -0
- package/dist/esm/lib/injection-engine/factories/defineRelevanceHint.d.ts +26 -0
- package/dist/esm/lib/injection-engine/factories/defineSkill.d.ts +160 -0
- package/dist/esm/lib/injection-engine/factories/defineSteering.d.ts +39 -0
- package/dist/esm/lib/injection-engine/index.d.ts +22 -0
- package/dist/esm/lib/injection-engine/index.js +1 -0
- package/dist/esm/lib/injection-engine/index.js.map +1 -1
- package/dist/esm/lib/injection-engine/skillContract.d.ts +29 -0
- package/dist/esm/lib/injection-engine/skillGraph.d.ts +289 -0
- package/dist/esm/lib/injection-engine/skillGraph.js +35 -41
- package/dist/esm/lib/injection-engine/skillGraph.js.map +1 -1
- package/dist/esm/lib/injection-engine/skillGraphCheckup.d.ts +49 -0
- package/dist/esm/lib/injection-engine/skillTools.d.ts +72 -0
- package/dist/esm/lib/injection-engine/softmax.d.ts +11 -0
- package/dist/esm/lib/injection-engine/types.d.ts +229 -0
- package/dist/esm/lib/injection-engine/types.js.map +1 -1
- package/dist/esm/lib/lazyRequire.d.ts +29 -0
- package/dist/esm/lib/mcp/index.d.ts +9 -0
- package/dist/esm/lib/mcp/mcpClient.d.ts +46 -0
- package/dist/esm/lib/mcp/mockMcpClient.d.ts +65 -0
- package/dist/esm/lib/mcp/types.d.ts +133 -0
- package/dist/esm/lib/rag/defineRAG.d.ts +139 -0
- package/dist/esm/lib/rag/index.d.ts +7 -0
- package/dist/esm/lib/rag/indexDocuments.d.ts +105 -0
- package/dist/esm/lib/tool-lint/analyze.d.ts +83 -0
- package/dist/esm/lib/tool-lint/cli.d.ts +43 -0
- package/dist/esm/lib/tool-lint/format.d.ts +18 -0
- package/dist/esm/lib/tool-lint/index.d.ts +23 -0
- package/dist/esm/lib/tool-lint/rules.d.ts +85 -0
- package/dist/esm/lib/tool-lint/types.d.ts +155 -0
- package/dist/esm/lib/trace-toolpack/bounded.d.ts +47 -0
- package/dist/esm/lib/trace-toolpack/debugPrompt.d.ts +19 -0
- package/dist/esm/lib/trace-toolpack/index.d.ts +20 -0
- package/dist/esm/lib/trace-toolpack/lazyToolpack.d.ts +35 -0
- package/dist/esm/lib/trace-toolpack/selfExplain.d.ts +100 -0
- package/dist/esm/lib/trace-toolpack/traceDebugAgent.d.ts +42 -0
- package/dist/esm/lib/trace-toolpack/traceToolpack.d.ts +69 -0
- package/dist/esm/lib/trace-toolpack/types.d.ts +59 -0
- package/dist/esm/llm-providers.d.ts +26 -0
- package/dist/esm/locales/index.d.ts +132 -0
- package/dist/esm/memory/beats/extractBeats.d.ts +61 -0
- package/dist/esm/memory/beats/extractor.d.ts +47 -0
- package/dist/esm/memory/beats/formatAsNarrative.d.ts +62 -0
- package/dist/esm/memory/beats/heuristicExtractor.d.ts +37 -0
- package/dist/esm/memory/beats/index.d.ts +12 -0
- package/dist/esm/memory/beats/llmExtractor.d.ts +56 -0
- package/dist/esm/memory/beats/types.d.ts +60 -0
- package/dist/esm/memory/beats/writeBeats.d.ts +22 -0
- package/dist/esm/memory/causal/evidenceRecorder.d.ts +66 -0
- package/dist/esm/memory/causal/index.d.ts +6 -0
- package/dist/esm/memory/causal/loadSnapshot.d.ts +51 -0
- package/dist/esm/memory/causal/snapshotPipeline.d.ts +35 -0
- package/dist/esm/memory/causal/types.d.ts +130 -0
- package/dist/esm/memory/causal/writeSnapshot.d.ts +73 -0
- package/dist/esm/memory/define.d.ts +63 -0
- package/dist/esm/memory/define.types.d.ts +276 -0
- package/dist/esm/memory/define.types.js +1 -1
- package/dist/esm/memory/embedding/cosine.d.ts +18 -0
- package/dist/esm/memory/embedding/embedMessages.d.ts +58 -0
- package/dist/esm/memory/embedding/index.d.ts +8 -0
- package/dist/esm/memory/embedding/loadRelevant.d.ts +51 -0
- package/dist/esm/memory/embedding/mockEmbedder.d.ts +22 -0
- package/dist/esm/memory/embedding/types.d.ts +46 -0
- package/dist/esm/memory/entry/decay.d.ts +36 -0
- package/dist/esm/memory/entry/index.d.ts +2 -0
- package/dist/esm/memory/entry/types.d.ts +140 -0
- package/dist/esm/memory/facts/extractFacts.d.ts +50 -0
- package/dist/esm/memory/facts/extractor.d.ts +33 -0
- package/dist/esm/memory/facts/formatFacts.d.ts +60 -0
- package/dist/esm/memory/facts/index.d.ts +14 -0
- package/dist/esm/memory/facts/llmFactExtractor.d.ts +64 -0
- package/dist/esm/memory/facts/loadFacts.d.ts +43 -0
- package/dist/esm/memory/facts/patternFactExtractor.d.ts +2 -0
- package/dist/esm/memory/facts/types.d.ts +68 -0
- package/dist/esm/memory/facts/writeFacts.d.ts +19 -0
- package/dist/esm/memory/identity/index.d.ts +2 -0
- package/dist/esm/memory/identity/types.d.ts +49 -0
- package/dist/esm/memory/index.d.ts +19 -0
- package/dist/esm/memory/pipeline/auto.d.ts +59 -0
- package/dist/esm/memory/pipeline/default.d.ts +49 -0
- package/dist/esm/memory/pipeline/ephemeral.d.ts +28 -0
- package/dist/esm/memory/pipeline/fact.d.ts +26 -0
- package/dist/esm/memory/pipeline/index.d.ts +13 -0
- package/dist/esm/memory/pipeline/narrative.d.ts +35 -0
- package/dist/esm/memory/pipeline/semantic.d.ts +37 -0
- package/dist/esm/memory/pipeline/types.d.ts +31 -0
- package/dist/esm/memory/stages/formatDefault.d.ts +64 -0
- package/dist/esm/memory/stages/index.d.ts +13 -0
- package/dist/esm/memory/stages/loadRecent.d.ts +49 -0
- package/dist/esm/memory/stages/pickByBudget.d.ts +63 -0
- package/dist/esm/memory/stages/summarize.d.ts +87 -0
- package/dist/esm/memory/stages/tokenize.d.ts +43 -0
- package/dist/esm/memory/stages/types.d.ts +75 -0
- package/dist/esm/memory/stages/writeMessages.d.ts +71 -0
- package/dist/esm/memory/store/InMemoryStore.d.ts +62 -0
- package/dist/esm/memory/store/index.d.ts +2 -0
- package/dist/esm/memory/store/types.d.ts +223 -0
- package/dist/esm/memory/wire/index.d.ts +2 -0
- package/dist/esm/memory/wire/mountMemoryPipeline.d.ts +108 -0
- package/dist/esm/memory-providers.d.ts +37 -0
- package/dist/esm/observability/contextError/finders/compareFinders.d.ts +19 -0
- package/dist/esm/observability/contextError/finders/index.d.ts +22 -0
- package/dist/esm/observability/contextError/finders/rankSuspects.d.ts +2 -0
- package/dist/esm/observability/contextError/finders/removeAndRetry.d.ts +11 -0
- package/dist/esm/observability/contextError/finders/shrinkToCause.d.ts +11 -0
- package/dist/esm/observability/contextError/finders/testManyCombos.d.ts +11 -0
- package/dist/esm/observability/contextError/finders/traceSteps.d.ts +2 -0
- package/dist/esm/observability/contextError/finders/types.d.ts +88 -0
- package/dist/esm/observability-providers.d.ts +46 -0
- package/dist/esm/observe.d.ts +62 -0
- package/dist/esm/patterns/Debate.d.ts +39 -0
- package/dist/esm/patterns/MapReduce.d.ts +66 -0
- package/dist/esm/patterns/Reflection.d.ts +51 -0
- package/dist/esm/patterns/SelfConsistency.d.ts +43 -0
- package/dist/esm/patterns/Swarm.d.ts +60 -0
- package/dist/esm/patterns/ToT.d.ts +53 -0
- package/dist/esm/patterns/index.d.ts +22 -0
- package/dist/esm/providers.d.ts +33 -0
- package/dist/esm/recorders/core/AgentRecorder.d.ts +15 -0
- package/dist/esm/recorders/core/CompositionRecorder.d.ts +17 -0
- package/dist/esm/recorders/core/ContextEvaluatedRecorder.d.ts +23 -0
- package/dist/esm/recorders/core/ContextRecorder.d.ts +47 -0
- package/dist/esm/recorders/core/CostRecorder.d.ts +14 -0
- package/dist/esm/recorders/core/EmitBridge.d.ts +30 -0
- package/dist/esm/recorders/core/ErrorBridge.d.ts +38 -0
- package/dist/esm/recorders/core/EvalRecorder.d.ts +16 -0
- package/dist/esm/recorders/core/MemoryRecorder.d.ts +17 -0
- package/dist/esm/recorders/core/PermissionRecorder.d.ts +16 -0
- package/dist/esm/recorders/core/ReliabilityRecorder.d.ts +24 -0
- package/dist/esm/recorders/core/SkillRecorder.d.ts +14 -0
- package/dist/esm/recorders/core/StreamRecorder.d.ts +15 -0
- package/dist/esm/recorders/core/ToolsRecorder.d.ts +18 -0
- package/dist/esm/recorders/core/ValidationRecorder.d.ts +16 -0
- package/dist/esm/recorders/core/contextEngineering.d.ts +136 -0
- package/dist/esm/recorders/core/typedEmit.d.ts +34 -0
- package/dist/esm/recorders/core/types.d.ts +97 -0
- package/dist/esm/recorders/observability/AgentThinkingTraceRecorder.d.ts +117 -0
- package/dist/esm/recorders/observability/BoundaryRecorder.d.ts +546 -0
- package/dist/esm/recorders/observability/FlowchartRecorder.d.ts +220 -0
- package/dist/esm/recorders/observability/LiveStateRecorder.d.ts +250 -0
- package/dist/esm/recorders/observability/LoggingRecorder.d.ts +91 -0
- package/dist/esm/recorders/observability/RouteRecorder.d.ts +83 -0
- package/dist/esm/recorders/observability/RunStepRecorder.d.ts +231 -0
- package/dist/esm/recorders/observability/StatusRecorder.d.ts +36 -0
- package/dist/esm/recorders/observability/ToolChoiceRecorder.d.ts +164 -0
- package/dist/esm/recorders/observability/ToolLineageRecorder.d.ts +71 -0
- package/dist/esm/recorders/observability/commentary/commentaryTemplates.d.ts +105 -0
- package/dist/esm/recorders/observability/internal/ActorArrowClassifier.d.ts +25 -0
- package/dist/esm/recorders/observability/internal/CandidateAnswerBuffer.d.ts +28 -0
- package/dist/esm/recorders/observability/internal/ForkTracker.d.ts +60 -0
- package/dist/esm/recorders/observability/internal/RootInferrer.d.ts +51 -0
- package/dist/esm/recorders/observability/internal/SequenceSiblingTracker.d.ts +24 -0
- package/dist/esm/recorders/observability/localObservability.d.ts +48 -0
- package/dist/esm/recorders/observability/observeRunId.d.ts +36 -0
- package/dist/esm/recorders/observability/status/statusTemplates.d.ts +106 -0
- package/dist/esm/recorders/observability/trace.d.ts +119 -0
- package/dist/esm/reliability/CircuitBreaker.d.ts +75 -0
- package/dist/esm/reliability/buildReliabilityGateChart.d.ts +53 -0
- package/dist/esm/reliability/classifyError.d.ts +28 -0
- package/dist/esm/reliability/index.d.ts +35 -0
- package/dist/esm/reliability/types.d.ts +327 -0
- package/dist/esm/resilience/fallbackProvider.d.ts +33 -0
- package/dist/esm/resilience/index.d.ts +21 -0
- package/dist/esm/resilience/withCircuitBreaker.d.ts +129 -0
- package/dist/esm/resilience/withFallback.d.ts +45 -0
- package/dist/esm/resilience/withRetry.d.ts +71 -0
- package/dist/esm/security/PermissionPolicy.d.ts +124 -0
- package/dist/esm/security/PolicyHaltError.d.ts +72 -0
- package/dist/esm/security/extractSequence.d.ts +46 -0
- package/dist/esm/security/index.d.ts +44 -0
- package/dist/esm/security/thinkingRedaction.d.ts +50 -0
- package/dist/esm/status.d.ts +48 -0
- package/dist/esm/strategies/attach.d.ts +47 -0
- package/dist/esm/strategies/compose.d.ts +48 -0
- package/dist/esm/strategies/defaults/chatBubbleLiveStatus.d.ts +36 -0
- package/dist/esm/strategies/defaults/consoleObservability.d.ts +42 -0
- package/dist/esm/strategies/defaults/inMemorySinkCost.d.ts +50 -0
- package/dist/esm/strategies/defaults/index.d.ts +30 -0
- package/dist/esm/strategies/defaults/noopLens.d.ts +28 -0
- package/dist/esm/strategies/index.d.ts +36 -0
- package/dist/esm/strategies/registry.d.ts +70 -0
- package/dist/esm/strategies/types.d.ts +303 -0
- package/dist/esm/stream.d.ts +82 -0
- package/dist/esm/thinking/AnthropicThinkingHandler.d.ts +42 -0
- package/dist/esm/thinking/MockThinkingHandler.d.ts +50 -0
- package/dist/esm/thinking/OpenAIThinkingHandler.d.ts +37 -0
- package/dist/esm/thinking/index.d.ts +51 -0
- package/dist/esm/thinking/registry.d.ts +33 -0
- package/dist/esm/thinking/types.d.ts +162 -0
- package/dist/esm/tool-providers/gatedTools.d.ts +36 -0
- package/dist/esm/tool-providers/index.d.ts +41 -0
- package/dist/esm/tool-providers/skillScopedTools.d.ts +45 -0
- package/dist/esm/tool-providers/staticTools.d.ts +21 -0
- package/dist/esm/tool-providers/types.d.ts +138 -0
- package/dist/index.js +6 -2
- package/dist/index.js.map +1 -1
- package/dist/lib/context-bisect/index.js +3 -0
- package/dist/lib/context-bisect/index.js.map +1 -1
- package/dist/lib/context-bisect/localize.js +2 -0
- package/dist/lib/context-bisect/localize.js.map +1 -1
- package/dist/lib/injection-engine/buildInjectionEngineSubflow.js +1 -0
- package/dist/lib/injection-engine/buildInjectionEngineSubflow.js.map +1 -1
- package/dist/lib/injection-engine/entryScorer.js +178 -0
- package/dist/lib/injection-engine/entryScorer.js.map +1 -0
- package/dist/lib/injection-engine/index.js +5 -1
- package/dist/lib/injection-engine/index.js.map +1 -1
- package/dist/lib/injection-engine/skillGraph.js +35 -41
- package/dist/lib/injection-engine/skillGraph.js.map +1 -1
- package/dist/lib/injection-engine/types.js.map +1 -1
- package/dist/memory/define.types.js +1 -1
- package/dist/types/core/agent/buildAgentChart.d.ts.map +1 -1
- package/dist/types/core/agent/buildDynamicAgentChart.d.ts.map +1 -1
- package/dist/types/core/agent/stages/pickEntry.d.ts.map +1 -1
- package/dist/types/core/agent/types.d.ts +8 -4
- package/dist/types/core/agent/types.d.ts.map +1 -1
- package/dist/types/index.d.ts +1 -1
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/lib/context-bisect/index.d.ts +3 -0
- package/dist/types/lib/context-bisect/index.d.ts.map +1 -1
- package/dist/types/lib/context-bisect/localize.d.ts +2 -0
- package/dist/types/lib/context-bisect/localize.d.ts.map +1 -1
- package/dist/types/lib/injection-engine/buildInjectionEngineSubflow.d.ts.map +1 -1
- package/dist/types/lib/injection-engine/entryScorer.d.ts +94 -0
- package/dist/types/lib/injection-engine/entryScorer.d.ts.map +1 -0
- package/dist/types/lib/injection-engine/index.d.ts +1 -0
- package/dist/types/lib/injection-engine/index.d.ts.map +1 -1
- package/dist/types/lib/injection-engine/skillGraph.d.ts +21 -24
- package/dist/types/lib/injection-engine/skillGraph.d.ts.map +1 -1
- package/dist/types/lib/injection-engine/types.d.ts +9 -4
- package/dist/types/lib/injection-engine/types.d.ts.map +1 -1
- package/dist/types/memory/define.types.d.ts +1 -1
- package/package.json +203 -56
|
@@ -0,0 +1,448 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* context-bisect types — RFC-003 Part B: the contextual-bug localizer
|
|
3
|
+
* ("git bisect for context").
|
|
4
|
+
*
|
|
5
|
+
* Pattern: assembly contract. Part B is pure ASSEMBLY over shipped pieces:
|
|
6
|
+
* footprintjs 9.8.0's complete causal DAG (control edges, honesty
|
|
7
|
+
* markers, `EdgeWeigher` hook) × influence-core scoring (D6) ×
|
|
8
|
+
* consumer-run counterfactual ablation. No new engine features,
|
|
9
|
+
* no new typed events.
|
|
10
|
+
* Role: `src/lib/context-bisect/` leaf. Exported via
|
|
11
|
+
* `agentfootprint/observe`.
|
|
12
|
+
*
|
|
13
|
+
* ## The two-tier honest-claims discipline (RFC-003 §B2)
|
|
14
|
+
*
|
|
15
|
+
* Every number in these types belongs to exactly ONE of two tiers, and the
|
|
16
|
+
* docs say which:
|
|
17
|
+
*
|
|
18
|
+
* - **CORRELATIONAL** — edge weights, suspect scores, rankings. These are
|
|
19
|
+
* deterministic embedding-geometry PROXIES (influence-core composite:
|
|
20
|
+
* semantic alignment between what a source wrote and what the LLM step
|
|
21
|
+
* produced). They mean "high semantic alignment", never "the model
|
|
22
|
+
* answered BECAUSE of this". A report without reruns stops here and is
|
|
23
|
+
* marked `mode: 'correlational'`.
|
|
24
|
+
*
|
|
25
|
+
* - **CAUSAL** — ablation verdicts ONLY. A suspect earns `verdict:
|
|
26
|
+
* 'confirmed'` exclusively by counterfactual evidence: the consumer's
|
|
27
|
+
* `AblationRunner` re-ran the scenario WITHOUT the suspect N seeded
|
|
28
|
+
* times and the outcome flipped (with baseline stability checked and
|
|
29
|
+
* variance reported — never a single-run verdict).
|
|
30
|
+
*
|
|
31
|
+
* Slice completeness is bounded by tracking — and SAYS so: untracked reads
|
|
32
|
+
* (`$getArgs()` / env / silent reads), missing control-dependence lookups,
|
|
33
|
+
* missing read tracking, and depth/node truncation all surface as
|
|
34
|
+
* `honestyFlags` on the report, mirrored from footprintjs's own A2/A4
|
|
35
|
+
* markers.
|
|
36
|
+
*/
|
|
37
|
+
import type { CommitBundle, RuntimeSnapshot, StageSnapshot } from 'footprintjs/advanced';
|
|
38
|
+
import type { ControlDepLookup } from 'footprintjs/trace';
|
|
39
|
+
import type { Embedder, InfluenceWeights } from '../influence-core/index.js';
|
|
40
|
+
/**
|
|
41
|
+
* Minimal structural envelope for captured typed events — satisfied by
|
|
42
|
+
* `AgentfootprintEvent` (collect with `agent.on('*', e => events.push(e))`).
|
|
43
|
+
* Structural so a consumer can hand in any array shaped like this.
|
|
44
|
+
*/
|
|
45
|
+
export interface CapturedEventLike {
|
|
46
|
+
readonly type: string;
|
|
47
|
+
readonly payload: unknown;
|
|
48
|
+
readonly meta: {
|
|
49
|
+
readonly runtimeStageId: string;
|
|
50
|
+
};
|
|
51
|
+
}
|
|
52
|
+
/**
|
|
53
|
+
* Minimal per-step quality lookup for trigger derivation — satisfied by
|
|
54
|
+
* footprintjs's `QualityRecorder` (structural, decoupled).
|
|
55
|
+
*/
|
|
56
|
+
export interface QualityTriggerLookup {
|
|
57
|
+
getLowest(): {
|
|
58
|
+
runtimeStageId: string;
|
|
59
|
+
entry: {
|
|
60
|
+
score: number;
|
|
61
|
+
stageName: string;
|
|
62
|
+
};
|
|
63
|
+
} | undefined;
|
|
64
|
+
}
|
|
65
|
+
/**
|
|
66
|
+
* The frozen evidence of one completed run — a structural SUPERSET of the
|
|
67
|
+
* trace-toolpack's `TraceToolpackArtifacts`, so the same bag literal can
|
|
68
|
+
* serve `traceToolpack(...)` and `localizeContextBug(...)`: every
|
|
69
|
+
* runtimeStageId in the report drills straight into the toolpack tools.
|
|
70
|
+
*
|
|
71
|
+
* - `snapshot` — `executor.getSnapshot()` / `agent.getLastSnapshot()`.
|
|
72
|
+
* - `controlDeps` — OPTIONAL `controlDepRecorder().asLookup()` from the
|
|
73
|
+
* run. With it, the slice includes `[control: <rule label>]` edges to
|
|
74
|
+
* the deciders that routed execution. Without it, the report carries the
|
|
75
|
+
* `no-control-deps` honesty flag.
|
|
76
|
+
* - `quality` — OPTIONAL `QualityRecorder` from the run; its
|
|
77
|
+
* lowest-scoring step is the default trigger when `atStep` is absent.
|
|
78
|
+
* - `events` — OPTIONAL captured typed events; used to extract LLM-call
|
|
79
|
+
* step ids (`stream.llm_start`) when `llmCallIds` is not given.
|
|
80
|
+
* - `llmCallIds` — explicit override: the runtimeStageIds of LLM-call
|
|
81
|
+
* executions (the steps whose parent edges D7 weighs).
|
|
82
|
+
*/
|
|
83
|
+
export interface ContextBugArtifacts {
|
|
84
|
+
readonly snapshot: RuntimeSnapshot;
|
|
85
|
+
readonly controlDeps?: ControlDepLookup;
|
|
86
|
+
readonly quality?: QualityTriggerLookup;
|
|
87
|
+
readonly events?: readonly CapturedEventLike[];
|
|
88
|
+
readonly llmCallIds?: readonly string[];
|
|
89
|
+
}
|
|
90
|
+
/**
|
|
91
|
+
* What kind of context source a suspect is — picks which ablation adapter
|
|
92
|
+
* applies. `'stage'` is the honest fallback for slice nodes the classifier
|
|
93
|
+
* cannot map to an ablatable source (pipeline plumbing, plain stages).
|
|
94
|
+
*/
|
|
95
|
+
export type SuspectKind = 'tool' | 'injection' | 'memory' | 'arg' | 'stage';
|
|
96
|
+
/** Kind-specific identity + the text the semantic refinement embedded. */
|
|
97
|
+
export interface SuspectDetail {
|
|
98
|
+
/** Tool name (kind 'tool'). */
|
|
99
|
+
readonly toolName?: string;
|
|
100
|
+
/** Injection id (kind 'injection' / 'memory') — `Injection.id`. */
|
|
101
|
+
readonly injectionId?: string;
|
|
102
|
+
/** Injection flavor (fact / skill / rag / memory / …), when known. */
|
|
103
|
+
readonly flavor?: string;
|
|
104
|
+
/**
|
|
105
|
+
* The suspect's own content text (tool result / injection rawContent),
|
|
106
|
+
* already redaction-scrubbed by footprintjs at commit time. This is
|
|
107
|
+
* what the embedder saw for `semanticScore`.
|
|
108
|
+
*/
|
|
109
|
+
readonly text?: string;
|
|
110
|
+
}
|
|
111
|
+
/**
|
|
112
|
+
* One hop of a suspect's evidence path — the argmax-weight chain from the
|
|
113
|
+
* trigger step back to the suspect. Control edges carry `kind: 'control'`
|
|
114
|
+
* and (when decide() supplied one) the rule label in `key`.
|
|
115
|
+
*/
|
|
116
|
+
export interface EdgePathStep {
|
|
117
|
+
/** Child (downstream) step. The first hop's `from` is the trigger. */
|
|
118
|
+
readonly from: string;
|
|
119
|
+
readonly fromName: string;
|
|
120
|
+
/** Parent (upstream) step. The last hop's `to` is the suspect. */
|
|
121
|
+
readonly to: string;
|
|
122
|
+
readonly toName: string;
|
|
123
|
+
readonly kind: 'data' | 'control';
|
|
124
|
+
/** State key (data) or decide() rule label (control), when present. */
|
|
125
|
+
readonly key?: string;
|
|
126
|
+
/** Edge weight — 1.0 unless D7 weighed it (child was an LLM call). */
|
|
127
|
+
readonly weight: number;
|
|
128
|
+
}
|
|
129
|
+
/** Per-run similarity statistics — variance ALWAYS reported (D9). */
|
|
130
|
+
export interface SimilarityStats {
|
|
131
|
+
readonly mean: number;
|
|
132
|
+
readonly min: number;
|
|
133
|
+
readonly max: number;
|
|
134
|
+
/** Population standard deviation across the N seeded reruns. */
|
|
135
|
+
readonly stdev: number;
|
|
136
|
+
}
|
|
137
|
+
/** Min/median/max of a cost metric across a probe's seeded reruns. */
|
|
138
|
+
export interface CostRange {
|
|
139
|
+
readonly median: number;
|
|
140
|
+
readonly min: number;
|
|
141
|
+
readonly max: number;
|
|
142
|
+
}
|
|
143
|
+
/**
|
|
144
|
+
* Per-probe COST (loops/tokens) across its seeded reruns — the second readout
|
|
145
|
+
* (proposal 004). Present on `AblationRunStats` ONLY when the runner reported
|
|
146
|
+
* `RunCost`. Absent otherwise (quality-only, byte-identical to before).
|
|
147
|
+
*/
|
|
148
|
+
export interface CostStats {
|
|
149
|
+
readonly samples: number;
|
|
150
|
+
readonly loops?: CostRange;
|
|
151
|
+
readonly tokens?: CostRange;
|
|
152
|
+
}
|
|
153
|
+
/** Evidence from N seeded ablation reruns of one probe. */
|
|
154
|
+
export interface AblationRunStats {
|
|
155
|
+
/** Seeded reruns performed (the consumer's runner was called N times). */
|
|
156
|
+
readonly samples: number;
|
|
157
|
+
/** Runs where `outcomeChanged(original, ablated)` was true. */
|
|
158
|
+
readonly flips: number;
|
|
159
|
+
/** Embedding similarity of each ablated output to the original. */
|
|
160
|
+
readonly similarity: SimilarityStats;
|
|
161
|
+
/** COST readout across the same reruns — present only when the runner
|
|
162
|
+
* reported `RunCost` (proposal 004). */
|
|
163
|
+
readonly cost?: CostStats;
|
|
164
|
+
}
|
|
165
|
+
export type AblationVerdictKind = 'confirmed' | 'not-confirmed' | 'inconclusive';
|
|
166
|
+
/**
|
|
167
|
+
* The ONLY causal claim in the report (§B2). `'confirmed'` = ablating the
|
|
168
|
+
* suspect flipped the outcome in a MAJORITY of N seeded reruns while the
|
|
169
|
+
* un-ablated baseline stayed stable. `'inconclusive'` = mixed flips, or an
|
|
170
|
+
* unstable baseline (the scenario itself doesn't reproduce — no ablation
|
|
171
|
+
* verdict is trustworthy then). `'not-confirmed'` = no flip observed; the
|
|
172
|
+
* suspect's score remains a correlational proxy only.
|
|
173
|
+
*/
|
|
174
|
+
export interface AblationVerdict {
|
|
175
|
+
readonly verdict: AblationVerdictKind;
|
|
176
|
+
/** Human-readable claim, phrased at the right tier (causal vs proxy). */
|
|
177
|
+
readonly claim: string;
|
|
178
|
+
}
|
|
179
|
+
/**
|
|
180
|
+
* A missing-context candidate (interface #3): a unit that was available for the
|
|
181
|
+
* turn but never reached the model. The mirror of a `Suspect`'s ablation
|
|
182
|
+
* verdict — `confirmed` here means RESTORING the unit flipped the outcome in a
|
|
183
|
+
* majority of seeded reruns on a stable baseline. Without a restoration runner
|
|
184
|
+
* it carries only `id`/`content` (a candidate, not a causal claim).
|
|
185
|
+
*/
|
|
186
|
+
export interface RestoredCandidate {
|
|
187
|
+
readonly id: string;
|
|
188
|
+
readonly content?: string;
|
|
189
|
+
/** Seeded restoration-probe stats (causal tier only). */
|
|
190
|
+
readonly runs?: AblationRunStats;
|
|
191
|
+
/** Restoration verdict (causal tier only). */
|
|
192
|
+
readonly verdict?: AblationVerdict;
|
|
193
|
+
}
|
|
194
|
+
/** One ranked suspect. */
|
|
195
|
+
export interface Suspect {
|
|
196
|
+
/**
|
|
197
|
+
* runtimeStageId of the slice node this suspect lives at — drillable
|
|
198
|
+
* with the trace-toolpack tools (`trace_node(source)` etc.).
|
|
199
|
+
*/
|
|
200
|
+
readonly source: string;
|
|
201
|
+
readonly stageName: string;
|
|
202
|
+
readonly kind: SuspectKind;
|
|
203
|
+
readonly detail?: SuspectDetail;
|
|
204
|
+
/**
|
|
205
|
+
* The ranking key — CORRELATIONAL proxy (§B2):
|
|
206
|
+
* `structuralScore × semanticScore` when a semantic refinement exists,
|
|
207
|
+
* else `structuralScore`. Means "semantically aligned and causally
|
|
208
|
+
* UPSTREAM", never "caused".
|
|
209
|
+
*
|
|
210
|
+
* Comparison caveat: a suspect WITHOUT content text (kind 'stage'/'arg',
|
|
211
|
+
* or a path through control edges only) keeps its bare structural score
|
|
212
|
+
* — an UPPER BOUND with no content evidence behind it. Plumbing can
|
|
213
|
+
* legitimately rank above ablatable sources; the ablation verdicts (and
|
|
214
|
+
* `semanticScore`'s presence) are what disambiguate.
|
|
215
|
+
*/
|
|
216
|
+
readonly score: number;
|
|
217
|
+
/**
|
|
218
|
+
* Max-product of edge weights along the best path from the trigger to
|
|
219
|
+
* this node (1.0 when no D7-weighted LLM edge is on the path).
|
|
220
|
+
*/
|
|
221
|
+
readonly structuralScore: number;
|
|
222
|
+
/**
|
|
223
|
+
* Influence-core composite of the suspect's own content vs the trigger
|
|
224
|
+
* step's output (clamped to [0, 1]); only for suspects with a known
|
|
225
|
+
* content text. The same proxy disclaimers as D6 apply.
|
|
226
|
+
*/
|
|
227
|
+
readonly semanticScore?: number;
|
|
228
|
+
/**
|
|
229
|
+
* TRUE when `score` includes a content signal (semanticScore present).
|
|
230
|
+
* FALSE = path-only structural score — an UPPER BOUND that can reach 1.0
|
|
231
|
+
* through control-edge paths alone; rank such suspects with care and
|
|
232
|
+
* prefer ablation verdicts to disambiguate. (Machine-readable twin of
|
|
233
|
+
* the "path only — no content signal" report marking.)
|
|
234
|
+
*/
|
|
235
|
+
readonly hasContentEvidence: boolean;
|
|
236
|
+
/** Evidence path, trigger → … → suspect, control edges labeled. */
|
|
237
|
+
readonly edgePath: readonly EdgePathStep[];
|
|
238
|
+
/** The counterfactual to run — absent for kind 'stage'. */
|
|
239
|
+
readonly ablation?: AblationSpec;
|
|
240
|
+
/** CAUSAL tier — present only when an `AblationRunner` was supplied. */
|
|
241
|
+
readonly verdict?: AblationVerdict;
|
|
242
|
+
/** The rerun evidence behind `verdict`. */
|
|
243
|
+
readonly runs?: AblationRunStats;
|
|
244
|
+
/** The COST readout (proposal 004) — present only when the runner reported
|
|
245
|
+
* `RunCost`. A WEAKER tier than `verdict`: it shows removal reduced cost
|
|
246
|
+
* beyond a placebo band, NOT that the work was "wasted". */
|
|
247
|
+
readonly cost?: CostVerdict;
|
|
248
|
+
}
|
|
249
|
+
/**
|
|
250
|
+
* The COST verdict for a suspect — a context bug's second cost (proposal 004).
|
|
251
|
+
* A WEAKER, gated tier than the flip `verdict`: it shows removing the suspect
|
|
252
|
+
* REDUCED cost (loops/tokens) beyond a length-matched placebo band with a stable
|
|
253
|
+
* sign — **necessity for the cost, NOT proof the work was "wasted".**
|
|
254
|
+
*/
|
|
255
|
+
export interface CostVerdict {
|
|
256
|
+
/** Removing the suspect reduced cost beyond the placebo band, stably. */
|
|
257
|
+
readonly reducedCostOnRemoval: boolean;
|
|
258
|
+
/** Loops saved by removal (baseline median − suspect median). */
|
|
259
|
+
readonly loopsSaved: number;
|
|
260
|
+
/** Tokens saved by removal. */
|
|
261
|
+
readonly tokensSaved: number;
|
|
262
|
+
/** Sign stable across seeds AND a placebo band existed to clear. When false,
|
|
263
|
+
* `reducedCostOnRemoval` is not trustworthy (treat as no cost evidence). */
|
|
264
|
+
readonly stable: boolean;
|
|
265
|
+
}
|
|
266
|
+
/**
|
|
267
|
+
* The 2×2 class a suspect falls in, derived by `classifySuspect` from its flip
|
|
268
|
+
* `verdict` (quality) and its `cost` verdict. The no-bug cell is
|
|
269
|
+
* `'no-detected-effect'` — never "innocent": a piece can matter in ways neither
|
|
270
|
+
* axis sees (overdetermination, same-loops-different-path).
|
|
271
|
+
*/
|
|
272
|
+
export type SuspectClass = 'content-bug' | 'cost-cause' | 'both' | 'no-detected-effect';
|
|
273
|
+
/**
|
|
274
|
+
* What to remove for one counterfactual rerun. The library BUILDS specs
|
|
275
|
+
* (one per suspect kind) and provides `applyAblations` to apply them to
|
|
276
|
+
* the inputs an agent is constructed from; the consumer's `AblationRunner`
|
|
277
|
+
* re-runs the scenario with the spec applied.
|
|
278
|
+
*
|
|
279
|
+
* THE TOOL SEAM (documented because `AgentOptions` has no `ignoredTools`):
|
|
280
|
+
* agentfootprint has no runtime tool kill-switch — tools enter an agent at
|
|
281
|
+
* construction (`.tools([...])` / injection `inject.tools`). Tool ablation
|
|
282
|
+
* therefore happens where tools are DECLARED: the runner rebuilds the
|
|
283
|
+
* agent with `applyAblations(specs, { tools }).tools`. The same pattern
|
|
284
|
+
* covers injections (rebuild without the excluded `Injection.id`s) and
|
|
285
|
+
* memory (filter `MemoryEntry`s by id before attaching).
|
|
286
|
+
*/
|
|
287
|
+
export type AblationSpec =
|
|
288
|
+
/** Drop these tools from the catalog the agent is built with. */
|
|
289
|
+
{
|
|
290
|
+
readonly kind: 'tool';
|
|
291
|
+
readonly ignoredTools: readonly string[];
|
|
292
|
+
}
|
|
293
|
+
/** Drop these injections (facts / skills / instructions / steering / rag). */
|
|
294
|
+
| {
|
|
295
|
+
readonly kind: 'injection';
|
|
296
|
+
readonly excludeInjectionIds: readonly string[];
|
|
297
|
+
}
|
|
298
|
+
/** Drop these memory entries (matched by `MemoryEntry.id`). */
|
|
299
|
+
| {
|
|
300
|
+
readonly kind: 'memory';
|
|
301
|
+
readonly excludeMemoryIds: readonly string[];
|
|
302
|
+
}
|
|
303
|
+
/**
|
|
304
|
+
* The suspect is run INPUT (`$getArgs()` / seed args) — there is nothing
|
|
305
|
+
* the library can filter. The consumer's runner must override the arg
|
|
306
|
+
* itself (e.g. re-run with a neutralized field). `source` names the step
|
|
307
|
+
* that consumed the untracked input.
|
|
308
|
+
*/
|
|
309
|
+
| {
|
|
310
|
+
readonly kind: 'arg';
|
|
311
|
+
readonly source: string;
|
|
312
|
+
readonly note: string;
|
|
313
|
+
};
|
|
314
|
+
/**
|
|
315
|
+
* Contract for the consumer-supplied counterfactual runner (`AblationRunner`, declared after `RunCost` below): re-run the SAME scenario with
|
|
316
|
+
* every spec in `specs` applied, and return the run's output text (or `{ output, cost }` to also report cost).
|
|
317
|
+
*
|
|
318
|
+
* Contract:
|
|
319
|
+
* - `specs` may be empty — that is the BASELINE probe (re-run unchanged);
|
|
320
|
+
* its outputs measure the scenario's natural variance.
|
|
321
|
+
* - `run.seed` varies 0..N-1 across the N samples of one probe. Thread it
|
|
322
|
+
* into any stochastic knob (sampling temperature seed, mock script
|
|
323
|
+
* selection) so reruns are deterministic-but-distinct; ignore it for
|
|
324
|
+
* fully deterministic scenarios.
|
|
325
|
+
* - Build a FRESH agent/provider per call — scripted mock providers are
|
|
326
|
+
* stateful (replies consume in order).
|
|
327
|
+
*/
|
|
328
|
+
/**
|
|
329
|
+
* The COST of one re-run — the second readout the two-score localizer reads
|
|
330
|
+
* from the SAME ablation (proposal 004). Both optional; cost scoring is opt-in
|
|
331
|
+
* by a runner that reports them. `loops` = agent loop iterations the run took;
|
|
332
|
+
* `tokens` = total tokens it consumed.
|
|
333
|
+
*/
|
|
334
|
+
export interface RunCost {
|
|
335
|
+
readonly loops?: number;
|
|
336
|
+
readonly tokens?: number;
|
|
337
|
+
}
|
|
338
|
+
/**
|
|
339
|
+
* Consumer-supplied counterfactual runner (contract: see the comment block preceding `RunCost`). Returns the run's output
|
|
340
|
+
* text — OR, to ALSO unlock the cost score (proposal 004), `{ output, cost }`
|
|
341
|
+
* from the SAME re-run (one ablation, two readouts). Backward-compatible:
|
|
342
|
+
* `runAblationProbe` normalizes both shapes, so a bare-string runner keeps
|
|
343
|
+
* quality-only behavior unchanged.
|
|
344
|
+
*/
|
|
345
|
+
export type AblationRunner = (specs: readonly AblationSpec[], run: {
|
|
346
|
+
readonly seed: number;
|
|
347
|
+
}) => Promise<string | {
|
|
348
|
+
readonly output: string;
|
|
349
|
+
readonly cost?: RunCost;
|
|
350
|
+
}>;
|
|
351
|
+
/**
|
|
352
|
+
* Did the ablated output mean something DIFFERENT from the original?
|
|
353
|
+
* Default: embedding similarity below `flipThreshold`. Override with a
|
|
354
|
+
* domain comparator (e.g. compare extracted decisions) — recommended with
|
|
355
|
+
* `mockEmbedder`, whose cosine compresses prose into ~0.85–0.97 (the C1
|
|
356
|
+
* calibration note: absolute thresholds only with real embedders).
|
|
357
|
+
*/
|
|
358
|
+
export type OutcomeComparator = (original: string, ablated: string) => boolean | Promise<boolean>;
|
|
359
|
+
/** The rerun configuration that upgrades the report to the causal tier. */
|
|
360
|
+
export interface AblationRerun {
|
|
361
|
+
readonly runner: AblationRunner;
|
|
362
|
+
/** The original (buggy) output the reruns are compared against. */
|
|
363
|
+
readonly originalOutput: string;
|
|
364
|
+
/** Seeded reruns per probe. Default 3. Never below 2 (no single-run verdicts — D9). */
|
|
365
|
+
readonly samples?: number;
|
|
366
|
+
/** Outcome-flip comparator. Default: similarity < `flipThreshold`. */
|
|
367
|
+
readonly outcomeChanged?: OutcomeComparator;
|
|
368
|
+
/** Similarity floor for the DEFAULT comparator. Default 0.8. */
|
|
369
|
+
readonly flipThreshold?: number;
|
|
370
|
+
/** Ablate only the top-K ranked suspects that carry a spec. Default 5. */
|
|
371
|
+
readonly maxSuspects?: number;
|
|
372
|
+
}
|
|
373
|
+
/** Slice-shape numbers — how much evidence the ranking stands on. */
|
|
374
|
+
export interface SliceStats {
|
|
375
|
+
readonly nodes: number;
|
|
376
|
+
readonly dataEdges: number;
|
|
377
|
+
readonly controlEdges: number;
|
|
378
|
+
/** Edges that received a D7 (LLM-influence) weight. */
|
|
379
|
+
readonly weightedEdges: number;
|
|
380
|
+
/** Nodes that ALSO consumed untracked sources (args/env/silent reads). */
|
|
381
|
+
readonly incompleteNodes: number;
|
|
382
|
+
readonly maxDepth: number;
|
|
383
|
+
readonly maxNodes: number;
|
|
384
|
+
/** Present when a limit actually cut the slice (footprintjs A4). */
|
|
385
|
+
readonly truncated?: {
|
|
386
|
+
readonly byDepth: boolean;
|
|
387
|
+
readonly byNodes: boolean;
|
|
388
|
+
};
|
|
389
|
+
}
|
|
390
|
+
export type HonestyFlagKind = 'slice-truncated' | 'untracked-sources' | 'no-control-deps' | 'no-read-tracking' | 'no-llm-call-ids' | 'baseline-unstable';
|
|
391
|
+
export interface HonestyFlag {
|
|
392
|
+
readonly flag: HonestyFlagKind;
|
|
393
|
+
readonly note: string;
|
|
394
|
+
}
|
|
395
|
+
/** The localizer's full output (D8). */
|
|
396
|
+
export interface ContextBugReport {
|
|
397
|
+
/** The trigger step the slice was rooted at. */
|
|
398
|
+
readonly step: string;
|
|
399
|
+
readonly stepName: string;
|
|
400
|
+
/** Where the trigger came from. */
|
|
401
|
+
readonly triggerSource: 'explicit' | 'quality' | 'custom';
|
|
402
|
+
/** The quality score that selected the trigger (quality source only). */
|
|
403
|
+
readonly triggerScore?: number;
|
|
404
|
+
/**
|
|
405
|
+
* `'correlational'` — no `AblationRunner` supplied: the report STOPS at
|
|
406
|
+
* the ranking; every score is a proxy and no causal claim is made.
|
|
407
|
+
* `'causal'` — suspects additionally carry ablation verdicts (§B2: the
|
|
408
|
+
* verdicts are the only causal claims; the scores stay proxies).
|
|
409
|
+
*/
|
|
410
|
+
readonly mode: 'correlational' | 'causal';
|
|
411
|
+
/** Ranked suspects, best (most aligned + upstream) first. */
|
|
412
|
+
readonly suspects: readonly Suspect[];
|
|
413
|
+
/**
|
|
414
|
+
* Missing-context candidates (interface #3): units that were available for
|
|
415
|
+
* the turn but never reached the model. Each carries a RESTORATION verdict
|
|
416
|
+
* when a restoration runner was supplied (causal tier — restoring it flipped
|
|
417
|
+
* the outcome). Absent when no `missingContext` option was passed.
|
|
418
|
+
*/
|
|
419
|
+
readonly dropped?: readonly RestoredCandidate[];
|
|
420
|
+
readonly sliceStats: SliceStats;
|
|
421
|
+
/** ⚠ everything that bounds what this report can honestly claim. */
|
|
422
|
+
readonly honestyFlags: readonly HonestyFlag[];
|
|
423
|
+
/** Ablation baseline probe stats (causal mode only). */
|
|
424
|
+
readonly baseline?: AblationRunStats;
|
|
425
|
+
/** Restoration baseline probe stats — the missing-context tier's twin of
|
|
426
|
+
* `baseline` (present only when a restoration runner ran). An unstable one
|
|
427
|
+
* also raises a `baseline-unstable` honesty flag. */
|
|
428
|
+
readonly restorationBaseline?: AblationRunStats;
|
|
429
|
+
}
|
|
430
|
+
export declare const CONTEXT_BISECT_DEFAULTS: {
|
|
431
|
+
/** Slice depth budget (forwarded to `causalChain`). */
|
|
432
|
+
readonly maxDepth: 12;
|
|
433
|
+
/** Slice node budget (forwarded to `causalChain`). */
|
|
434
|
+
readonly maxNodes: 80;
|
|
435
|
+
/** Ranked suspects kept on the report. */
|
|
436
|
+
readonly maxSuspects: 12;
|
|
437
|
+
/** Chars of written content embedded per step text (D7). */
|
|
438
|
+
readonly maxTextChars: 2000;
|
|
439
|
+
/** Seeded reruns per ablation probe (D9 — never single-run verdicts). */
|
|
440
|
+
readonly samples: 3;
|
|
441
|
+
/** Default similarity floor for the default outcome comparator. */
|
|
442
|
+
readonly flipThreshold: 0.8;
|
|
443
|
+
/** Ablation probes budget for `bisectCulprits`. */
|
|
444
|
+
readonly maxProbes: 24;
|
|
445
|
+
/** Independent-culprit search rounds for `bisectCulprits`. */
|
|
446
|
+
readonly maxCulprits: 4;
|
|
447
|
+
};
|
|
448
|
+
export type { CommitBundle, ControlDepLookup, Embedder, InfluenceWeights, RuntimeSnapshot, StageSnapshot, };
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* walk-to-root — L4: the influence-guided backtracking debugger (proposal 007).
|
|
3
|
+
*
|
|
4
|
+
* `walkToRoot` walks BACKWARD from the symptom across loops to find the ROOT context source of a
|
|
5
|
+
* DECISION bug (root ≠ proximate). Per hop: NARROW with per-loop influence (L3) → HOP along
|
|
6
|
+
* `writerId` provenance to the loop that produced the culprit → ISOLATE with run-wide ablation (L2).
|
|
7
|
+
* Pure orchestration over shipped tiers — no new scorer, no new ablation.
|
|
8
|
+
*
|
|
9
|
+
* Home: `context-bisect` (consumes Trajectory + shortlist + the localizer's ablation).
|
|
10
|
+
*
|
|
11
|
+
* HONESTY (the proxy CAN misdirect the hop — folded in from review):
|
|
12
|
+
* - The per-hop narrow is a correlational PROXY (FA-dominated) — it points at a neighborhood and
|
|
13
|
+
* CANNOT separate a planted instruction from an innocent same-topic sibling. Three guards: (1) BEAM
|
|
14
|
+
* not top-1 (keep siblings in play); (2) ablation is the ONLY discriminator (a wrong-branch hop that
|
|
15
|
+
* doesn't flip is never `root`); (3) the narrow reorders, never drops. Remaining caveat: if the narrow
|
|
16
|
+
* never surfaces the true root into the beam, ablation never tests it and the walk stops shallow — a
|
|
17
|
+
* recall blind spot, amplified in a single chain.
|
|
18
|
+
* - `root` is CAUSAL (ablation-only): set only when run-wide ablation flips on a stable baseline.
|
|
19
|
+
* Without a `rerun`, the walk is correlational and `root` is absent.
|
|
20
|
+
* - FLAT charts only for the cross-loop hop — grouped loop frames are scope-isolated (degraded + flagged).
|
|
21
|
+
* - Three first-class honest stops (never silent): `unseparated-siblings`, `overdetermined-or-incomplete`,
|
|
22
|
+
* `untracked-origin`.
|
|
23
|
+
*
|
|
24
|
+
* VALIDATION (the descent edge is now populated on real agents — proposal 008): `assembleTrajectory`
|
|
25
|
+
* surfaces each loop's proximate `lastToolResult` on the WALK-ONLY `LoopFrame.proximateToolSource`
|
|
26
|
+
* field (writer = the PRODUCING loop's tool-calls stage — a cross-loop edge). `writtenByOf` reads it,
|
|
27
|
+
* so on a real flat agent with tool calls the cross-loop DESCENT fires (proven at the component level:
|
|
28
|
+
* the enrichment populates the edge on a real trajectory + the algorithm descends on it). It is
|
|
29
|
+
* WALK-ONLY — not in `contextSources` — so L3's narrow + its measured recall are untouched.
|
|
30
|
+
* END-TO-END VALIDATED (ctxbug/harness/eval-l4-walk.mjs): on a REAL agentfootprint misdirect agent
|
|
31
|
+
* with a realistic embedder (bge) for the narrow and a REAL causal ablation (rebuild the agent WITHOUT
|
|
32
|
+
* the planted fact → the outcome flips), the walk BURIES the plant at the symptom, DESCENDS via the
|
|
33
|
+
* proximate tool edge to the wrong-decision loop, and ablation convicts `root = the planted
|
|
34
|
+
* instruction` — where flat single-trigger localize does not. FLAT only — grouped's run-level
|
|
35
|
+
* `lastToolResult` lives outside the per-scope inner logs (deferred, degrade-flagged below).
|
|
36
|
+
*/
|
|
37
|
+
import type { Embedder } from '../influence-core/index.js';
|
|
38
|
+
import { type SuspectClassifier } from './localize.js';
|
|
39
|
+
import type { Trajectory } from './trajectory.js';
|
|
40
|
+
import type { AblationRerun, AblationVerdict, ContextBugArtifacts, HonestyFlag, SuspectKind } from './types.js';
|
|
41
|
+
/** One honest stop reason when the walk cannot cleanly descend/convict. */
|
|
42
|
+
export type RootCauseNote = 'unseparated-siblings' | 'overdetermined-or-incomplete' | 'untracked-origin';
|
|
43
|
+
/** One hop of the symptom→root walk. */
|
|
44
|
+
export interface RootCauseHop {
|
|
45
|
+
/** The loop this hop examined. */
|
|
46
|
+
readonly loopIndex: number;
|
|
47
|
+
/** The narrowed culprit at this hop (joins a localizer Suspect 1:1). */
|
|
48
|
+
readonly suspectId: string;
|
|
49
|
+
readonly kind: SuspectKind;
|
|
50
|
+
/** The narrow is text-similarity — a PROXY, never causal. */
|
|
51
|
+
readonly narrowedBy: 'text-similarity';
|
|
52
|
+
/** The causal convict — present only when a `rerun` ablated this hop's suspect. */
|
|
53
|
+
readonly verdict?: AblationVerdict;
|
|
54
|
+
/** The provenance writer this hop's culprit came from (runtimeStageId). */
|
|
55
|
+
readonly writtenBy?: string;
|
|
56
|
+
/** The loopIndex the walk descended to next (the writer's frame); absent if it stopped here. */
|
|
57
|
+
readonly cameFrom?: number;
|
|
58
|
+
/** A first-class honest stop, when applicable. */
|
|
59
|
+
readonly note?: RootCauseNote;
|
|
60
|
+
}
|
|
61
|
+
export interface RootCausePath {
|
|
62
|
+
/** Symptom → … in walk order. */
|
|
63
|
+
readonly hops: readonly RootCauseHop[];
|
|
64
|
+
/** The DEEPEST ablation-convicted hop (CAUSAL). Absent without a flip / without a rerun. */
|
|
65
|
+
readonly root?: RootCauseHop;
|
|
66
|
+
readonly honestyFlags: readonly HonestyFlag[];
|
|
67
|
+
readonly truncated?: {
|
|
68
|
+
readonly byHops: boolean;
|
|
69
|
+
readonly byAblations: boolean;
|
|
70
|
+
};
|
|
71
|
+
}
|
|
72
|
+
export interface WalkToRootOptions {
|
|
73
|
+
readonly embedder: Embedder;
|
|
74
|
+
/** The convict tier. Without it the walk is correlational and `root` is absent. */
|
|
75
|
+
readonly rerun?: AblationRerun;
|
|
76
|
+
/** Top-k writers ablated per hop (NOT top-1 — the proxy can't separate same-topic siblings). Default 2. */
|
|
77
|
+
readonly beamK?: number;
|
|
78
|
+
/** Forwarded to the per-loop narrow. */
|
|
79
|
+
readonly recencyDecay?: number;
|
|
80
|
+
readonly k?: number;
|
|
81
|
+
/** Walk-depth budget. Default 8. */
|
|
82
|
+
readonly maxHops?: number;
|
|
83
|
+
/** Total ablation-probe budget across the whole walk. Default 24. */
|
|
84
|
+
readonly maxAblations?: number;
|
|
85
|
+
readonly classifier?: SuspectClassifier;
|
|
86
|
+
readonly signal?: AbortSignal;
|
|
87
|
+
}
|
|
88
|
+
/**
|
|
89
|
+
* Resolve every `writerId` to the index of the frame whose `bodyIds` contains it (NOT via
|
|
90
|
+
* `parseRuntimeStageId` — that yields executionIndex, not loopIndex). A writer not in any frame
|
|
91
|
+
* (run prelude / root-seeded) has no entry in the returned map, so a lookup yields `undefined`. Invariant: each resolved id lands in exactly one frame.
|
|
92
|
+
*/
|
|
93
|
+
export declare function buildWriterFrameIndex(trajectory: Trajectory): Map<string, number>;
|
|
94
|
+
/**
|
|
95
|
+
* Walk backward from the symptom to the root context source — from a recorded run.
|
|
96
|
+
* Thin wrapper: `assembleTrajectory` then {@link walkTrajectory}. See module docs for the honesty model.
|
|
97
|
+
*/
|
|
98
|
+
export declare function walkToRoot(artifacts: ContextBugArtifacts, opts: WalkToRootOptions): Promise<RootCausePath>;
|
|
99
|
+
/**
|
|
100
|
+
* The walk itself, over an already-assembled {@link Trajectory} (composable + directly testable).
|
|
101
|
+
* FLAT charts only for the cross-loop hop; grouped charts degrade (within-loop) with a flag.
|
|
102
|
+
*/
|
|
103
|
+
export declare function walkTrajectory(trajectory: Trajectory, opts: WalkToRootOptions): Promise<RootCausePath>;
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* rankingConfidence — honesty marker for an influence ranking (RFC-003 honesty
|
|
3
|
+
* marker; influence-core block D6). Internal concept: "attributability".
|
|
4
|
+
*
|
|
5
|
+
* Pattern: pure, embedder-free function over a `scoreInfluence` result.
|
|
6
|
+
* Deterministic; no I/O.
|
|
7
|
+
* Role: `src/lib/influence-core/` leaf. The honesty companion to the
|
|
8
|
+
* four-signal scorer: it says when the ranking is a SHORTLIST,
|
|
9
|
+
* not a verdict.
|
|
10
|
+
*
|
|
11
|
+
* ## Why this exists (the measured blind spot)
|
|
12
|
+
*
|
|
13
|
+
* Output-similarity influence ranks sources by how much they resemble the
|
|
14
|
+
* final answer. That is structurally blind to ABSENCE / CROWDING bugs: a
|
|
15
|
+
* culprit that caused the error by *displacing* context (history truncation,
|
|
16
|
+
* context dilution) need not resemble the answer — so it ranks low, or below
|
|
17
|
+
* an innocent that the answer happens to talk about. The tell is not a low
|
|
18
|
+
* absolute score; it is a FLAT top — no source clearly dominates. This
|
|
19
|
+
* function reports that flatness honestly so consumers escalate to ablation
|
|
20
|
+
* (the causal tier) instead of trusting a confident-but-wrong rank-1.
|
|
21
|
+
*
|
|
22
|
+
* Honest claim (RFC-002 §2 discipline): `clearWinner` is a proxy for "the
|
|
23
|
+
* ranking has a clear lead", never "the lead is the cause". A clear lead can
|
|
24
|
+
* still be an innocent the answer rationalizes over — only ablation makes
|
|
25
|
+
* causal claims.
|
|
26
|
+
*/
|
|
27
|
+
import type { ConfidenceStrategy, InfluenceScore, RankingConfidence } from './types.js';
|
|
28
|
+
/**
|
|
29
|
+
* Default strategy: ABSOLUTE top-2 gap `s0 − s1 >= threshold`. Simple and
|
|
30
|
+
* interpretable, but embedder-relative (the gap scale depends on the embedding
|
|
31
|
+
* geometry). Use `ratioStrategy` for cross-embedder transfer.
|
|
32
|
+
*/
|
|
33
|
+
export declare function marginStrategy(threshold?: number): ConfidenceStrategy;
|
|
34
|
+
/**
|
|
35
|
+
* Scale-invariant strategy: top-2 gap as a FRACTION of the top score,
|
|
36
|
+
* `(s0 − s1) / |s0| >= threshold`. Transfers across embedders / answer lengths
|
|
37
|
+
* where the absolute margin does not. A zero (or all-equal) top is never a
|
|
38
|
+
* clear winner.
|
|
39
|
+
*/
|
|
40
|
+
export declare function ratioStrategy(threshold?: number): ConfidenceStrategy;
|
|
41
|
+
export interface RankingConfidenceOptions {
|
|
42
|
+
/**
|
|
43
|
+
* The decisiveness rule. Default: `marginStrategy(clearWinnerMargin)`.
|
|
44
|
+
* When set, it WINS — `clearWinnerMargin` is ignored. Bring your own
|
|
45
|
+
* (e.g. entropy / dispersion) or use the shipped `ratioStrategy`.
|
|
46
|
+
*/
|
|
47
|
+
readonly strategy?: ConfidenceStrategy;
|
|
48
|
+
/** Threshold for the DEFAULT margin strategy (ignored when `strategy` is
|
|
49
|
+
* set). Default: `DEFAULT_CLEAR_WINNER_MARGIN` (0.05). */
|
|
50
|
+
readonly clearWinnerMargin?: number;
|
|
51
|
+
/** Score band below the top defining the shortlist to double-check.
|
|
52
|
+
* Default: `DEFAULT_SHORTLIST_BAND` (0.1). Recommended >=
|
|
53
|
+
* `clearWinnerMargin` so the shortlist is at least as wide as the
|
|
54
|
+
* winning gap (the function also guarantees the runner-up is shortlisted
|
|
55
|
+
* when there is no clear winner, so a smaller value is safe). */
|
|
56
|
+
readonly shortlistBand?: number;
|
|
57
|
+
}
|
|
58
|
+
/**
|
|
59
|
+
* Assess whether an influence ranking has a clear winner to trust as a lead,
|
|
60
|
+
* or is too close to call and should be confirmed by ablation.
|
|
61
|
+
*
|
|
62
|
+
* Guarantees (relied on by the localizer): the returned `shortlist` always
|
|
63
|
+
* contains `lead` when there is one, and — when there is NO clear winner and
|
|
64
|
+
* there are ≥2 suspects — always contains the runner-up too (so ablation over
|
|
65
|
+
* the shortlist covers the real culprit even if it ranked below an innocent).
|
|
66
|
+
*
|
|
67
|
+
* @param scores `scoreInfluence` output (any order — re-sorted defensively).
|
|
68
|
+
* Ids are assumed unique (as `scoreInfluence` enforces); the
|
|
69
|
+
* shortlist is de-duplicated defensively regardless.
|
|
70
|
+
* @throws Error on negative or NaN options.
|
|
71
|
+
*/
|
|
72
|
+
export declare function rankingConfidence(scores: readonly InfluenceScore[], options?: RankingConfidenceOptions): RankingConfidence;
|