npm - @caupulican/pi-adaptative - Versions diffs - 0.80.96 → 0.80.98 - Mend

@caupulican/pi-adaptative 0.80.96 → 0.80.98

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

package/dist/core/agent-session.js CHANGED Viewed

@@ -25,6 +25,7 @@ import { evaluateToolGate } from "./autonomy/gates.js";
 import { LaneTracker } from "./autonomy/lane-tracker.js";
 import { appendLaneRecordSnapshot, getLaneRecordSnapshots } from "./autonomy/session-lane-record.js";
 import { composeSubagentSystemPrompt } from "./autonomy/subagent-prompt.js";
+import { AUTONOMY_TELEMETRY_EVENT_TYPES, redactTelemetryValue, } from "./autonomy/telemetry-events.js";
 import { executeBashWithOperations } from "./bash-executor.js";
 import { calculateContextTokens, collectEntriesForBranchSummary, compact, estimateContextTokens, generateBranchSummary, prepareCompaction, shouldCompact, } from "./compaction/index.js";
 // (module-scope helper for curation goal extraction defined below the imports)
@@ -57,6 +58,7 @@ import { buildGoalRuntimeSnapshot, } from "./goals/goal-runtime-snapshot.js";
 import { appendGoalStateSnapshot, getLatestGoalStateSnapshot } from "./goals/session-goal-state.js";
 import { appendLearningAuditSnapshot, getLearningAuditSnapshots, proposalFromReflectionWrite, rollbackPlanForReflectionWrite, } from "./learning/learning-audit.js";
 import { evaluateLearningDecision } from "./learning/learning-gate.js";
+import { ObservationStore, observationKey } from "./learning/observation-store.js";
 import { decideDemand, ReflectionEngine, } from "./learning/reflection-engine.js";
 import { appendLearningDecisionSnapshot, getLearningDecisionSnapshots } from "./learning/session-learning-decision.js";
 import { isPromotedFrontmatter, SkillCurator } from "./learning/skill-curator.js";
@@ -69,6 +71,7 @@ import { createCustomMessage } from "./messages.js";
 import { deriveModelCapabilityProfile, filterToolNamesForCapability, } from "./model-capability.js";
 import { resolveCliModel, resolveProfileModelSettings } from "./model-resolver.js";
 import { collectModelRouterConfigDiagnostics } from "./model-router/config-diagnostics.js";
+import { classifyExecutorTurn } from "./model-router/executor-route.js";
 import { classifyModelRouterRoute } from "./model-router/intent-classifier.js";
 import { ROUTE_JUDGE_MAX_OUTPUT_TOKENS, runRouteJudge } from "./model-router/route-judge.js";
 import { bufferModelRouterSessionCustomMessage, bufferModelRouterSessionMessage, createModelRouterSessionBuffer, flushModelRouterSessionBuffer, } from "./model-router/session-buffer.js";
@@ -79,6 +82,7 @@ import { expandPromptTemplate } from "./prompt-templates.js";
 import { runModelFitnessProbe } from "./research/model-fitness.js";
 import { runResearch } from "./research/research-runner.js";
 import { appendEvidenceBundleSnapshot, getEvidenceBundleSnapshots, getLatestEvidenceBundleSnapshot, } from "./research/session-evidence-bundle.js";
+import { collectWorkspaceSources } from "./research/workspace-collector.js";
 import { stripResourceProfileBlocks } from "./resource-profile-blocks.js";
 import { classifyToolTrust, UNTRUSTED_BOUNDARY_SYSTEM_RULE, wrapUntrustedText } from "./security/untrusted-boundary.js";
 import { CURRENT_SESSION_VERSION, getLatestCompactionEntry } from "./session-manager.js";
@@ -142,6 +146,9 @@ function formatModelRouterModel(model) {
 function persistModelRouterDecision(sessionManager, decision) {
     sessionManager.appendCustomEntry(MODEL_ROUTER_DECISION_CUSTOM_TYPE, decision);
 }
+/** Custom-entry type for G3 autonomy telemetry. Distinct from the router/lane record types so a
+ * telemetry consumer can filter on it without decoding operational snapshots. */
+const AUTONOMY_TELEMETRY_CUSTOM_TYPE = "autonomy-telemetry";
 /** Read a packed grep/find tool result's `details.artifactId`, if present, without `any`. */
 function extractArtifactId(message) {
     if (!message || message.role !== "toolResult")
@@ -256,6 +263,7 @@ export class AgentSession {
     _baseToolDefinitions = new Map();
     _cwd;
     _agentDir;
+    _collectWorkspaceSources;
     _extensionRunnerRef;
     _initialActiveToolNames;
     _allowedToolNames;
@@ -323,6 +331,7 @@ export class AgentSession {
         this._customTools = config.customTools ?? [];
         this._cwd = config.cwd;
         this._agentDir = config.agentDir ?? getAgentDir();
+        this._collectWorkspaceSources = config.collectWorkspaceSources ?? collectWorkspaceSources;
         this._modelRegistry = config.modelRegistry;
         this._extensionRunnerRef = config.extensionRunnerRef;
         this._initialActiveToolNames = config.initialActiveToolNames;
@@ -1213,7 +1222,13 @@ export class AgentSession {
                 writePayloads,
                 curation: curationSettings.enabled
                     ? {
-                        resolveDigest: (digestKey) => this._brainCurator.getDigest(digestKey),
+                        resolveDigest: (digestKey) => {
+                            const digest = this._brainCurator.getDigest(digestKey);
+                            // Count serves on the REAL per-turn pass only, never the report path.
+                            if (digest !== undefined && writePayloads)
+                                this._brainCurator.noteDigestServed();
+                            return digest;
+                        },
                         // Only the real per-turn pass enqueues work; the read-only report path
                         // (writePayloads=false) stays side-effect free.
                         onPacked: writePayloads
@@ -1315,7 +1330,12 @@ export class AgentSession {
     _installAgentToolHooks() {
         this.agent.beforeToolCall = async ({ toolCall, args }) => {
             if (this._activeModelRouterRoute &&
-                shouldEscalateModelRouterTool({ tier: this._activeModelRouterRoute.tier, toolName: toolCall.name, args })) {
+                shouldEscalateModelRouterTool({
+                    tier: this._activeModelRouterRoute.tier,
+                    toolName: toolCall.name,
+                    args,
+                    reasonCode: this._activeModelRouterRoute.reasonCode,
+                })) {
                 this._modelRouterEscalationRequested = true;
                 this.agent.abort();
                 return {
@@ -2037,12 +2057,54 @@ export class AgentSession {
             return false;
         return this._modelRegistry.hasConfiguredAuth(resolved.model);
     }
+    /**
+     * G16 executor lane: decide whether this turn can be routed directly to the executor model.
+     *
+     * A route is returned only when ALL of the following hold:
+     *  - an executor pattern is configured,
+     *  - classifyExecutorTurn reports `execute` for the prompt against the toolkit scripts,
+     *  - the pattern resolves to a model that has configured auth,
+     *  - the host fitness record for that model contains at least one tool-call probe and at
+     *    least two thirds of the probes succeeded.
+     *
+     * Side effect: on success `_lastModelRouterIntent` is set to "research".
+     *
+     * @param prompt - Prompt text for the turn being routed.
+     * @param executorPattern - Model pattern for the executor lane; a falsy value disables the lane.
+     * @returns `{ decision, model }` describing the executor route, or `undefined` when the lane
+     *   does not apply (including when any step throws).
+     */
+    _resolveExecutorRoute(prompt, executorPattern) {
+        if (!executorPattern)
+            return undefined;
+        try {
+            const verdict = classifyExecutorTurn(prompt, this.settingsManager.getToolkitScripts());
+            if (!verdict.execute)
+                return undefined;
+            const resolved = resolveCliModel({ cliModel: executorPattern, modelRegistry: this._modelRegistry });
+            if (!resolved.model || !this._modelRegistry.hasConfiguredAuth(resolved.model))
+                return undefined;
+            // Fitness gate: the executor must have PROVEN tool-calling on this host (same
+            // canonical-ref discipline as the curation gate).
+            const canonicalRef = `${resolved.model.provider}/${resolved.model.id}`;
+            const fitness = FitnessStore.forAgentDir(this._agentDir)
+                .getForHost()
+                .find((entry) => entry.model === canonicalRef);
+            const toolCall = fitness?.report.toolCall;
+            // A record with zero probes proves nothing: Math.ceil(0 * (2 / 3)) is 0, so without
+            // the explicit `total > 0` requirement an unprobed model would pass the gate. The
+            // negated comparison also rejects a missing or NaN total.
+            if (!toolCall || !(toolCall.total > 0))
+                return undefined;
+            if (toolCall.succeeded < Math.ceil(toolCall.total * (2 / 3)))
+                return undefined;
+            this._lastModelRouterIntent = "research";
+            return {
+                decision: {
+                    tier: "cheap",
+                    risk: "scoped-write",
+                    confidence: 1,
+                    reasonCode: "executor_direct",
+                    reasons: [`Executor lane: Level-0 direct hit on toolkit script "${verdict.scriptName}"`],
+                },
+                model: resolved.model,
+            };
+        }
+        catch {
+            // Best-effort lane: any failure while classifying, resolving the model, or reading the
+            // fitness store means "no executor route" rather than an error for the caller.
+            return undefined;
+        }
+    }
     _resolveModelRouterTurnRoute(prompt) {
         const settings = this.settingsManager.getModelRouterSettings();
         if (!settings.enabled) {
             this._lastModelRouterSkipReason = "disabled";
             return undefined;
         }
+        // G16 executor lane: a Level-0 DIRECT toolkit hit on a command-shaped prompt routes the
+        // whole turn to the configured local executor (tool-call-fitness-gated) instead of
+        // spending the frontier model on a one-tool reflex. Ambiguity never routes here — it
+        // stays with the big model and the reflex brain. Deterministic, so the judge is skipped.
+        const executorRoute = this._resolveExecutorRoute(prompt, settings.executorModel);
+        if (executorRoute)
+            return executorRoute;
         const decision = classifyModelRouterRoute(prompt);
         this._lastModelRouterIntent = decision.tier === "cheap" ? "research" : "modify";
         // Learning tier must not be selected for normal user prompts
@@ -2128,6 +2190,9 @@ export class AgentSession {
             return undefined;
         if (options?.skipJudge)
             return baseline;
+        // Deterministic executor routes need no judge (Level-0 already decided).
+        if (baseline.decision.reasonCode === "executor_direct")
+            return baseline;
         const settings = this.settingsManager.getModelRouterSettings();
         if (!settings.judgeEnabled)
             return baseline;
@@ -2336,6 +2401,19 @@ export class AgentSession {
         }
         if (persistDecision && completedDecision) {
             persistModelRouterDecision(this.sessionManager, completedDecision);
+            // G3: one route event per user-facing routed turn (the escalation retry runs with
+            // persistDecision=false, so it does not double-emit). Codes/numbers only — no prompt text.
+            this._emitAutonomyTelemetry({
+                type: AUTONOMY_TELEMETRY_EVENT_TYPES.routeDecision,
+                timestamp: new Date().toISOString(),
+                payload: {
+                    tier: completedDecision.route.tier,
+                    risk: completedDecision.route.risk,
+                    reasonCode: completedDecision.route.reasonCode,
+                    confidence: completedDecision.route.confidence,
+                    outcome: completedDecision.outcome,
+                },
+            });
         }
         if (thrownError) {
             throw thrownError;
@@ -5007,8 +5085,24 @@ export class AgentSession {
     getLaneRecords() {
         return this._laneTracker.getRecords();
     }
-    saveWorkerResultSnapshot(result) {
-        return appendWorkerResultSnapshot(this.sessionManager, result);
+    /**
+     * G3: bounded autonomy-telemetry sink. Passes the whole event through {@link redactTelemetryValue}
+     * (the taxonomy's redaction contract) before storing it, so a secret that leaked into a payload
+     * field never lands in the session log. Observe-only: a failure here can never surface into the
+     * turn it is measuring, so the whole body is swallowed. Payloads MUST stay small (ids, codes,
+     * numbers) — never prompt/summary text; callers own that discipline.
+     *
+     * The redacted event is spread into a `{ version: 1, ... }` record and appended to the session
+     * as a custom entry of type AUTONOMY_TELEMETRY_CUSTOM_TYPE. Nothing is returned.
+     *
+     * NOTE(review): the spread follows `version: 1`, so a `version` key on the redacted event would
+     * override the literal — confirm events never carry one.
+     *
+     * @param event - Telemetry event to record; callers in this file pass `{ type, timestamp, payload }`.
+     */
+    _emitAutonomyTelemetry(event) {
+        try {
+            const redacted = redactTelemetryValue(event);
+            this.sessionManager.appendCustomEntry(AUTONOMY_TELEMETRY_CUSTOM_TYPE, { version: 1, ...redacted });
+        }
+        catch {
+            // Telemetry is best-effort: swallow so a sink failure cannot break the observed turn.
+        }
+    }
+    saveWorkerResultSnapshot(result, request) {
+        return appendWorkerResultSnapshot(this.sessionManager, result, request);
     }
     getWorkerResultSnapshots() {
         return getWorkerResultSnapshots(this.sessionManager.getEntries());
@@ -5285,9 +5379,17 @@ export class AgentSession {
         const startedRecord = this._laneTracker.start({ type: "research", goalId: demand.goalId });
         try {
             let spentUsage;
+            // Best-effort, pointer-first workspace evidence. Derives search terms from the goal/requirement
+            // text (not the identity-key query) and is bounded + silent-on-failure: [] == today's behavior.
+            const workspaceSources = await this._collectWorkspaceSources({
+                query: `${demand.context}\n${demand.query}`,
+                cwd: this._cwd,
+                maxSources: settings.maxSources,
+            });
             const result = await runResearch({
                 query: demand.query,
                 context: demand.context,
+                sources: workspaceSources,
                 envelope: this._buildResearchLaneEnvelope(settings.maxUsd, laneProfile),
                 maxUsd: settings.maxUsd,
                 maxSources: settings.maxSources,
@@ -5346,6 +5448,20 @@ export class AgentSession {
             });
             if (record) {
                 appendLaneRecordSnapshot(this.sessionManager, record);
+                // G3: a research lane's product is an evidence bundle, so its terminal record maps to
+                // the evidence_bundle event. Lane outcome only (status/reasonCode/cost) — no findings text.
+                this._emitAutonomyTelemetry({
+                    type: AUTONOMY_TELEMETRY_EVENT_TYPES.evidenceBundle,
+                    timestamp: new Date().toISOString(),
+                    payload: {
+                        laneId: record.laneId,
+                        laneType: record.type,
+                        status: record.status,
+                        reasonCode: record.reasonCode ?? null,
+                        costUsd: record.costUsd ?? null,
+                        hasEvidence: record.evidenceEntryId !== undefined,
+                    },
+                });
             }
             return { started: true, record, result };
         }
@@ -5356,6 +5472,18 @@ export class AgentSession {
             });
             if (record && !this._disposed) {
                 appendLaneRecordSnapshot(this.sessionManager, record);
+                this._emitAutonomyTelemetry({
+                    type: AUTONOMY_TELEMETRY_EVENT_TYPES.evidenceBundle,
+                    timestamp: new Date().toISOString(),
+                    payload: {
+                        laneId: record.laneId,
+                        laneType: record.type,
+                        status: record.status,
+                        reasonCode: record.reasonCode ?? null,
+                        costUsd: record.costUsd ?? null,
+                        hasEvidence: record.evidenceEntryId !== undefined,
+                    },
+                });
             }
             const message = error instanceof Error ? error.message : String(error);
             this._emit({ type: "warning", message: `Research lane failed: ${message}` });
@@ -5458,7 +5586,7 @@ export class AgentSession {
                 });
                 return { started: true, record, outcome };
             }
-            this.saveWorkerResultSnapshot(outcome.result);
+            this.saveWorkerResultSnapshot(outcome.result, workerRequest);
             if (spentUsage && (spentUsage.cost.total > 0 || spentUsage.totalTokens > 0)) {
                 this.addSpawnedUsage(spentUsage, { label: "worker-delegation", reportId: usageReportId });
             }
@@ -5469,6 +5597,19 @@ export class AgentSession {
             });
             if (record) {
                 appendLaneRecordSnapshot(this.sessionManager, record);
+                // G3: worker lane terminal record -> worker_result event. Lane outcome only
+                // (status/reasonCode/cost) — never the worker's summary/changed-file text.
+                this._emitAutonomyTelemetry({
+                    type: AUTONOMY_TELEMETRY_EVENT_TYPES.workerResult,
+                    timestamp: new Date().toISOString(),
+                    payload: {
+                        laneId: record.laneId,
+                        laneType: record.type,
+                        status: record.status,
+                        reasonCode: record.reasonCode ?? null,
+                        costUsd: record.costUsd ?? null,
+                    },
+                });
             }
             return { started: true, record, outcome };
         }
@@ -5479,6 +5620,17 @@ export class AgentSession {
             });
             if (record && !this._disposed) {
                 appendLaneRecordSnapshot(this.sessionManager, record);
+                this._emitAutonomyTelemetry({
+                    type: AUTONOMY_TELEMETRY_EVENT_TYPES.workerResult,
+                    timestamp: new Date().toISOString(),
+                    payload: {
+                        laneId: record.laneId,
+                        laneType: record.type,
+                        status: record.status,
+                        reasonCode: record.reasonCode ?? null,
+                        costUsd: record.costUsd ?? null,
+                    },
+                });
             }
             const message = error instanceof Error ? error.message : String(error);
             this._emit({ type: "warning", message: `Worker delegation failed: ${message}` });
@@ -5750,17 +5902,32 @@ export class AgentSession {
         // every pass, so advancing it for a no-op (which stores nothing) would make later passes
         // reuse ids — and rollback keys on the id, so a collision blocks or misdirects rollback.
         let auditSequence = getLearningAuditSnapshots(this.sessionManager.getEntries()).length;
+        // G6 evidence strength: durable proposals accumulate observation counts across passes/sessions
+        // so the gate can distinguish a one-off cue from a repeatedly-confirmed lesson. Built once per
+        // pass; every increment is best-effort (store IO must never break reflection).
+        const observationStore = ObservationStore.forAgentDir(this._agentDir);
         let writeIndex = 0;
         for (const write of result.writes) {
             writeIndex += 1;
             const proposalId = `${input.reportId ?? "reflection"}-w${writeIndex}`;
             const proposal = proposalFromReflectionWrite(write, proposalId);
             const rollback = rollbackPlanForReflectionWrite(write);
+            let observations = 1;
+            if (policy.enabled) {
+                try {
+                    observations = observationStore.increment(observationKey(proposal.layer, proposal.summary));
+                }
+                catch {
+                    // A store read/write failure falls back to a fresh count of 1, which keeps the gate
+                    // proposal-first (never spuriously auto-applies) rather than crashing the pass.
+                    observations = 1;
+                }
+            }
             const decision = policy.enabled
                 ? evaluateLearningDecision({
                     proposal,
                     confidence: policy.reflectionSourceConfidence,
-                    observations: 1,
+                    observations,
                     contradictions: 0,
                     settings: {
                         enabled: true,
@@ -5779,6 +5946,18 @@ export class AgentSession {
                     requiresApproval: false,
                 };
             this.saveLearningDecisionSnapshot(decision);
+            // G3: learning-gate outcome. Codes/numbers only — never the proposal summary/memory text.
+            this._emitAutonomyTelemetry({
+                type: AUTONOMY_TELEMETRY_EVENT_TYPES.learningDecision,
+                timestamp: new Date().toISOString(),
+                payload: {
+                    kind: decision.kind,
+                    reasonCode: decision.reasonCode,
+                    layer: proposal.layer,
+                    confidence: decision.confidence,
+                    requiresApproval: decision.requiresApproval,
+                },
+            });
             if (decision.kind === "apply") {
                 await this._applyReflectionWrite(write, signal);
             }