@caupulican/pi-adaptative 0.80.97 → 0.80.99

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. package/CHANGELOG.md +53 -0
  2. package/dist/core/agent-session.d.ts +46 -5
  3. package/dist/core/agent-session.d.ts.map +1 -1
  4. package/dist/core/agent-session.js +385 -17
  5. package/dist/core/agent-session.js.map +1 -1
  6. package/dist/core/autonomy/envelope-enforcement.d.ts +17 -0
  7. package/dist/core/autonomy/envelope-enforcement.d.ts.map +1 -0
  8. package/dist/core/autonomy/envelope-enforcement.js +80 -0
  9. package/dist/core/autonomy/envelope-enforcement.js.map +1 -0
  10. package/dist/core/autonomy/foreground-envelope.d.ts +22 -0
  11. package/dist/core/autonomy/foreground-envelope.d.ts.map +1 -0
  12. package/dist/core/autonomy/foreground-envelope.js +65 -0
  13. package/dist/core/autonomy/foreground-envelope.js.map +1 -0
  14. package/dist/core/autonomy/status.d.ts +11 -0
  15. package/dist/core/autonomy/status.d.ts.map +1 -1
  16. package/dist/core/autonomy/status.js.map +1 -1
  17. package/dist/core/context/brain-curator.d.ts +7 -0
  18. package/dist/core/context/brain-curator.d.ts.map +1 -1
  19. package/dist/core/context/brain-curator.js +6 -0
  20. package/dist/core/context/brain-curator.js.map +1 -1
  21. package/dist/core/context/context-composition.d.ts.map +1 -1
  22. package/dist/core/context/context-composition.js +1 -1
  23. package/dist/core/context/context-composition.js.map +1 -1
  24. package/dist/core/delegation/session-worker-result.d.ts +8 -2
  25. package/dist/core/delegation/session-worker-result.d.ts.map +1 -1
  26. package/dist/core/delegation/session-worker-result.js +18 -1
  27. package/dist/core/delegation/session-worker-result.js.map +1 -1
  28. package/dist/core/delegation/worker-actions.d.ts +50 -0
  29. package/dist/core/delegation/worker-actions.d.ts.map +1 -0
  30. package/dist/core/delegation/worker-actions.js +70 -0
  31. package/dist/core/delegation/worker-actions.js.map +1 -0
  32. package/dist/core/delegation/worker-runner.d.ts +9 -0
  33. package/dist/core/delegation/worker-runner.d.ts.map +1 -1
  34. package/dist/core/delegation/worker-runner.js +38 -4
  35. package/dist/core/delegation/worker-runner.js.map +1 -1
  36. package/dist/core/learning/observation-store.d.ts +20 -0
  37. package/dist/core/learning/observation-store.d.ts.map +1 -0
  38. package/dist/core/learning/observation-store.js +101 -0
  39. package/dist/core/learning/observation-store.js.map +1 -0
  40. package/dist/core/model-capability.d.ts +19 -0
  41. package/dist/core/model-capability.d.ts.map +1 -1
  42. package/dist/core/model-capability.js +19 -0
  43. package/dist/core/model-capability.js.map +1 -1
  44. package/dist/core/model-router/executor-route.d.ts +8 -0
  45. package/dist/core/model-router/executor-route.d.ts.map +1 -0
  46. package/dist/core/model-router/executor-route.js +33 -0
  47. package/dist/core/model-router/executor-route.js.map +1 -0
  48. package/dist/core/model-router/tool-escalation.d.ts +2 -0
  49. package/dist/core/model-router/tool-escalation.d.ts.map +1 -1
  50. package/dist/core/model-router/tool-escalation.js +6 -0
  51. package/dist/core/model-router/tool-escalation.js.map +1 -1
  52. package/dist/core/research/research-runner.d.ts +8 -1
  53. package/dist/core/research/research-runner.d.ts.map +1 -1
  54. package/dist/core/research/research-runner.js +13 -1
  55. package/dist/core/research/research-runner.js.map +1 -1
  56. package/dist/core/research/workspace-collector.d.ts +25 -0
  57. package/dist/core/research/workspace-collector.d.ts.map +1 -0
  58. package/dist/core/research/workspace-collector.js +286 -0
  59. package/dist/core/research/workspace-collector.js.map +1 -0
  60. package/dist/core/settings-manager.d.ts +5 -0
  61. package/dist/core/settings-manager.d.ts.map +1 -1
  62. package/dist/core/settings-manager.js +8 -0
  63. package/dist/core/settings-manager.js.map +1 -1
  64. package/dist/modes/interactive/components/fitness-role-selector.d.ts +1 -1
  65. package/dist/modes/interactive/components/fitness-role-selector.d.ts.map +1 -1
  66. package/dist/modes/interactive/components/fitness-role-selector.js +5 -0
  67. package/dist/modes/interactive/components/fitness-role-selector.js.map +1 -1
  68. package/dist/modes/interactive/components/settings-selector.d.ts.map +1 -1
  69. package/dist/modes/interactive/components/settings-selector.js +20 -0
  70. package/dist/modes/interactive/components/settings-selector.js.map +1 -1
  71. package/dist/modes/interactive/interactive-mode.d.ts.map +1 -1
  72. package/dist/modes/interactive/interactive-mode.js +9 -0
  73. package/dist/modes/interactive/interactive-mode.js.map +1 -1
  74. package/examples/extensions/custom-provider-anthropic/package-lock.json +2 -2
  75. package/examples/extensions/custom-provider-anthropic/package.json +1 -1
  76. package/examples/extensions/custom-provider-gitlab-duo/package.json +1 -1
  77. package/examples/extensions/sandbox/package-lock.json +2 -2
  78. package/examples/extensions/sandbox/package.json +1 -1
  79. package/examples/extensions/with-deps/package-lock.json +2 -2
  80. package/examples/extensions/with-deps/package.json +1 -1
  81. package/npm-shrinkwrap.json +12 -12
  82. package/package.json +4 -4
@@ -21,10 +21,12 @@ import { stripFrontmatter } from "../utils/frontmatter.js";
21
21
  import { resolvePath } from "../utils/paths.js";
22
22
  import { sleep } from "../utils/sleep.js";
23
23
  import { formatNoApiKeyFoundMessage, formatNoModelSelectedMessage } from "./auth-guidance.js";
24
+ import { buildForegroundEnvelope, formatForegroundEnvelopeObservation } from "./autonomy/foreground-envelope.js";
24
25
  import { evaluateToolGate } from "./autonomy/gates.js";
25
26
  import { LaneTracker } from "./autonomy/lane-tracker.js";
26
27
  import { appendLaneRecordSnapshot, getLaneRecordSnapshots } from "./autonomy/session-lane-record.js";
27
28
  import { composeSubagentSystemPrompt } from "./autonomy/subagent-prompt.js";
29
+ import { AUTONOMY_TELEMETRY_EVENT_TYPES, redactTelemetryValue, } from "./autonomy/telemetry-events.js";
28
30
  import { executeBashWithOperations } from "./bash-executor.js";
29
31
  import { calculateContextTokens, collectEntriesForBranchSummary, compact, estimateContextTokens, generateBranchSummary, prepareCompaction, shouldCompact, } from "./compaction/index.js";
30
32
  // (module-scope helper for curation goal extraction defined below the imports)
@@ -44,6 +46,7 @@ import { aggregateDailyUsageFromSessionFiles, aggregateDailyUsageFromSessionRoot
44
46
  import { downgradeReasoning, estimateTurnCostUsd, evaluateCostGuard } from "./cost-guard.js";
45
47
  import { DEFAULT_THINKING_LEVEL } from "./defaults.js";
46
48
  import { appendWorkerResultSnapshot, getWorkerResultSnapshots } from "./delegation/session-worker-result.js";
49
+ import { applyWorkerActions } from "./delegation/worker-actions.js";
47
50
  import { runWorker } from "./delegation/worker-runner.js";
48
51
  import { exportSessionToHtml } from "./export-html/index.js";
49
52
  import { createToolHtmlRenderer } from "./export-html/tool-renderer.js";
@@ -57,6 +60,7 @@ import { buildGoalRuntimeSnapshot, } from "./goals/goal-runtime-snapshot.js";
57
60
  import { appendGoalStateSnapshot, getLatestGoalStateSnapshot } from "./goals/session-goal-state.js";
58
61
  import { appendLearningAuditSnapshot, getLearningAuditSnapshots, proposalFromReflectionWrite, rollbackPlanForReflectionWrite, } from "./learning/learning-audit.js";
59
62
  import { evaluateLearningDecision } from "./learning/learning-gate.js";
63
+ import { ObservationStore, observationKey } from "./learning/observation-store.js";
60
64
  import { decideDemand, ReflectionEngine, } from "./learning/reflection-engine.js";
61
65
  import { appendLearningDecisionSnapshot, getLearningDecisionSnapshots } from "./learning/session-learning-decision.js";
62
66
  import { isPromotedFrontmatter, SkillCurator } from "./learning/skill-curator.js";
@@ -66,9 +70,10 @@ import { FileStoreProvider } from "./memory/providers/file-store.js";
66
70
  import { TranscriptRecallProvider } from "./memory/providers/transcript-recall.js";
67
71
  import { compactToolResultDetailsForRetention } from "./message-retention.js";
68
72
  import { createCustomMessage } from "./messages.js";
69
- import { deriveModelCapabilityProfile, filterToolNamesForCapability, } from "./model-capability.js";
73
+ import { deriveModelCapabilityProfile, filterToolNamesForCapability, scaleContinuationBudgetsForCapability, } from "./model-capability.js";
70
74
  import { resolveCliModel, resolveProfileModelSettings } from "./model-resolver.js";
71
75
  import { collectModelRouterConfigDiagnostics } from "./model-router/config-diagnostics.js";
76
+ import { classifyExecutorTurn } from "./model-router/executor-route.js";
72
77
  import { classifyModelRouterRoute } from "./model-router/intent-classifier.js";
73
78
  import { ROUTE_JUDGE_MAX_OUTPUT_TOKENS, runRouteJudge } from "./model-router/route-judge.js";
74
79
  import { bufferModelRouterSessionCustomMessage, bufferModelRouterSessionMessage, createModelRouterSessionBuffer, flushModelRouterSessionBuffer, } from "./model-router/session-buffer.js";
@@ -79,6 +84,7 @@ import { expandPromptTemplate } from "./prompt-templates.js";
79
84
  import { runModelFitnessProbe } from "./research/model-fitness.js";
80
85
  import { runResearch } from "./research/research-runner.js";
81
86
  import { appendEvidenceBundleSnapshot, getEvidenceBundleSnapshots, getLatestEvidenceBundleSnapshot, } from "./research/session-evidence-bundle.js";
87
+ import { collectWorkspaceSources } from "./research/workspace-collector.js";
82
88
  import { stripResourceProfileBlocks } from "./resource-profile-blocks.js";
83
89
  import { classifyToolTrust, UNTRUSTED_BOUNDARY_SYSTEM_RULE, wrapUntrustedText } from "./security/untrusted-boundary.js";
84
90
  import { CURRENT_SESSION_VERSION, getLatestCompactionEntry } from "./session-manager.js";
@@ -142,6 +148,11 @@ function formatModelRouterModel(model) {
142
148
  function persistModelRouterDecision(sessionManager, decision) {
143
149
  sessionManager.appendCustomEntry(MODEL_ROUTER_DECISION_CUSTOM_TYPE, decision);
144
150
  }
151
+ /** Custom-entry type for G3 autonomy telemetry. Distinct from the router/lane record types so a
152
+ * telemetry consumer can filter on it without decoding operational snapshots. */
153
+ const AUTONOMY_TELEMETRY_CUSTOM_TYPE = "autonomy-telemetry";
154
+ /** G8: bound on the in-memory gate-outcome history. Oldest entries evict once the cap is reached. */
155
+ const GATE_OUTCOME_HISTORY_LIMIT = 50;
145
156
  /** Read a packed grep/find tool result's `details.artifactId`, if present, without `any`. */
146
157
  function extractArtifactId(message) {
147
158
  if (!message || message.role !== "toolResult")
@@ -211,8 +222,6 @@ export class AgentSession {
211
222
  _laneTracker = new LaneTracker();
212
223
  /** Session-lifetime abort for in-flight research passes (same pattern as _reflectionAbort). */
213
224
  _researchLaneAbort = new AbortController();
214
- /** Single-flight guard: at most one delegated worker runs at a time per session. */
215
- _isWorkerDelegationRunning = false;
216
225
  /** Session-lifetime abort for in-flight delegated workers. */
217
226
  _workerDelegationAbort = new AbortController();
218
227
  /**
@@ -251,11 +260,14 @@ export class AgentSession {
251
260
  // Extension system
252
261
  _extensionRunner;
253
262
  _turnIndex = 0;
263
+ /** G7: per-turn foreground CapabilityEnvelope auto-built for visibility (observe-only; not enforced). */
264
+ _currentForegroundEnvelope;
254
265
  _resourceLoader;
255
266
  _customTools;
256
267
  _baseToolDefinitions = new Map();
257
268
  _cwd;
258
269
  _agentDir;
270
+ _collectWorkspaceSources;
259
271
  _extensionRunnerRef;
260
272
  _initialActiveToolNames;
261
273
  _allowedToolNames;
@@ -284,6 +296,8 @@ export class AgentSession {
284
296
  _isModelRouterRetry = false;
285
297
  _lastModelRouterDecision;
286
298
  _lastAutonomyGateOutcome;
299
+ /** G8: bounded (cap {@link GATE_OUTCOME_HISTORY_LIMIT}) history of gate outcomes; tail is latest. */
300
+ _gateOutcomeHistory = [];
287
301
  _lastModelRouterSkipReason;
288
302
  _lastModelRouterIntent;
289
303
  /** Lazily-built skill curator (#32) over `<agentDir>/skills`. */
@@ -323,6 +337,7 @@ export class AgentSession {
323
337
  this._customTools = config.customTools ?? [];
324
338
  this._cwd = config.cwd;
325
339
  this._agentDir = config.agentDir ?? getAgentDir();
340
+ this._collectWorkspaceSources = config.collectWorkspaceSources ?? collectWorkspaceSources;
326
341
  this._modelRegistry = config.modelRegistry;
327
342
  this._extensionRunnerRef = config.extensionRunnerRef;
328
343
  this._initialActiveToolNames = config.initialActiveToolNames;
@@ -1005,6 +1020,9 @@ export class AgentSession {
1005
1020
  ...this._resourceLoader.getAgentsDiagnostics().map((diagnostic) => diagnostic.message),
1006
1021
  ...this._inertExtensionWarnings,
1007
1022
  ...this._unboundToolGrantWarnings,
1023
+ // G7: auto-built per-turn foreground envelope (observe-only; not enforced). Falls back to a
1024
+ // live preview when no turn has run yet so /context always shows the current scope.
1025
+ formatForegroundEnvelopeObservation(this._currentForegroundEnvelope ?? this._buildForegroundEnvelopeFromState()),
1008
1026
  // G14 (ratified): a user disable always beats a profile grant — surface the conflict.
1009
1027
  ...["tools", "skills", "prompts", "extensions"].flatMap((kind) => this.settingsManager
1010
1028
  .getProfileGrantsOverriddenByUserDisable(kind)
@@ -1213,7 +1231,13 @@ export class AgentSession {
1213
1231
  writePayloads,
1214
1232
  curation: curationSettings.enabled
1215
1233
  ? {
1216
- resolveDigest: (digestKey) => this._brainCurator.getDigest(digestKey),
1234
+ resolveDigest: (digestKey) => {
1235
+ const digest = this._brainCurator.getDigest(digestKey);
1236
+ // Count serves on the REAL per-turn pass only, never the report path.
1237
+ if (digest !== undefined && writePayloads)
1238
+ this._brainCurator.noteDigestServed();
1239
+ return digest;
1240
+ },
1217
1241
  // Only the real per-turn pass enqueues work; the read-only report path
1218
1242
  // (writePayloads=false) stays side-effect free.
1219
1243
  onPacked: writePayloads
@@ -1315,7 +1339,12 @@ export class AgentSession {
1315
1339
  _installAgentToolHooks() {
1316
1340
  this.agent.beforeToolCall = async ({ toolCall, args }) => {
1317
1341
  if (this._activeModelRouterRoute &&
1318
- shouldEscalateModelRouterTool({ tier: this._activeModelRouterRoute.tier, toolName: toolCall.name, args })) {
1342
+ shouldEscalateModelRouterTool({
1343
+ tier: this._activeModelRouterRoute.tier,
1344
+ toolName: toolCall.name,
1345
+ args,
1346
+ reasonCode: this._activeModelRouterRoute.reasonCode,
1347
+ })) {
1319
1348
  this._modelRouterEscalationRequested = true;
1320
1349
  this.agent.abort();
1321
1350
  return {
@@ -1331,7 +1360,7 @@ export class AgentSession {
1331
1360
  envelope: this.capabilityEnvelope,
1332
1361
  });
1333
1362
  if (this.capabilityEnvelope) {
1334
- this._lastAutonomyGateOutcome = gateResult;
1363
+ this._recordGateOutcome(gateResult);
1335
1364
  }
1336
1365
  if (gateResult.outcome === "block" || gateResult.outcome === "ask-user") {
1337
1366
  return {
@@ -1548,6 +1577,7 @@ export class AgentSession {
1548
1577
  await this._extensionRunner.emit({ type: "agent_end", messages: event.messages });
1549
1578
  }
1550
1579
  else if (event.type === "turn_start") {
1580
+ this._refreshForegroundEnvelope();
1551
1581
  const extensionEvent = {
1552
1582
  type: "turn_start",
1553
1583
  turnIndex: this._turnIndex,
@@ -1769,6 +1799,31 @@ export class AgentSession {
1769
1799
  getActiveToolNames() {
1770
1800
  return this.agent.state.tools.map((t) => t.name);
1771
1801
  }
1802
+ /** G7: build a foreground {@link CapabilityEnvelope} from the live session state (active tools, cwd, cost ceiling). */
1803
+ _buildForegroundEnvelopeFromState() {
1804
+ return buildForegroundEnvelope({
1805
+ turnIndex: this._turnIndex,
1806
+ activeToolNames: this.getActiveToolNames(),
1807
+ cwd: this._cwd,
1808
+ maxTurnUsd: this.settingsManager.getCostGuardSettings().maxTurnUsd,
1809
+ });
1810
+ }
1811
+ /**
1812
+ * G7: (re)build the foreground envelope for the current turn. Visibility only -- the foreground
1813
+ * envelope is NOT enforced this round. Best-effort: never throws into the turn.
1814
+ */
1815
+ _refreshForegroundEnvelope() {
1816
+ try {
1817
+ this._currentForegroundEnvelope = this._buildForegroundEnvelopeFromState();
1818
+ }
1819
+ catch {
1820
+ // Visibility only: a failure to build the envelope must never disturb the turn.
1821
+ }
1822
+ }
1823
+ /** G7: the auto-constructed foreground envelope for the current/most-recent turn (visibility only). */
1824
+ getForegroundEnvelope() {
1825
+ return this._currentForegroundEnvelope;
1826
+ }
1772
1827
  /**
1773
1828
  * Get all configured tools with name, description, parameter schema, prompt guidelines, and source metadata.
1774
1829
  */
@@ -2037,12 +2092,107 @@ export class AgentSession {
2037
2092
  return false;
2038
2093
  return this._modelRegistry.hasConfiguredAuth(resolved.model);
2039
2094
  }
2095
+ _resolveExecutorRoute(prompt, executorPattern) {
2096
+ if (!executorPattern)
2097
+ return undefined;
2098
+ try {
2099
+ const verdict = classifyExecutorTurn(prompt, this.settingsManager.getToolkitScripts());
2100
+ if (!verdict.execute)
2101
+ return undefined;
2102
+ const resolved = resolveCliModel({ cliModel: executorPattern, modelRegistry: this._modelRegistry });
2103
+ if (!resolved.model || !this._modelRegistry.hasConfiguredAuth(resolved.model))
2104
+ return undefined;
2105
+ // Fitness gate: the executor must have PROVEN tool-calling on this host (same
2106
+ // canonical-ref discipline as the curation gate).
2107
+ const canonicalRef = `${resolved.model.provider}/${resolved.model.id}`;
2108
+ const fitness = FitnessStore.forAgentDir(this._agentDir)
2109
+ .getForHost()
2110
+ .find((entry) => entry.model === canonicalRef);
2111
+ const toolCall = fitness?.report.toolCall;
2112
+ if (!toolCall || toolCall.succeeded < Math.ceil(toolCall.total * (2 / 3)))
2113
+ return undefined;
2114
+ this._lastModelRouterIntent = "research";
2115
+ return {
2116
+ decision: {
2117
+ tier: "cheap",
2118
+ risk: "scoped-write",
2119
+ confidence: 1,
2120
+ reasonCode: "executor_direct",
2121
+ reasons: [`Executor lane: Level-0 direct hit on toolkit script "${verdict.scriptName}"`],
2122
+ },
2123
+ model: resolved.model,
2124
+ };
2125
+ }
2126
+ catch {
2127
+ return undefined;
2128
+ }
2129
+ }
2130
+ /** True if a run_toolkit_script tool result since `fromIndex` actually EXECUTED (not error/ambiguous). */
2131
+ _executorTurnExecutedScript(fromIndex) {
2132
+ for (const message of this.agent.state.messages.slice(fromIndex)) {
2133
+ if (message.role !== "toolResult")
2134
+ continue;
2135
+ if (message.toolName !== "run_toolkit_script")
2136
+ continue;
2137
+ if (message.isError === true)
2138
+ continue;
2139
+ const outcome = message.details?.outcome;
2140
+ if (outcome === "executed")
2141
+ return true;
2142
+ }
2143
+ return false;
2144
+ }
2145
+ /** Ask the reflex brain to refine the last user request into an explicit toolkit instruction. */
2146
+ async _buildExecutorRefinedPrompt(messages) {
2147
+ try {
2148
+ const model = this._resolveCurationModelIfFit();
2149
+ if (!model)
2150
+ return undefined;
2151
+ const list = Array.isArray(messages) ? messages : [messages];
2152
+ const request = latestUserPromptText(list.filter((m) => true));
2153
+ if (!request)
2154
+ return undefined;
2155
+ const scripts = this.settingsManager.getToolkitScripts();
2156
+ const completion = await this.runIsolatedCompletion({
2157
+ systemPrompt: REFLEX_INTERPRETER_SYSTEM_PROMPT,
2158
+ messages: [
2159
+ {
2160
+ role: "user",
2161
+ content: [{ type: "text", text: buildReflexUserPrompt(request, scripts) }],
2162
+ timestamp: Date.now(),
2163
+ },
2164
+ ],
2165
+ model,
2166
+ thinkingLevel: "off",
2167
+ maxTokens: 256,
2168
+ cacheRetention: "short",
2169
+ });
2170
+ if (completion.usage.cost.total > 0 || completion.usage.totalTokens > 0) {
2171
+ this.addSpawnedUsage(completion.usage, { label: "executor-brain-warmup" });
2172
+ }
2173
+ const plan = parseReflexPlan(completion.text);
2174
+ if (!plan || plan.script === "none")
2175
+ return undefined;
2176
+ const argHint = plan.args.length > 0 ? ` with args ${JSON.stringify(plan.args)}` : "";
2177
+ return `Run the toolkit script "${plan.script}"${argHint} using run_toolkit_script, then report its result exactly.`;
2178
+ }
2179
+ catch {
2180
+ return undefined;
2181
+ }
2182
+ }
2040
2183
  _resolveModelRouterTurnRoute(prompt) {
2041
2184
  const settings = this.settingsManager.getModelRouterSettings();
2042
2185
  if (!settings.enabled) {
2043
2186
  this._lastModelRouterSkipReason = "disabled";
2044
2187
  return undefined;
2045
2188
  }
2189
+ // G16 executor lane: a Level-0 DIRECT toolkit hit on a command-shaped prompt routes the
2190
+ // whole turn to the configured local executor (tool-call-fitness-gated) instead of
2191
+ // spending the frontier model on a one-tool reflex. Ambiguity never routes here — it
2192
+ // stays with the big model and the reflex brain. Deterministic, so the judge is skipped.
2193
+ const executorRoute = this._resolveExecutorRoute(prompt, settings.executorModel);
2194
+ if (executorRoute)
2195
+ return executorRoute;
2046
2196
  const decision = classifyModelRouterRoute(prompt);
2047
2197
  this._lastModelRouterIntent = decision.tier === "cheap" ? "research" : "modify";
2048
2198
  // Learning tier must not be selected for normal user prompts
@@ -2128,6 +2278,9 @@ export class AgentSession {
2128
2278
  return undefined;
2129
2279
  if (options?.skipJudge)
2130
2280
  return baseline;
2281
+ // Deterministic executor routes need no judge (Level-0 already decided).
2282
+ if (baseline.decision.reasonCode === "executor_direct")
2283
+ return baseline;
2131
2284
  const settings = this.settingsManager.getModelRouterSettings();
2132
2285
  if (!settings.judgeEnabled)
2133
2286
  return baseline;
@@ -2263,6 +2416,36 @@ export class AgentSession {
2263
2416
  }
2264
2417
  try {
2265
2418
  await this._runAgentPrompt(messages);
2419
+ // Speculative muscle-retry (G16 refinement): an executor-routed turn is a bet that the
2420
+ // small model can run the toolkit command directly. If it ends WITHOUT a successful
2421
+ // run_toolkit_script execution, retry ONCE on the same executor with the brain's
2422
+ // refined instruction injected — the brain warms while the muscle tries, so the retry
2423
+ // pays only when the muscle actually missed.
2424
+ if (routeDecision?.reasonCode === "executor_direct" &&
2425
+ !this._isModelRouterRetry &&
2426
+ !this._executorTurnExecutedScript(originalHistoryLength)) {
2427
+ const refined = await this._buildExecutorRefinedPrompt(messages);
2428
+ if (refined) {
2429
+ this.agent.state.messages.splice(originalHistoryLength);
2430
+ await this._runAgentPrompt([
2431
+ { role: "user", content: [{ type: "text", text: refined }], timestamp: Date.now() },
2432
+ ]);
2433
+ completedDecision = {
2434
+ route: {
2435
+ ...routeDecision,
2436
+ reasonCode: "executor_speculative_retry",
2437
+ reasons: [
2438
+ ...routeDecision.reasons,
2439
+ "Executor missed on first try; retried with brain-refined instruction",
2440
+ ],
2441
+ },
2442
+ routedModel: formatModelRouterModel(routedModel),
2443
+ outcome: "routed",
2444
+ intent: "research",
2445
+ };
2446
+ this._lastModelRouterDecision = completedDecision;
2447
+ }
2448
+ }
2266
2449
  if (bufferRoutedTurn && this._modelRouterEscalationRequested) {
2267
2450
  this.agent.state.messages.splice(originalHistoryLength);
2268
2451
  retryModel = this._resolveModelRouterModelForIntent("modify") ?? previousModel;
@@ -2336,6 +2519,19 @@ export class AgentSession {
2336
2519
  }
2337
2520
  if (persistDecision && completedDecision) {
2338
2521
  persistModelRouterDecision(this.sessionManager, completedDecision);
2522
+ // G3: one route event per user-facing routed turn (the escalation retry runs with
2523
+ // persistDecision=false, so it does not double-emit). Codes/numbers only — no prompt text.
2524
+ this._emitAutonomyTelemetry({
2525
+ type: AUTONOMY_TELEMETRY_EVENT_TYPES.routeDecision,
2526
+ timestamp: new Date().toISOString(),
2527
+ payload: {
2528
+ tier: completedDecision.route.tier,
2529
+ risk: completedDecision.route.risk,
2530
+ reasonCode: completedDecision.route.reasonCode,
2531
+ confidence: completedDecision.route.confidence,
2532
+ outcome: completedDecision.outcome,
2533
+ },
2534
+ });
2339
2535
  }
2340
2536
  if (thrownError) {
2341
2537
  throw thrownError;
@@ -5007,8 +5203,58 @@ export class AgentSession {
5007
5203
  getLaneRecords() {
5008
5204
  return this._laneTracker.getRecords();
5009
5205
  }
5010
- saveWorkerResultSnapshot(result) {
5011
- return appendWorkerResultSnapshot(this.sessionManager, result);
5206
+ /**
5207
+ * G3: bounded autonomy-telemetry sink. Passes the whole event through {@link redactTelemetryValue}
5208
+ * (the taxonomy's redaction contract) before storing it, so a secret that leaked into a payload
5209
+ * field never lands in the session log. Observe-only: a failure here can never surface into the
5210
+ * turn it is measuring, so the whole body is swallowed. Payloads MUST stay small (ids, codes,
5211
+ * numbers) — never prompt/summary text; callers own that discipline.
5212
+ */
5213
+ _emitAutonomyTelemetry(event) {
5214
+ try {
5215
+ const redacted = redactTelemetryValue(event);
5216
+ this.sessionManager.appendCustomEntry(AUTONOMY_TELEMETRY_CUSTOM_TYPE, { version: 1, ...redacted });
5217
+ }
5218
+ catch {
5219
+ // Telemetry is best-effort: swallow so a sink failure cannot break the observed turn.
5220
+ }
5221
+ }
5222
+ /**
5223
+ * G8: single sink for a gate outcome. Keeps the latest-outcome getter behavior identical (the
5224
+ * full {@link GateOutcome} still lands in `_lastAutonomyGateOutcome`), and additionally appends a
5225
+ * bounded codes-only entry to {@link _gateOutcomeHistory} (oldest evicted at
5226
+ * {@link GATE_OUTCOME_HISTORY_LIMIT}) and emits the `gate_outcome` telemetry event. The history
5227
+ * tail therefore always mirrors the latest outcome. Only called with an active envelope.
5228
+ */
5229
+ _recordGateOutcome(outcome) {
5230
+ this._lastAutonomyGateOutcome = outcome;
5231
+ const at = new Date().toISOString();
5232
+ this._gateOutcomeHistory.push({
5233
+ outcome: outcome.outcome,
5234
+ gate: outcome.gate,
5235
+ reasonCode: outcome.reasonCode,
5236
+ at,
5237
+ });
5238
+ while (this._gateOutcomeHistory.length > GATE_OUTCOME_HISTORY_LIMIT) {
5239
+ this._gateOutcomeHistory.shift();
5240
+ }
5241
+ // G8: gate outcome event. Codes/ids only — never the gate's human-facing message.
5242
+ this._emitAutonomyTelemetry({
5243
+ type: AUTONOMY_TELEMETRY_EVENT_TYPES.gateOutcome,
5244
+ timestamp: at,
5245
+ payload: {
5246
+ outcome: outcome.outcome,
5247
+ gate: outcome.gate,
5248
+ reasonCode: outcome.reasonCode,
5249
+ },
5250
+ });
5251
+ }
5252
+ /** G8: copies of the bounded gate-outcome history, oldest first, latest last. */
5253
+ getGateOutcomeHistory() {
5254
+ return this._gateOutcomeHistory.map((entry) => ({ ...entry }));
5255
+ }
5256
+ saveWorkerResultSnapshot(result, request) {
5257
+ return appendWorkerResultSnapshot(this.sessionManager, result, request);
5012
5258
  }
5013
5259
  getWorkerResultSnapshots() {
5014
5260
  return getWorkerResultSnapshots(this.sessionManager.getEntries());
@@ -5063,12 +5309,17 @@ export class AgentSession {
5063
5309
  const snapshot = this.getGoalRuntimeSnapshot({ maxStallTurns });
5064
5310
  if (snapshot.continuation.action !== "continue")
5065
5311
  return;
5312
+ // Lean-window models (16-32k) keep autosteer but at a reduced budget; full passes through.
5313
+ const scaled = scaleContinuationBudgetsForCapability(this.getModelCapabilityProfile(), {
5314
+ maxTurns: goalContinueTurns,
5315
+ maxWallClockMinutes: goalContinueMaxWallClockMinutes,
5316
+ });
5066
5317
  this._isGoalAutoContinuing = true;
5067
5318
  try {
5068
5319
  await this.continueGoalLoop({
5069
- maxTurns: goalContinueTurns,
5320
+ maxTurns: scaled.maxTurns,
5070
5321
  maxStallTurns,
5071
- maxWallClockMinutes: goalContinueMaxWallClockMinutes,
5322
+ maxWallClockMinutes: scaled.maxWallClockMinutes,
5072
5323
  });
5073
5324
  }
5074
5325
  catch (error) {
@@ -5285,9 +5536,17 @@ export class AgentSession {
5285
5536
  const startedRecord = this._laneTracker.start({ type: "research", goalId: demand.goalId });
5286
5537
  try {
5287
5538
  let spentUsage;
5539
+ // Best-effort, pointer-first workspace evidence. Derives search terms from the goal/requirement
5540
+ // text (not the identity-key query) and is bounded + silent-on-failure: [] == today's behavior.
5541
+ const workspaceSources = await this._collectWorkspaceSources({
5542
+ query: `${demand.context}\n${demand.query}`,
5543
+ cwd: this._cwd,
5544
+ maxSources: settings.maxSources,
5545
+ });
5288
5546
  const result = await runResearch({
5289
5547
  query: demand.query,
5290
5548
  context: demand.context,
5549
+ sources: workspaceSources,
5291
5550
  envelope: this._buildResearchLaneEnvelope(settings.maxUsd, laneProfile),
5292
5551
  maxUsd: settings.maxUsd,
5293
5552
  maxSources: settings.maxSources,
@@ -5346,6 +5605,20 @@ export class AgentSession {
5346
5605
  });
5347
5606
  if (record) {
5348
5607
  appendLaneRecordSnapshot(this.sessionManager, record);
5608
+ // G3: a research lane's product is an evidence bundle, so its terminal record maps to
5609
+ // the evidence_bundle event. Lane outcome only (status/reasonCode/cost) — no findings text.
5610
+ this._emitAutonomyTelemetry({
5611
+ type: AUTONOMY_TELEMETRY_EVENT_TYPES.evidenceBundle,
5612
+ timestamp: new Date().toISOString(),
5613
+ payload: {
5614
+ laneId: record.laneId,
5615
+ laneType: record.type,
5616
+ status: record.status,
5617
+ reasonCode: record.reasonCode ?? null,
5618
+ costUsd: record.costUsd ?? null,
5619
+ hasEvidence: record.evidenceEntryId !== undefined,
5620
+ },
5621
+ });
5349
5622
  }
5350
5623
  return { started: true, record, result };
5351
5624
  }
@@ -5356,6 +5629,18 @@ export class AgentSession {
5356
5629
  });
5357
5630
  if (record && !this._disposed) {
5358
5631
  appendLaneRecordSnapshot(this.sessionManager, record);
5632
+ this._emitAutonomyTelemetry({
5633
+ type: AUTONOMY_TELEMETRY_EVENT_TYPES.evidenceBundle,
5634
+ timestamp: new Date().toISOString(),
5635
+ payload: {
5636
+ laneId: record.laneId,
5637
+ laneType: record.type,
5638
+ status: record.status,
5639
+ reasonCode: record.reasonCode ?? null,
5640
+ costUsd: record.costUsd ?? null,
5641
+ hasEvidence: record.evidenceEntryId !== undefined,
5642
+ },
5643
+ });
5359
5644
  }
5360
5645
  const message = error instanceof Error ? error.message : String(error);
5361
5646
  this._emit({ type: "warning", message: `Research lane failed: ${message}` });
@@ -5372,7 +5657,8 @@ export class AgentSession {
5372
5657
  * usage (idempotent per-lane reportId). Consumed by the `delegate` tool.
5373
5658
  */
5374
5659
  async runWorkerDelegationOnce(request) {
5375
- if (this._isWorkerDelegationRunning) {
5660
+ const delegationSettings = this.settingsManager.getWorkerDelegationSettings();
5661
+ if (this._laneTracker.getActiveCount("worker") >= delegationSettings.maxConcurrent) {
5376
5662
  return { started: false, skipReason: "worker_delegation_already_running" };
5377
5663
  }
5378
5664
  if (this._disposed) {
@@ -5382,7 +5668,7 @@ export class AgentSession {
5382
5668
  if (instructions.length === 0) {
5383
5669
  return { started: false, skipReason: "missing_instructions" };
5384
5670
  }
5385
- const settings = this.settingsManager.getWorkerDelegationSettings();
5671
+ const settings = delegationSettings;
5386
5672
  if (!settings.enabled) {
5387
5673
  return { started: false, skipReason: "worker_delegation_disabled" };
5388
5674
  }
@@ -5391,7 +5677,6 @@ export class AgentSession {
5391
5677
  return { started: false, skipReason: shipment.skipReason };
5392
5678
  }
5393
5679
  const { model, laneProfile } = shipment;
5394
- this._isWorkerDelegationRunning = true;
5395
5680
  this._laneTracker.ensureCounterAtLeast(getLaneRecordSnapshots(this.sessionManager.getEntries()).length + 1);
5396
5681
  const startedRecord = this._laneTracker.start({ type: "worker" });
5397
5682
  const maxUsd = Math.min(settings.maxUsd, this.capabilityEnvelope?.maxEstimatedUsd ?? Number.POSITIVE_INFINITY);
@@ -5408,7 +5693,12 @@ export class AgentSession {
5408
5693
  envelope: {
5409
5694
  id: `worker-${this.sessionId}-${startedRecord.laneId}`,
5410
5695
  profileId: laneProfile?.name,
5411
- capabilities: ["read_files"],
5696
+ // write_files requires BOTH the opt-in AND an explicit non-empty path scope —
5697
+ // an unscoped write grant is dropped here (the lane still runs, read-only), not discovered at validation time.
5698
+ capabilities: settings.writeEnabled && settings.writePaths.length > 0 ? ["read_files", "write_files"] : ["read_files"],
5699
+ ...(settings.writeEnabled && settings.writePaths.length > 0
5700
+ ? { allowedPaths: [...settings.writePaths] }
5701
+ : {}),
5412
5702
  ...this._laneProfileToolGrants(laneProfile),
5413
5703
  maxEstimatedUsd: maxUsd,
5414
5704
  createdAt: new Date().toISOString(),
@@ -5416,6 +5706,17 @@ export class AgentSession {
5416
5706
  maxEstimatedUsd: maxUsd,
5417
5707
  createdAt: new Date().toISOString(),
5418
5708
  };
5709
+ // G8: worker delegation START. Routing/scope codes + budget only — never the instructions text.
5710
+ this._emitAutonomyTelemetry({
5711
+ type: AUTONOMY_TELEMETRY_EVENT_TYPES.workerRequest,
5712
+ timestamp: new Date().toISOString(),
5713
+ payload: {
5714
+ id: workerRequest.id,
5715
+ tier: workerRequest.route.tier,
5716
+ capabilities: [...workerRequest.envelope.capabilities],
5717
+ maxEstimatedUsd: workerRequest.maxEstimatedUsd ?? null,
5718
+ },
5719
+ });
5419
5720
  const usageReportId = `worker:${this.sessionId}:${startedRecord.laneId}`;
5420
5721
  try {
5421
5722
  let spentUsage;
@@ -5425,6 +5726,10 @@ export class AgentSession {
5425
5726
  maxWallClockMs: settings.maxWallClockMs,
5426
5727
  usageReportId,
5427
5728
  signal: this._workerDelegationAbort.signal,
5729
+ // Write lane (G2): runner-side action application through the envelope path scope.
5730
+ applyActions: workerRequest.envelope.capabilities.includes("write_files")
5731
+ ? (actions) => applyWorkerActions({ actions, envelope: workerRequest.envelope, cwd: this._cwd })
5732
+ : undefined,
5428
5733
  complete: async ({ systemPrompt, userPrompt, signal }) => {
5429
5734
  const completion = await this.runIsolatedCompletion({
5430
5735
  // Level-0 core always survives. A model-provided prompt (delegate tool) is the most
@@ -5458,7 +5763,7 @@ export class AgentSession {
5458
5763
  });
5459
5764
  return { started: true, record, outcome };
5460
5765
  }
5461
- this.saveWorkerResultSnapshot(outcome.result);
5766
+ this.saveWorkerResultSnapshot(outcome.result, workerRequest);
5462
5767
  if (spentUsage && (spentUsage.cost.total > 0 || spentUsage.totalTokens > 0)) {
5463
5768
  this.addSpawnedUsage(spentUsage, { label: "worker-delegation", reportId: usageReportId });
5464
5769
  }
@@ -5469,6 +5774,19 @@ export class AgentSession {
5469
5774
  });
5470
5775
  if (record) {
5471
5776
  appendLaneRecordSnapshot(this.sessionManager, record);
5777
+ // G3: worker lane terminal record -> worker_result event. Lane outcome only
5778
+ // (status/reasonCode/cost) — never the worker's summary/changed-file text.
5779
+ this._emitAutonomyTelemetry({
5780
+ type: AUTONOMY_TELEMETRY_EVENT_TYPES.workerResult,
5781
+ timestamp: new Date().toISOString(),
5782
+ payload: {
5783
+ laneId: record.laneId,
5784
+ laneType: record.type,
5785
+ status: record.status,
5786
+ reasonCode: record.reasonCode ?? null,
5787
+ costUsd: record.costUsd ?? null,
5788
+ },
5789
+ });
5472
5790
  }
5473
5791
  return { started: true, record, outcome };
5474
5792
  }
@@ -5479,13 +5797,23 @@ export class AgentSession {
5479
5797
  });
5480
5798
  if (record && !this._disposed) {
5481
5799
  appendLaneRecordSnapshot(this.sessionManager, record);
5800
+ this._emitAutonomyTelemetry({
5801
+ type: AUTONOMY_TELEMETRY_EVENT_TYPES.workerResult,
5802
+ timestamp: new Date().toISOString(),
5803
+ payload: {
5804
+ laneId: record.laneId,
5805
+ laneType: record.type,
5806
+ status: record.status,
5807
+ reasonCode: record.reasonCode ?? null,
5808
+ costUsd: record.costUsd ?? null,
5809
+ },
5810
+ });
5482
5811
  }
5483
5812
  const message = error instanceof Error ? error.message : String(error);
5484
5813
  this._emit({ type: "warning", message: `Worker delegation failed: ${message}` });
5485
5814
  return { started: true, record };
5486
5815
  }
5487
5816
  finally {
5488
- this._isWorkerDelegationRunning = false;
5489
5817
  }
5490
5818
  }
5491
5819
  /**
@@ -5750,17 +6078,32 @@ export class AgentSession {
5750
6078
  // every pass, so advancing it for a no-op (which stores nothing) would make later passes
5751
6079
  // reuse ids — and rollback keys on the id, so a collision blocks or misdirects rollback.
5752
6080
  let auditSequence = getLearningAuditSnapshots(this.sessionManager.getEntries()).length;
6081
+ // G6 evidence strength: durable proposals accumulate observation counts across passes/sessions
6082
+ // so the gate can distinguish a one-off cue from a repeatedly-confirmed lesson. Built once per
6083
+ // pass; every increment is best-effort (store IO must never break reflection).
6084
+ const observationStore = ObservationStore.forAgentDir(this._agentDir);
5753
6085
  let writeIndex = 0;
5754
6086
  for (const write of result.writes) {
5755
6087
  writeIndex += 1;
5756
6088
  const proposalId = `${input.reportId ?? "reflection"}-w${writeIndex}`;
5757
6089
  const proposal = proposalFromReflectionWrite(write, proposalId);
5758
6090
  const rollback = rollbackPlanForReflectionWrite(write);
6091
+ let observations = 1;
6092
+ if (policy.enabled) {
6093
+ try {
6094
+ observations = observationStore.increment(observationKey(proposal.layer, proposal.summary));
6095
+ }
6096
+ catch {
6097
+ // A store read/write failure falls back to a fresh count of 1, which keeps the gate
6098
+ // proposal-first (never spuriously auto-applies) rather than crashing the pass.
6099
+ observations = 1;
6100
+ }
6101
+ }
5759
6102
  const decision = policy.enabled
5760
6103
  ? evaluateLearningDecision({
5761
6104
  proposal,
5762
6105
  confidence: policy.reflectionSourceConfidence,
5763
- observations: 1,
6106
+ observations,
5764
6107
  contradictions: 0,
5765
6108
  settings: {
5766
6109
  enabled: true,
@@ -5779,6 +6122,31 @@ export class AgentSession {
5779
6122
  requiresApproval: false,
5780
6123
  };
5781
6124
  this.saveLearningDecisionSnapshot(decision);
6125
+ // G3: learning-gate outcome. Codes/numbers only — never the proposal summary/memory text.
6126
+ this._emitAutonomyTelemetry({
6127
+ type: AUTONOMY_TELEMETRY_EVENT_TYPES.learningDecision,
6128
+ timestamp: new Date().toISOString(),
6129
+ payload: {
6130
+ kind: decision.kind,
6131
+ reasonCode: decision.reasonCode,
6132
+ layer: proposal.layer,
6133
+ confidence: decision.confidence,
6134
+ requiresApproval: decision.requiresApproval,
6135
+ },
6136
+ });
6137
+ // G8: a proposal that needs human sign-off is an approval REQUEST. Codes/layer only —
6138
+ // never the proposal summary/memory text (those live only in the audit snapshot).
6139
+ if (decision.requiresApproval) {
6140
+ this._emitAutonomyTelemetry({
6141
+ type: AUTONOMY_TELEMETRY_EVENT_TYPES.approvalRequest,
6142
+ timestamp: new Date().toISOString(),
6143
+ payload: {
6144
+ kind: decision.kind,
6145
+ reasonCode: decision.reasonCode,
6146
+ layer: proposal.layer,
6147
+ },
6148
+ });
6149
+ }
5782
6150
  if (decision.kind === "apply") {
5783
6151
  await this._applyReflectionWrite(write, signal);
5784
6152
  }