@agent-native/core 0.52.0 → 0.54.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (267) hide show
  1. package/README.md +41 -95
  2. package/blueprints/action/crud.md +98 -0
  3. package/blueprints/channel/discord.md +74 -0
  4. package/blueprints/provider/stripe.md +87 -0
  5. package/blueprints/sandbox/docker.md +78 -0
  6. package/dist/action.d.ts +64 -1
  7. package/dist/action.d.ts.map +1 -1
  8. package/dist/action.js +73 -2
  9. package/dist/action.js.map +1 -1
  10. package/dist/agent/index.d.ts +1 -0
  11. package/dist/agent/index.d.ts.map +1 -1
  12. package/dist/agent/index.js +1 -0
  13. package/dist/agent/index.js.map +1 -1
  14. package/dist/agent/observational-memory/compactor.d.ts +43 -0
  15. package/dist/agent/observational-memory/compactor.d.ts.map +1 -0
  16. package/dist/agent/observational-memory/compactor.js +50 -0
  17. package/dist/agent/observational-memory/compactor.js.map +1 -0
  18. package/dist/agent/observational-memory/config.d.ts +37 -0
  19. package/dist/agent/observational-memory/config.d.ts.map +1 -0
  20. package/dist/agent/observational-memory/config.js +48 -0
  21. package/dist/agent/observational-memory/config.js.map +1 -0
  22. package/dist/agent/observational-memory/index.d.ts +26 -0
  23. package/dist/agent/observational-memory/index.d.ts.map +1 -0
  24. package/dist/agent/observational-memory/index.js +25 -0
  25. package/dist/agent/observational-memory/index.js.map +1 -0
  26. package/dist/agent/observational-memory/internal-run.d.ts +37 -0
  27. package/dist/agent/observational-memory/internal-run.d.ts.map +1 -0
  28. package/dist/agent/observational-memory/internal-run.js +59 -0
  29. package/dist/agent/observational-memory/internal-run.js.map +1 -0
  30. package/dist/agent/observational-memory/message-text.d.ts +13 -0
  31. package/dist/agent/observational-memory/message-text.d.ts.map +1 -0
  32. package/dist/agent/observational-memory/message-text.js +46 -0
  33. package/dist/agent/observational-memory/message-text.js.map +1 -0
  34. package/dist/agent/observational-memory/migrations.d.ts +13 -0
  35. package/dist/agent/observational-memory/migrations.d.ts.map +1 -0
  36. package/dist/agent/observational-memory/migrations.js +43 -0
  37. package/dist/agent/observational-memory/migrations.js.map +1 -0
  38. package/dist/agent/observational-memory/observer.d.ts +37 -0
  39. package/dist/agent/observational-memory/observer.d.ts.map +1 -0
  40. package/dist/agent/observational-memory/observer.js +82 -0
  41. package/dist/agent/observational-memory/observer.js.map +1 -0
  42. package/dist/agent/observational-memory/plugin.d.ts +16 -0
  43. package/dist/agent/observational-memory/plugin.d.ts.map +1 -0
  44. package/dist/agent/observational-memory/plugin.js +26 -0
  45. package/dist/agent/observational-memory/plugin.js.map +1 -0
  46. package/dist/agent/observational-memory/prompts.d.ts +27 -0
  47. package/dist/agent/observational-memory/prompts.d.ts.map +1 -0
  48. package/dist/agent/observational-memory/prompts.js +42 -0
  49. package/dist/agent/observational-memory/prompts.js.map +1 -0
  50. package/dist/agent/observational-memory/read.d.ts +45 -0
  51. package/dist/agent/observational-memory/read.d.ts.map +1 -0
  52. package/dist/agent/observational-memory/read.js +97 -0
  53. package/dist/agent/observational-memory/read.js.map +1 -0
  54. package/dist/agent/observational-memory/reflector.d.ts +31 -0
  55. package/dist/agent/observational-memory/reflector.d.ts.map +1 -0
  56. package/dist/agent/observational-memory/reflector.js +76 -0
  57. package/dist/agent/observational-memory/reflector.js.map +1 -0
  58. package/dist/agent/observational-memory/schema.d.ts +267 -0
  59. package/dist/agent/observational-memory/schema.d.ts.map +1 -0
  60. package/dist/agent/observational-memory/schema.js +48 -0
  61. package/dist/agent/observational-memory/schema.js.map +1 -0
  62. package/dist/agent/observational-memory/store.d.ts +52 -0
  63. package/dist/agent/observational-memory/store.d.ts.map +1 -0
  64. package/dist/agent/observational-memory/store.js +197 -0
  65. package/dist/agent/observational-memory/store.js.map +1 -0
  66. package/dist/agent/observational-memory/types.d.ts +61 -0
  67. package/dist/agent/observational-memory/types.d.ts.map +1 -0
  68. package/dist/agent/observational-memory/types.js +9 -0
  69. package/dist/agent/observational-memory/types.js.map +1 -0
  70. package/dist/agent/processors.d.ts +146 -0
  71. package/dist/agent/processors.d.ts.map +1 -0
  72. package/dist/agent/processors.js +122 -0
  73. package/dist/agent/processors.js.map +1 -0
  74. package/dist/agent/production-agent.d.ts +25 -0
  75. package/dist/agent/production-agent.d.ts.map +1 -1
  76. package/dist/agent/production-agent.js +341 -1
  77. package/dist/agent/production-agent.js.map +1 -1
  78. package/dist/agent/run-loop-with-resume.d.ts.map +1 -1
  79. package/dist/agent/run-loop-with-resume.js +48 -0
  80. package/dist/agent/run-loop-with-resume.js.map +1 -1
  81. package/dist/agent/run-store.d.ts +17 -0
  82. package/dist/agent/run-store.d.ts.map +1 -1
  83. package/dist/agent/run-store.js +55 -0
  84. package/dist/agent/run-store.js.map +1 -1
  85. package/dist/agent/runtime-context.d.ts +30 -0
  86. package/dist/agent/runtime-context.d.ts.map +1 -1
  87. package/dist/agent/runtime-context.js +54 -1
  88. package/dist/agent/runtime-context.js.map +1 -1
  89. package/dist/agent/tool-call-journal.d.ts +99 -0
  90. package/dist/agent/tool-call-journal.d.ts.map +1 -0
  91. package/dist/agent/tool-call-journal.js +212 -0
  92. package/dist/agent/tool-call-journal.js.map +1 -0
  93. package/dist/agent/types.d.ts +35 -0
  94. package/dist/agent/types.d.ts.map +1 -1
  95. package/dist/agent/types.js.map +1 -1
  96. package/dist/cli/add.d.ts +109 -0
  97. package/dist/cli/add.d.ts.map +1 -0
  98. package/dist/cli/add.js +352 -0
  99. package/dist/cli/add.js.map +1 -0
  100. package/dist/cli/connect.d.ts +2 -2
  101. package/dist/cli/connect.d.ts.map +1 -1
  102. package/dist/cli/connect.js +92 -24
  103. package/dist/cli/connect.js.map +1 -1
  104. package/dist/cli/eval.d.ts +17 -0
  105. package/dist/cli/eval.d.ts.map +1 -0
  106. package/dist/cli/eval.js +121 -0
  107. package/dist/cli/eval.js.map +1 -0
  108. package/dist/cli/index.js +44 -3
  109. package/dist/cli/index.js.map +1 -1
  110. package/dist/cli/mcp.d.ts.map +1 -1
  111. package/dist/cli/mcp.js +11 -5
  112. package/dist/cli/mcp.js.map +1 -1
  113. package/dist/cli/plan-local.d.ts +66 -5
  114. package/dist/cli/plan-local.d.ts.map +1 -1
  115. package/dist/cli/plan-local.js +622 -21
  116. package/dist/cli/plan-local.js.map +1 -1
  117. package/dist/cli/skills.d.ts +2 -2
  118. package/dist/cli/skills.d.ts.map +1 -1
  119. package/dist/cli/skills.js +108 -62
  120. package/dist/cli/skills.js.map +1 -1
  121. package/dist/client/AssistantChat.d.ts.map +1 -1
  122. package/dist/client/AssistantChat.js +118 -92
  123. package/dist/client/AssistantChat.js.map +1 -1
  124. package/dist/client/agent-chat-adapter.d.ts.map +1 -1
  125. package/dist/client/agent-chat-adapter.js +16 -0
  126. package/dist/client/agent-chat-adapter.js.map +1 -1
  127. package/dist/client/chat/tool-call-display.d.ts +20 -1
  128. package/dist/client/chat/tool-call-display.d.ts.map +1 -1
  129. package/dist/client/chat/tool-call-display.js +32 -7
  130. package/dist/client/chat/tool-call-display.js.map +1 -1
  131. package/dist/client/sse-event-processor.d.ts +13 -0
  132. package/dist/client/sse-event-processor.d.ts.map +1 -1
  133. package/dist/client/sse-event-processor.js +21 -0
  134. package/dist/client/sse-event-processor.js.map +1 -1
  135. package/dist/coding-tools/run-code.d.ts.map +1 -1
  136. package/dist/coding-tools/run-code.js +18 -2
  137. package/dist/coding-tools/run-code.js.map +1 -1
  138. package/dist/db/client.d.ts +4 -2
  139. package/dist/db/client.d.ts.map +1 -1
  140. package/dist/db/client.js +6 -4
  141. package/dist/db/client.js.map +1 -1
  142. package/dist/deploy/route-discovery.d.ts.map +1 -1
  143. package/dist/deploy/route-discovery.js +1 -0
  144. package/dist/deploy/route-discovery.js.map +1 -1
  145. package/dist/eval/agent-runner.d.ts +63 -0
  146. package/dist/eval/agent-runner.d.ts.map +1 -0
  147. package/dist/eval/agent-runner.js +142 -0
  148. package/dist/eval/agent-runner.js.map +1 -0
  149. package/dist/eval/define-eval.d.ts +29 -0
  150. package/dist/eval/define-eval.d.ts.map +1 -0
  151. package/dist/eval/define-eval.js +43 -0
  152. package/dist/eval/define-eval.js.map +1 -0
  153. package/dist/eval/index.d.ts +18 -0
  154. package/dist/eval/index.d.ts.map +1 -0
  155. package/dist/eval/index.js +17 -0
  156. package/dist/eval/index.js.map +1 -0
  157. package/dist/eval/report.d.ts +8 -0
  158. package/dist/eval/report.d.ts.map +1 -0
  159. package/dist/eval/report.js +44 -0
  160. package/dist/eval/report.js.map +1 -0
  161. package/dist/eval/runner.d.ts +67 -0
  162. package/dist/eval/runner.d.ts.map +1 -0
  163. package/dist/eval/runner.js +256 -0
  164. package/dist/eval/runner.js.map +1 -0
  165. package/dist/eval/scorer.d.ts +83 -0
  166. package/dist/eval/scorer.d.ts.map +1 -0
  167. package/dist/eval/scorer.js +195 -0
  168. package/dist/eval/scorer.js.map +1 -0
  169. package/dist/eval/types.d.ts +162 -0
  170. package/dist/eval/types.d.ts.map +1 -0
  171. package/dist/eval/types.js +20 -0
  172. package/dist/eval/types.js.map +1 -0
  173. package/dist/extensions/fetch-tool.d.ts.map +1 -1
  174. package/dist/extensions/fetch-tool.js +80 -15
  175. package/dist/extensions/fetch-tool.js.map +1 -1
  176. package/dist/extensions/web-content.d.ts +61 -0
  177. package/dist/extensions/web-content.d.ts.map +1 -0
  178. package/dist/extensions/web-content.js +468 -0
  179. package/dist/extensions/web-content.js.map +1 -0
  180. package/dist/extensions/web-search-tool.js +3 -3
  181. package/dist/extensions/web-search-tool.js.map +1 -1
  182. package/dist/mcp/build-server.d.ts.map +1 -1
  183. package/dist/mcp/build-server.js +4 -1
  184. package/dist/mcp/build-server.js.map +1 -1
  185. package/dist/observability/traces.d.ts.map +1 -1
  186. package/dist/observability/traces.js +100 -1
  187. package/dist/observability/traces.js.map +1 -1
  188. package/dist/observability/tracing.d.ts +73 -0
  189. package/dist/observability/tracing.d.ts.map +1 -0
  190. package/dist/observability/tracing.js +126 -0
  191. package/dist/observability/tracing.js.map +1 -0
  192. package/dist/onboarding/default-steps.d.ts.map +1 -1
  193. package/dist/onboarding/default-steps.js +4 -1
  194. package/dist/onboarding/default-steps.js.map +1 -1
  195. package/dist/provider-api/actions/query-staged-dataset.d.ts +1 -1
  196. package/dist/provider-api/corpus-jobs.d.ts +80 -0
  197. package/dist/provider-api/corpus-jobs.d.ts.map +1 -1
  198. package/dist/provider-api/corpus-jobs.js +219 -22
  199. package/dist/provider-api/corpus-jobs.js.map +1 -1
  200. package/dist/provider-api/index.d.ts +24 -32
  201. package/dist/provider-api/index.d.ts.map +1 -1
  202. package/dist/provider-api/index.js +28 -1
  203. package/dist/provider-api/index.js.map +1 -1
  204. package/dist/scripts/agent-engines/list-agent-engines.d.ts.map +1 -1
  205. package/dist/scripts/agent-engines/list-agent-engines.js +10 -3
  206. package/dist/scripts/agent-engines/list-agent-engines.js.map +1 -1
  207. package/dist/server/action-discovery.d.ts.map +1 -1
  208. package/dist/server/action-discovery.js +4 -0
  209. package/dist/server/action-discovery.js.map +1 -1
  210. package/dist/server/agent-chat-plugin.d.ts +9 -0
  211. package/dist/server/agent-chat-plugin.d.ts.map +1 -1
  212. package/dist/server/agent-chat-plugin.js +119 -111
  213. package/dist/server/agent-chat-plugin.js.map +1 -1
  214. package/dist/server/agent-teams.d.ts +62 -0
  215. package/dist/server/agent-teams.d.ts.map +1 -1
  216. package/dist/server/agent-teams.js +99 -2
  217. package/dist/server/agent-teams.js.map +1 -1
  218. package/dist/server/better-auth-instance.d.ts +7 -0
  219. package/dist/server/better-auth-instance.d.ts.map +1 -1
  220. package/dist/server/better-auth-instance.js +90 -0
  221. package/dist/server/better-auth-instance.js.map +1 -1
  222. package/dist/server/core-routes-plugin.d.ts.map +1 -1
  223. package/dist/server/core-routes-plugin.js +7 -4
  224. package/dist/server/core-routes-plugin.js.map +1 -1
  225. package/dist/server/credential-provider.d.ts.map +1 -1
  226. package/dist/server/credential-provider.js +2 -0
  227. package/dist/server/credential-provider.js.map +1 -1
  228. package/dist/server/deep-link.d.ts +7 -0
  229. package/dist/server/deep-link.d.ts.map +1 -1
  230. package/dist/server/deep-link.js +13 -2
  231. package/dist/server/deep-link.js.map +1 -1
  232. package/dist/server/framework-request-handler.d.ts.map +1 -1
  233. package/dist/server/framework-request-handler.js +33 -1
  234. package/dist/server/framework-request-handler.js.map +1 -1
  235. package/dist/server/index.d.ts +2 -1
  236. package/dist/server/index.d.ts.map +1 -1
  237. package/dist/server/index.js +2 -1
  238. package/dist/server/index.js.map +1 -1
  239. package/dist/templates/default/.agents/skills/actions/SKILL.md +52 -1
  240. package/dist/templates/default/.agents/skills/security/SKILL.md +22 -0
  241. package/dist/templates/workspace-core/.agents/skills/actions/SKILL.md +52 -1
  242. package/dist/templates/workspace-core/.agents/skills/external-agents/SKILL.md +16 -4
  243. package/dist/templates/workspace-core/.agents/skills/harness-agents/SKILL.md +20 -0
  244. package/dist/templates/workspace-core/.agents/skills/observability/SKILL.md +31 -0
  245. package/dist/templates/workspace-core/.agents/skills/security/SKILL.md +22 -0
  246. package/docs/content/actions.md +50 -0
  247. package/docs/content/agent-teams.md +32 -0
  248. package/docs/content/blueprint-installer.md +73 -0
  249. package/docs/content/durable-resume.md +49 -0
  250. package/docs/content/evals.md +141 -0
  251. package/docs/content/external-agents.md +2 -2
  252. package/docs/content/human-approval.md +101 -0
  253. package/docs/content/observability.md +21 -0
  254. package/docs/content/observational-memory.md +63 -0
  255. package/docs/content/plan-plugin.md +5 -0
  256. package/docs/content/pr-visual-recap.md +9 -5
  257. package/docs/content/processors.md +99 -0
  258. package/docs/content/sandbox-adapters.md +134 -0
  259. package/docs/content/template-plan.md +97 -21
  260. package/package.json +10 -1
  261. package/src/templates/default/.agents/skills/actions/SKILL.md +52 -1
  262. package/src/templates/default/.agents/skills/security/SKILL.md +22 -0
  263. package/src/templates/workspace-core/.agents/skills/actions/SKILL.md +52 -1
  264. package/src/templates/workspace-core/.agents/skills/external-agents/SKILL.md +16 -4
  265. package/src/templates/workspace-core/.agents/skills/harness-agents/SKILL.md +20 -0
  266. package/src/templates/workspace-core/.agents/skills/observability/SKILL.md +31 -0
  267. package/src/templates/workspace-core/.agents/skills/security/SKILL.md +22 -0
@@ -19,14 +19,17 @@ import { createToolSearchEntry, TOOL_SEARCH_ACTION_NAME, } from "./tool-search.j
19
19
  import { getDefaultMaxIterations, normalizeMaxIterations, readAgentLoopSettings, } from "./loop-settings.js";
20
20
  import { isReasoningEffort, normalizeReasoningEffortForModel, } from "../shared/reasoning-effort.js";
21
21
  import { isAgentActionStopError } from "../action.js";
22
- import { writeLedgerEntry, readLedgerEntry, clearLedgerForThread, } from "./run-store.js";
22
+ import { writeLedgerEntry, readLedgerEntry, clearLedgerForThread, getCurrentTurnEventsForThread, } from "./run-store.js";
23
+ import { classifyToolCallJournal, findCompletedJournalEntry, } from "./tool-call-journal.js";
23
24
  import { preUploadAttachments } from "../file-upload/pre-upload-attachments.js";
24
25
  import { extensionIdFromPathname } from "../extensions/path.js";
25
26
  import { applyContextDirectives } from "./context-xray/apply-directives.js";
27
+ import { ProcessorChain, TripWire, toolCallsFromContent, } from "./processors.js";
26
28
  import { completeRun as completeProgressRun, startRun as startProgressRun, updateRunProgress, } from "../progress/registry.js";
27
29
  import { loadContextDirectives } from "./context-xray/directives-store.js";
28
30
  import { buildManifest, writeContextManifest, } from "./context-xray/manifest.js";
29
31
  import { computeProtectedSegmentIds } from "./context-xray/segments.js";
32
+ import { maybeCompactThread, buildObservationalContext, hasObservationalMemory, serializeObservationalMemoryBlock, } from "./observational-memory/index.js";
30
33
  // Register built-in engines on first import
31
34
  registerBuiltinEngines();
32
35
  export { PROVIDER_TO_ENV };
@@ -1065,6 +1068,84 @@ function findCurrentTurnStartForContinuation(messages) {
1065
1068
  }
1066
1069
  return 0;
1067
1070
  }
1071
+ /**
1072
+ * First message index that is safe to start a trimmed window on. A window must
1073
+ * not begin with a tool-result-only user message — that would orphan it from
1074
+ * the assistant tool-call turn it answers and break Anthropic's tool_use /
1075
+ * tool_result pairing. We walk forward from `desiredStart` to the first
1076
+ * non-orphaned boundary; if none exists we refuse to trim (return -1).
1077
+ */
1078
+ function findSafeWindowStart(messages, desiredStart) {
1079
+ for (let i = Math.max(0, desiredStart); i < messages.length; i++) {
1080
+ if (!isToolResultOnlyUserMessage(messages[i]))
1081
+ return i;
1082
+ }
1083
+ return -1;
1084
+ }
1085
+ /**
1086
+ * Observational Memory consumer (threshold-gated, conservative).
1087
+ *
1088
+ * Builds the three-tier OM context for a thread and, ONLY when the thread has
1089
+ * already crossed the compaction threshold (i.e. it has at least one persisted
1090
+ * observation/reflection), returns a rewritten message list that:
1091
+ * - prepends a single leading user-role "Observational Memory" block holding the
1092
+ * reflections + observations, and
1093
+ * - replaces the raw older history with just the recent-raw-message window,
1094
+ * keeping the current user turn and any pending tool results intact.
1095
+ *
1096
+ * For threads with NO OM entries (every short thread) it returns the input
1097
+ * array unchanged by reference, so the common path is byte-for-byte identical.
1098
+ *
1099
+ * Best-effort: any failure returns the input unchanged so OM can never break a
1100
+ * normal turn.
1101
+ */
1102
+ async function applyObservationalMemoryToContext(messages, opts) {
1103
+ if (!opts.ownerEmail)
1104
+ return messages;
1105
+ try {
1106
+ const context = await buildObservationalContext({
1107
+ threadId: opts.threadId,
1108
+ ownerEmail: opts.ownerEmail,
1109
+ orgId: opts.orgId ?? null,
1110
+ messages,
1111
+ });
1112
+ // No compacted memory yet → short thread, leave context untouched.
1113
+ if (!hasObservationalMemory(context))
1114
+ return messages;
1115
+ const block = serializeObservationalMemoryBlock(context);
1116
+ if (!block.trim())
1117
+ return messages;
1118
+ // EngineMessage has no "system" role; the framework injects auxiliary
1119
+ // context as leading user messages (same convention as the continuation
1120
+ // nudge and the resume journal note), and the serialized block is clearly
1121
+ // self-labeled "[Observational Memory]".
1122
+ const omMessage = {
1123
+ role: "user",
1124
+ content: [{ type: "text", text: block }],
1125
+ };
1126
+ // Trim the raw prefix to only the recent-raw window. The window is the tail
1127
+ // of `messages`, so it always contains the latest user turn and any pending
1128
+ // tool results. Guard the boundary so we never start mid tool_use/result
1129
+ // pair; if a safe boundary can't be found, additively inject the memory
1130
+ // block WITHOUT trimming (the conservative fallback) so we never drop a
1131
+ // pending tool result.
1132
+ const recentCount = context.recentMessages.length;
1133
+ if (recentCount === 0 || recentCount >= messages.length) {
1134
+ return [omMessage, ...messages];
1135
+ }
1136
+ const desiredStart = messages.length - recentCount;
1137
+ const safeStart = findSafeWindowStart(messages, desiredStart);
1138
+ if (safeStart < 0) {
1139
+ // Whole tail is tool-result-only (degenerate) — don't trim.
1140
+ return [omMessage, ...messages];
1141
+ }
1142
+ return [omMessage, ...messages.slice(safeStart)];
1143
+ }
1144
+ catch (err) {
1145
+ console.warn("[observational-memory] context injection skipped:", err instanceof Error ? err.message : String(err));
1146
+ return messages;
1147
+ }
1148
+ }
1068
1149
  function seedReadOnlyToolResultsFromHistory(messages, actions) {
1069
1150
  const cache = new Map();
1070
1151
  if (!isInternalContinuationTurn(messages))
@@ -1398,6 +1479,11 @@ function toolInputSchemaErrorResult(toolName, input, error) {
1398
1479
  */
1399
1480
  export async function runAgentLoop(opts) {
1400
1481
  const { engine, model, systemPrompt, tools, messages, actions, send, signal, } = opts;
1482
+ // Build the processor chain only when at least one processor is supplied so
1483
+ // the common (no-processors) path is unchanged and carries zero overhead.
1484
+ const processorChain = opts.processors && opts.processors.length > 0
1485
+ ? new ProcessorChain(opts.processors)
1486
+ : null;
1401
1487
  const usage = {
1402
1488
  inputTokens: 0,
1403
1489
  outputTokens: 0,
@@ -1421,9 +1507,46 @@ export async function runAgentLoop(opts) {
1421
1507
  const readOnlyToolResultCache = seedReadOnlyToolResultsFromHistory(messages, actions);
1422
1508
  const duplicateReadOnlyToolCalls = new Map();
1423
1509
  const writeToolInterruptions = seedWriteToolInterruptionsFromHistory(messages, actions);
1510
+ // Tool-call journal hard-block (resume safety). Snapshot the per-turn journal
1511
+ // ONCE here, before any tool runs in this chunk, so it reflects only PRIOR
1512
+ // run chunks of this logical turn. A write tool whose exact call already
1513
+ // completed in an earlier interrupted chunk must not re-fire its side effect;
1514
+ // when matched, runToolCall returns the journaled result instead of executing.
1515
+ // Loaded eagerly (not lazily mid-loop) so the current chunk's own
1516
+ // asynchronously-persisted tool_done events can never leak in and make a
1517
+ // same-chunk call wrongly short-circuit. Best-effort: any ledger failure
1518
+ // leaves the journal empty and all calls run normally. Fresh first-turn calls
1519
+ // see an empty journal and are unaffected.
1520
+ let toolCallJournal = null;
1521
+ const consumedJournalKeys = new Set();
1522
+ if (opts.threadId) {
1523
+ try {
1524
+ const priorEvents = await getCurrentTurnEventsForThread(opts.threadId);
1525
+ if (priorEvents.length > 0) {
1526
+ toolCallJournal = classifyToolCallJournal(priorEvents);
1527
+ }
1528
+ }
1529
+ catch {
1530
+ // Journal is a hardening layer, never a gate — a failed ledger read just
1531
+ // means no hard-block this turn.
1532
+ }
1533
+ }
1424
1534
  const bufferTextUntilFinalGuard = Boolean(opts.finalResponseGuard);
1425
1535
  let finalGuardRetries = 0;
1426
1536
  let iterations = 0;
1537
+ // Set when an in-loop processor aborts via `abort()` / throws a `TripWire`.
1538
+ // The loop emits the `tripwire` event, surfaces the reason as a final
1539
+ // assistant message, and stops cleanly.
1540
+ let tripwire = null;
1541
+ const emitTripwire = (err) => {
1542
+ tripwire = err;
1543
+ send({
1544
+ type: "tripwire",
1545
+ reason: err.message,
1546
+ ...(err.processor ? { processor: err.processor } : {}),
1547
+ });
1548
+ send({ type: "text", text: err.message });
1549
+ };
1427
1550
  while (true) {
1428
1551
  if (signal.aborted)
1429
1552
  break;
@@ -1465,6 +1588,21 @@ export async function runAgentLoop(opts) {
1465
1588
  catch (err) {
1466
1589
  console.warn("[context-xray] context transform skipped:", err instanceof Error ? err.message : String(err));
1467
1590
  }
1591
+ // Observational Memory (consumer): for long threads that have already been
1592
+ // compacted, fold the reflections+observations in as a leading context
1593
+ // block and prefer the recent-raw-message window over the full raw
1594
+ // history. No-op (returns the same array) for short threads with no OM
1595
+ // entries, so the common path is unchanged. Runs after the context-xray
1596
+ // transform so the two compose; best-effort inside the helper. Gated on an
1597
+ // authenticated owner so anonymous threads never read OM scoped to a
1598
+ // shared default identity.
1599
+ if (opts.ownerEmail) {
1600
+ contextMessages = await applyObservationalMemoryToContext(contextMessages, {
1601
+ threadId: opts.threadId,
1602
+ ownerEmail: opts.ownerEmail,
1603
+ orgId: opts.orgId ?? null,
1604
+ });
1605
+ }
1468
1606
  }
1469
1607
  for (let retry = 0;; retry++) {
1470
1608
  assistantContent = undefined;
@@ -1502,6 +1640,22 @@ export async function runAgentLoop(opts) {
1502
1640
  });
1503
1641
  };
1504
1642
  for await (const event of eventStream) {
1643
+ // In-loop processor seam (stream hook). Each chunk is offered to every
1644
+ // processor's `processOutputStream` before the loop handles it. A
1645
+ // processor `abort()` throws a TripWire; catch it locally so it is not
1646
+ // mistaken for a retryable engine error, then break out cleanly.
1647
+ if (processorChain) {
1648
+ try {
1649
+ await processorChain.runStream(event);
1650
+ }
1651
+ catch (err) {
1652
+ if (err instanceof TripWire) {
1653
+ emitTripwire(err);
1654
+ break;
1655
+ }
1656
+ throw err;
1657
+ }
1658
+ }
1505
1659
  if (event.type === "text-delta") {
1506
1660
  if (bufferTextUntilFinalGuard) {
1507
1661
  bufferedAssistantText += event.text;
@@ -1594,6 +1748,10 @@ export async function runAgentLoop(opts) {
1594
1748
  throw err;
1595
1749
  }
1596
1750
  }
1751
+ // A processor aborted mid-stream. The tripwire event + final message were
1752
+ // already emitted; halt the loop without sending a normal `done`.
1753
+ if (tripwire)
1754
+ break;
1597
1755
  if (!assistantContent && toolCallErrors.size > 0) {
1598
1756
  assistantContent = [];
1599
1757
  }
@@ -1624,6 +1782,31 @@ export async function runAgentLoop(opts) {
1624
1782
  : part);
1625
1783
  messages.push({ role: "assistant", content: assistantContentForHistory });
1626
1784
  const toolCallParts = assistantContent.filter((p) => p.type === "tool-call");
1785
+ // In-loop processor seam (step hook). Fires once per model response, before any
1786
+ // tool execution, with the tool calls the model just requested (empty for a
1787
+ // final answer) plus the stop reason and cumulative usage. A coverage gate
1788
+ // can inspect what the model is about to do and `abort()` before tools run.
1789
+ if (processorChain) {
1790
+ try {
1791
+ await processorChain.runStep({
1792
+ toolCalls: toolCallsFromContent(assistantContent),
1793
+ ...(terminalStopReason ? { finishReason: terminalStopReason } : {}),
1794
+ usage: {
1795
+ inputTokens: usage.inputTokens,
1796
+ outputTokens: usage.outputTokens,
1797
+ cacheReadTokens: usage.cacheReadTokens,
1798
+ cacheWriteTokens: usage.cacheWriteTokens,
1799
+ },
1800
+ });
1801
+ }
1802
+ catch (err) {
1803
+ if (err instanceof TripWire) {
1804
+ emitTripwire(err);
1805
+ break;
1806
+ }
1807
+ throw err;
1808
+ }
1809
+ }
1627
1810
  const flushBufferedAssistantText = () => {
1628
1811
  if (!bufferTextUntilFinalGuard)
1629
1812
  return;
@@ -1690,6 +1873,11 @@ export async function runAgentLoop(opts) {
1690
1873
  finalGuardRetries = 0;
1691
1874
  flushBufferedAssistantText();
1692
1875
  let requestedActionStop = null;
1876
+ // Human-in-the-loop approvals granted by the user for this turn (opt-in;
1877
+ // empty for the overwhelming majority of turns). Keyed by the stable
1878
+ // tool-call approval key so a re-issued continuation can let an approved
1879
+ // call run. The model cannot populate this — it comes from the request.
1880
+ const approvedToolCallKeys = new Set(opts.approvedToolCalls ?? []);
1693
1881
  const runToolCall = async (toolCall) => {
1694
1882
  const wireToolInput = JSON.stringify(toolCall.input ?? {});
1695
1883
  const normalizedToolInput = normalizeToolCallInputForHistory(toolCall.input);
@@ -1774,6 +1962,60 @@ export async function runAgentLoop(opts) {
1774
1962
  isError: true,
1775
1963
  };
1776
1964
  }
1965
+ // Human-in-the-loop approval gate (opt-in via defineAction
1966
+ // `needsApproval`; default off). When an action requires approval and
1967
+ // this specific call has NOT been approved by a human, pause the turn
1968
+ // instead of executing. The action's side effect never happens until a
1969
+ // human re-issues the turn approving this call's stable key.
1970
+ const approvalKey = toolCallCacheKey(toolCall.name, toolCall.input);
1971
+ if (actionEntry.needsApproval && !approvedToolCallKeys.has(approvalKey)) {
1972
+ let mustApprove = false;
1973
+ try {
1974
+ mustApprove =
1975
+ typeof actionEntry.needsApproval === "function"
1976
+ ? Boolean(await actionEntry.needsApproval(toolCall.input, {
1977
+ userEmail: getRequestUserEmail(),
1978
+ orgId: getRequestOrgId() ?? null,
1979
+ caller: "tool",
1980
+ }))
1981
+ : actionEntry.needsApproval === true;
1982
+ }
1983
+ catch {
1984
+ // Fail closed: a throwing predicate means we require approval rather
1985
+ // than silently running a high-consequence action.
1986
+ mustApprove = true;
1987
+ }
1988
+ if (mustApprove) {
1989
+ send({
1990
+ type: "tool_start",
1991
+ tool: toolCall.name,
1992
+ input: toolCall.input,
1993
+ });
1994
+ send({
1995
+ type: "approval_required",
1996
+ tool: toolCall.name,
1997
+ input: toolCall.input,
1998
+ approvalKey,
1999
+ ...(toolCall.id ? { toolCallId: toolCall.id } : {}),
2000
+ });
2001
+ const result = `Awaiting human approval to run "${toolCall.name}". This action did ` +
2002
+ `NOT execute — a human must approve this specific call before it ` +
2003
+ `can run. The turn is paused; do not retry.`;
2004
+ send({ type: "tool_done", tool: toolCall.name, result });
2005
+ recordToolResult(result, false);
2006
+ requestedActionStop ??= {
2007
+ message: `Waiting for your approval to run ${toolCall.name}.`,
2008
+ errorCode: "needs-approval",
2009
+ };
2010
+ return {
2011
+ type: "tool-result",
2012
+ toolCallId: toolCall.id,
2013
+ toolName: toolCall.name,
2014
+ toolInput: wireToolInput,
2015
+ content: result,
2016
+ };
2017
+ }
2018
+ }
1777
2019
  const cacheKey = actionEntry.readOnly === true
1778
2020
  ? toolCallCacheKey(toolCall.name, toolCall.input)
1779
2021
  : null;
@@ -1805,6 +2047,40 @@ export async function runAgentLoop(opts) {
1805
2047
  content: result,
1806
2048
  };
1807
2049
  }
2050
+ // TOOL-CALL JOURNAL HARD-BLOCK (resume safety, tool-layer enforcement).
2051
+ // The prompt-level resume journal already TELLS a resuming model not to
2052
+ // re-run completed tool calls; this enforces it at the tool layer so a
2053
+ // re-dispatched write call whose exact (tool name + input) already
2054
+ // completed in an earlier interrupted chunk of this turn does NOT execute
2055
+ // its side effect again — we return the journaled result instead and emit
2056
+ // the normal tool_start/tool_done so the transcript stays coherent.
2057
+ //
2058
+ // Gated on a non-readOnly tool + an existing prior-chunk journal (so fresh
2059
+ // calls with no completed journal entry are completely unaffected). The
2060
+ // snapshot was taken before this chunk's tools ran, so it can only match a
2061
+ // PRIOR completion, never one from the current chunk.
2062
+ if (!actionEntry.readOnly && toolCallJournal) {
2063
+ const journaled = findCompletedJournalEntry(toolCallJournal, toolCall.name, toolCall.input, consumedJournalKeys);
2064
+ if (journaled) {
2065
+ const recordedResult = journaled.result ?? "";
2066
+ const result = `(Already completed in an earlier interrupted attempt - not re-run to avoid a duplicate side effect.)\n\n` +
2067
+ recordedResult;
2068
+ send({
2069
+ type: "tool_start",
2070
+ tool: toolCall.name,
2071
+ input: toolCall.input,
2072
+ });
2073
+ send({ type: "tool_done", tool: toolCall.name, result });
2074
+ recordToolResult(result, false);
2075
+ return {
2076
+ type: "tool-result",
2077
+ toolCallId: toolCall.id,
2078
+ toolName: toolCall.name,
2079
+ toolInput: wireToolInput,
2080
+ content: result,
2081
+ };
2082
+ }
2083
+ }
1808
2084
  // Guard against write tools that have been interrupted too many times in
1809
2085
  // this turn (connection drop mid-execution → agent retries → repeat).
1810
2086
  // A write tool that keeps failing likely has a timeout / large-payload
@@ -2117,13 +2393,60 @@ export async function runAgentLoop(opts) {
2117
2393
  break;
2118
2394
  }
2119
2395
  }
2396
+ // A processor halted the run: the `tripwire` event and final message were
2397
+ // already emitted at the abort site. Do NOT send the normal `done` — the run
2398
+ // ended on a guardrail, not a clean turn. The result hook still fires below
2399
+ // so processors can observe the (halted) final text.
2400
+ if (tripwire) {
2401
+ if (processorChain) {
2402
+ try {
2403
+ await processorChain.runResult(collectTextParts(messages.flatMap((m) => (m.role === "assistant" ? m.content : []))));
2404
+ }
2405
+ catch (err) {
2406
+ if (!(err instanceof TripWire))
2407
+ throw err;
2408
+ // A result-hook abort is a no-op: the run is already halting.
2409
+ }
2410
+ }
2411
+ return usage;
2412
+ }
2120
2413
  if (!signal.aborted) {
2414
+ // In-loop processor seam (result hook). Fires once at clean run end with the
2415
+ // final assistant text so processors (e.g. a proof-of-done gate) can record
2416
+ // a verdict. A result-hook abort cannot un-finish a completed run, so a
2417
+ // TripWire here is swallowed.
2418
+ if (processorChain) {
2419
+ try {
2420
+ await processorChain.runResult(collectTextParts(messages.flatMap((m) => (m.role === "assistant" ? m.content : []))));
2421
+ }
2422
+ catch (err) {
2423
+ if (!(err instanceof TripWire))
2424
+ throw err;
2425
+ }
2426
+ }
2121
2427
  send({ type: "done" });
2122
2428
  // Clean up any zombie-completion ledger entries for this thread now that
2123
2429
  // the turn completed normally. If the run was aborted the ledger must stay
2124
2430
  // intact so the next continuation chunk can still recover from it.
2125
2431
  if (opts.threadId) {
2126
2432
  void clearLedgerForThread(opts.threadId).catch(() => { });
2433
+ // Observational Memory (producer): after a clean turn, run a best-effort
2434
+ // compaction pass so long threads accrue observations/reflections that the
2435
+ // consumer above will surface on later turns. Both the Observer and the
2436
+ // Reflector no-op below their token thresholds, so this is cheap for short
2437
+ // threads. Fire-and-forget; any failure is swallowed so OM never affects
2438
+ // the user-visible turn.
2439
+ if (opts.ownerEmail) {
2440
+ const compactThreadId = opts.threadId;
2441
+ void maybeCompactThread({
2442
+ threadId: compactThreadId,
2443
+ ownerEmail: opts.ownerEmail,
2444
+ orgId: opts.orgId ?? null,
2445
+ messages,
2446
+ }).catch((err) => {
2447
+ console.warn("[observational-memory] post-turn compaction skipped:", err instanceof Error ? err.message : String(err));
2448
+ });
2449
+ }
2127
2450
  }
2128
2451
  }
2129
2452
  return usage;
@@ -2977,6 +3300,13 @@ export function createProductionAgentHandler(options) {
2977
3300
  catch {
2978
3301
  // Experiments module unavailable — use default model
2979
3302
  }
3303
+ // TODO(processor-seam): thread `processors` from ProductionAgentOptions
3304
+ // through to runAgentLoop here once the handler exposes a way to
3305
+ // configure them (e.g. a `processors` field on ProductionAgentOptions
3306
+ // or a per-request resolver). The loop-level seam (runAgentLoop's
3307
+ // `processors` opt + ProcessorChain/TripWire) is the deliverable and is
3308
+ // already callable directly by sub-agents, A2A, MCP, and tests; this is
3309
+ // only the HTTP-handler convenience plumbing.
2980
3310
  const agentLoopOpts = {
2981
3311
  engine,
2982
3312
  model: effectiveModel,
@@ -2998,6 +3328,16 @@ export function createProductionAgentHandler(options) {
2998
3328
  ...(threadId
2999
3329
  ? { threadId: effectiveThreadId, turnId: effectiveTurnId }
3000
3330
  : {}),
3331
+ // Human-in-the-loop approval grants for this turn (sanitized — the
3332
+ // request is untrusted; accept only a bounded list of string keys).
3333
+ ...(Array.isArray(body.approvedToolCalls) &&
3334
+ body.approvedToolCalls.length
3335
+ ? {
3336
+ approvedToolCalls: body.approvedToolCalls
3337
+ .filter((k) => typeof k === "string")
3338
+ .slice(0, 200),
3339
+ }
3340
+ : {}),
3001
3341
  };
3002
3342
  send({ type: "activity", label: "Contacting model" });
3003
3343
  // loopUsage is always assigned — either via instrumentAgentLoop or