@agent-native/core 0.51.15 → 0.53.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (251)
  1. package/README.md +42 -96
  2. package/blueprints/action/crud.md +98 -0
  3. package/blueprints/channel/discord.md +74 -0
  4. package/blueprints/provider/stripe.md +87 -0
  5. package/blueprints/sandbox/docker.md +78 -0
  6. package/dist/action.d.ts +24 -0
  7. package/dist/action.d.ts.map +1 -1
  8. package/dist/action.js +4 -0
  9. package/dist/action.js.map +1 -1
  10. package/dist/agent/observational-memory/compactor.d.ts +43 -0
  11. package/dist/agent/observational-memory/compactor.d.ts.map +1 -0
  12. package/dist/agent/observational-memory/compactor.js +50 -0
  13. package/dist/agent/observational-memory/compactor.js.map +1 -0
  14. package/dist/agent/observational-memory/config.d.ts +37 -0
  15. package/dist/agent/observational-memory/config.d.ts.map +1 -0
  16. package/dist/agent/observational-memory/config.js +48 -0
  17. package/dist/agent/observational-memory/config.js.map +1 -0
  18. package/dist/agent/observational-memory/index.d.ts +26 -0
  19. package/dist/agent/observational-memory/index.d.ts.map +1 -0
  20. package/dist/agent/observational-memory/index.js +25 -0
  21. package/dist/agent/observational-memory/index.js.map +1 -0
  22. package/dist/agent/observational-memory/internal-run.d.ts +37 -0
  23. package/dist/agent/observational-memory/internal-run.d.ts.map +1 -0
  24. package/dist/agent/observational-memory/internal-run.js +59 -0
  25. package/dist/agent/observational-memory/internal-run.js.map +1 -0
  26. package/dist/agent/observational-memory/message-text.d.ts +13 -0
  27. package/dist/agent/observational-memory/message-text.d.ts.map +1 -0
  28. package/dist/agent/observational-memory/message-text.js +46 -0
  29. package/dist/agent/observational-memory/message-text.js.map +1 -0
  30. package/dist/agent/observational-memory/migrations.d.ts +13 -0
  31. package/dist/agent/observational-memory/migrations.d.ts.map +1 -0
  32. package/dist/agent/observational-memory/migrations.js +43 -0
  33. package/dist/agent/observational-memory/migrations.js.map +1 -0
  34. package/dist/agent/observational-memory/observer.d.ts +37 -0
  35. package/dist/agent/observational-memory/observer.d.ts.map +1 -0
  36. package/dist/agent/observational-memory/observer.js +82 -0
  37. package/dist/agent/observational-memory/observer.js.map +1 -0
  38. package/dist/agent/observational-memory/plugin.d.ts +16 -0
  39. package/dist/agent/observational-memory/plugin.d.ts.map +1 -0
  40. package/dist/agent/observational-memory/plugin.js +26 -0
  41. package/dist/agent/observational-memory/plugin.js.map +1 -0
  42. package/dist/agent/observational-memory/prompts.d.ts +27 -0
  43. package/dist/agent/observational-memory/prompts.d.ts.map +1 -0
  44. package/dist/agent/observational-memory/prompts.js +42 -0
  45. package/dist/agent/observational-memory/prompts.js.map +1 -0
  46. package/dist/agent/observational-memory/read.d.ts +47 -0
  47. package/dist/agent/observational-memory/read.d.ts.map +1 -0
  48. package/dist/agent/observational-memory/read.js +99 -0
  49. package/dist/agent/observational-memory/read.js.map +1 -0
  50. package/dist/agent/observational-memory/reflector.d.ts +31 -0
  51. package/dist/agent/observational-memory/reflector.d.ts.map +1 -0
  52. package/dist/agent/observational-memory/reflector.js +76 -0
  53. package/dist/agent/observational-memory/reflector.js.map +1 -0
  54. package/dist/agent/observational-memory/schema.d.ts +267 -0
  55. package/dist/agent/observational-memory/schema.d.ts.map +1 -0
  56. package/dist/agent/observational-memory/schema.js +48 -0
  57. package/dist/agent/observational-memory/schema.js.map +1 -0
  58. package/dist/agent/observational-memory/store.d.ts +52 -0
  59. package/dist/agent/observational-memory/store.d.ts.map +1 -0
  60. package/dist/agent/observational-memory/store.js +197 -0
  61. package/dist/agent/observational-memory/store.js.map +1 -0
  62. package/dist/agent/observational-memory/types.d.ts +61 -0
  63. package/dist/agent/observational-memory/types.d.ts.map +1 -0
  64. package/dist/agent/observational-memory/types.js +9 -0
  65. package/dist/agent/observational-memory/types.js.map +1 -0
  66. package/dist/agent/production-agent.d.ts +15 -0
  67. package/dist/agent/production-agent.d.ts.map +1 -1
  68. package/dist/agent/production-agent.js +240 -1
  69. package/dist/agent/production-agent.js.map +1 -1
  70. package/dist/agent/run-loop-with-resume.d.ts.map +1 -1
  71. package/dist/agent/run-loop-with-resume.js +49 -0
  72. package/dist/agent/run-loop-with-resume.js.map +1 -1
  73. package/dist/agent/run-store.d.ts +17 -0
  74. package/dist/agent/run-store.d.ts.map +1 -1
  75. package/dist/agent/run-store.js +55 -0
  76. package/dist/agent/run-store.js.map +1 -1
  77. package/dist/agent/runtime-context.d.ts +30 -0
  78. package/dist/agent/runtime-context.d.ts.map +1 -1
  79. package/dist/agent/runtime-context.js +54 -1
  80. package/dist/agent/runtime-context.js.map +1 -1
  81. package/dist/agent/tool-call-journal.d.ts +101 -0
  82. package/dist/agent/tool-call-journal.d.ts.map +1 -0
  83. package/dist/agent/tool-call-journal.js +214 -0
  84. package/dist/agent/tool-call-journal.js.map +1 -0
  85. package/dist/agent/types.d.ts +24 -0
  86. package/dist/agent/types.d.ts.map +1 -1
  87. package/dist/agent/types.js.map +1 -1
  88. package/dist/cli/add.d.ts +109 -0
  89. package/dist/cli/add.d.ts.map +1 -0
  90. package/dist/cli/add.js +352 -0
  91. package/dist/cli/add.js.map +1 -0
  92. package/dist/cli/connect.d.ts +5 -4
  93. package/dist/cli/connect.d.ts.map +1 -1
  94. package/dist/cli/connect.js +157 -48
  95. package/dist/cli/connect.js.map +1 -1
  96. package/dist/cli/eval.d.ts +17 -0
  97. package/dist/cli/eval.d.ts.map +1 -0
  98. package/dist/cli/eval.js +121 -0
  99. package/dist/cli/eval.js.map +1 -0
  100. package/dist/cli/index.js +44 -3
  101. package/dist/cli/index.js.map +1 -1
  102. package/dist/cli/mcp-config-writers.d.ts +20 -13
  103. package/dist/cli/mcp-config-writers.d.ts.map +1 -1
  104. package/dist/cli/mcp-config-writers.js +152 -13
  105. package/dist/cli/mcp-config-writers.js.map +1 -1
  106. package/dist/cli/mcp.d.ts +2 -2
  107. package/dist/cli/mcp.d.ts.map +1 -1
  108. package/dist/cli/mcp.js +50 -196
  109. package/dist/cli/mcp.js.map +1 -1
  110. package/dist/cli/plan-local.d.ts +69 -6
  111. package/dist/cli/plan-local.d.ts.map +1 -1
  112. package/dist/cli/plan-local.js +517 -23
  113. package/dist/cli/plan-local.js.map +1 -1
  114. package/dist/cli/recap.d.ts.map +1 -1
  115. package/dist/cli/recap.js +1 -1
  116. package/dist/cli/recap.js.map +1 -1
  117. package/dist/cli/skills.d.ts +13 -6
  118. package/dist/cli/skills.d.ts.map +1 -1
  119. package/dist/cli/skills.js +287 -111
  120. package/dist/cli/skills.js.map +1 -1
  121. package/dist/client/AssistantChat.d.ts.map +1 -1
  122. package/dist/client/AssistantChat.js +118 -92
  123. package/dist/client/AssistantChat.js.map +1 -1
  124. package/dist/client/agent-chat-adapter.d.ts.map +1 -1
  125. package/dist/client/agent-chat-adapter.js +16 -0
  126. package/dist/client/agent-chat-adapter.js.map +1 -1
  127. package/dist/client/agent-engine-key.d.ts +6 -4
  128. package/dist/client/agent-engine-key.d.ts.map +1 -1
  129. package/dist/client/agent-engine-key.js +9 -6
  130. package/dist/client/agent-engine-key.js.map +1 -1
  131. package/dist/client/chat/run-recovery.js +1 -1
  132. package/dist/client/chat/run-recovery.js.map +1 -1
  133. package/dist/client/chat/tool-call-display.d.ts +20 -1
  134. package/dist/client/chat/tool-call-display.d.ts.map +1 -1
  135. package/dist/client/chat/tool-call-display.js +32 -7
  136. package/dist/client/chat/tool-call-display.js.map +1 -1
  137. package/dist/client/settings/SettingsPanel.d.ts.map +1 -1
  138. package/dist/client/settings/SettingsPanel.js +7 -14
  139. package/dist/client/settings/SettingsPanel.js.map +1 -1
  140. package/dist/client/sse-event-processor.d.ts +13 -0
  141. package/dist/client/sse-event-processor.d.ts.map +1 -1
  142. package/dist/client/sse-event-processor.js +21 -0
  143. package/dist/client/sse-event-processor.js.map +1 -1
  144. package/dist/coding-tools/run-code.d.ts +7 -0
  145. package/dist/coding-tools/run-code.d.ts.map +1 -1
  146. package/dist/coding-tools/run-code.js +21 -106
  147. package/dist/coding-tools/run-code.js.map +1 -1
  148. package/dist/coding-tools/sandbox/adapter.d.ts +79 -0
  149. package/dist/coding-tools/sandbox/adapter.d.ts.map +1 -0
  150. package/dist/coding-tools/sandbox/adapter.js +24 -0
  151. package/dist/coding-tools/sandbox/adapter.js.map +1 -0
  152. package/dist/coding-tools/sandbox/index.d.ts +51 -0
  153. package/dist/coding-tools/sandbox/index.d.ts.map +1 -0
  154. package/dist/coding-tools/sandbox/index.js +79 -0
  155. package/dist/coding-tools/sandbox/index.js.map +1 -0
  156. package/dist/coding-tools/sandbox/local-child-process-adapter.d.ts +24 -0
  157. package/dist/coding-tools/sandbox/local-child-process-adapter.d.ts.map +1 -0
  158. package/dist/coding-tools/sandbox/local-child-process-adapter.js +141 -0
  159. package/dist/coding-tools/sandbox/local-child-process-adapter.js.map +1 -0
  160. package/dist/db/client.d.ts +4 -2
  161. package/dist/db/client.d.ts.map +1 -1
  162. package/dist/db/client.js +6 -4
  163. package/dist/db/client.js.map +1 -1
  164. package/dist/deploy/route-discovery.d.ts.map +1 -1
  165. package/dist/deploy/route-discovery.js +1 -0
  166. package/dist/deploy/route-discovery.js.map +1 -1
  167. package/dist/eval/agent-runner.d.ts +63 -0
  168. package/dist/eval/agent-runner.d.ts.map +1 -0
  169. package/dist/eval/agent-runner.js +142 -0
  170. package/dist/eval/agent-runner.js.map +1 -0
  171. package/dist/eval/define-eval.d.ts +29 -0
  172. package/dist/eval/define-eval.d.ts.map +1 -0
  173. package/dist/eval/define-eval.js +43 -0
  174. package/dist/eval/define-eval.js.map +1 -0
  175. package/dist/eval/index.d.ts +18 -0
  176. package/dist/eval/index.d.ts.map +1 -0
  177. package/dist/eval/index.js +17 -0
  178. package/dist/eval/index.js.map +1 -0
  179. package/dist/eval/report.d.ts +8 -0
  180. package/dist/eval/report.d.ts.map +1 -0
  181. package/dist/eval/report.js +44 -0
  182. package/dist/eval/report.js.map +1 -0
  183. package/dist/eval/runner.d.ts +67 -0
  184. package/dist/eval/runner.d.ts.map +1 -0
  185. package/dist/eval/runner.js +256 -0
  186. package/dist/eval/runner.js.map +1 -0
  187. package/dist/eval/scorer.d.ts +83 -0
  188. package/dist/eval/scorer.d.ts.map +1 -0
  189. package/dist/eval/scorer.js +195 -0
  190. package/dist/eval/scorer.js.map +1 -0
  191. package/dist/eval/types.d.ts +162 -0
  192. package/dist/eval/types.d.ts.map +1 -0
  193. package/dist/eval/types.js +20 -0
  194. package/dist/eval/types.js.map +1 -0
  195. package/dist/observability/traces.d.ts.map +1 -1
  196. package/dist/observability/traces.js +100 -1
  197. package/dist/observability/traces.js.map +1 -1
  198. package/dist/observability/tracing.d.ts +73 -0
  199. package/dist/observability/tracing.d.ts.map +1 -0
  200. package/dist/observability/tracing.js +126 -0
  201. package/dist/observability/tracing.js.map +1 -0
  202. package/dist/onboarding/default-steps.d.ts.map +1 -1
  203. package/dist/onboarding/default-steps.js +4 -1
  204. package/dist/onboarding/default-steps.js.map +1 -1
  205. package/dist/provider-api/actions/query-staged-dataset.d.ts +1 -1
  206. package/dist/scripts/agent-engines/list-agent-engines.d.ts.map +1 -1
  207. package/dist/scripts/agent-engines/list-agent-engines.js +10 -3
  208. package/dist/scripts/agent-engines/list-agent-engines.js.map +1 -1
  209. package/dist/server/action-discovery.d.ts.map +1 -1
  210. package/dist/server/action-discovery.js +4 -0
  211. package/dist/server/action-discovery.js.map +1 -1
  212. package/dist/server/agent-chat-plugin.d.ts +9 -0
  213. package/dist/server/agent-chat-plugin.d.ts.map +1 -1
  214. package/dist/server/agent-chat-plugin.js +118 -110
  215. package/dist/server/agent-chat-plugin.js.map +1 -1
  216. package/dist/server/agent-engine-api-key-route.d.ts +37 -0
  217. package/dist/server/agent-engine-api-key-route.d.ts.map +1 -0
  218. package/dist/server/agent-engine-api-key-route.js +105 -0
  219. package/dist/server/agent-engine-api-key-route.js.map +1 -0
  220. package/dist/server/agent-teams.d.ts +62 -0
  221. package/dist/server/agent-teams.d.ts.map +1 -1
  222. package/dist/server/agent-teams.js +99 -2
  223. package/dist/server/agent-teams.js.map +1 -1
  224. package/dist/server/core-routes-plugin.d.ts.map +1 -1
  225. package/dist/server/core-routes-plugin.js +17 -10
  226. package/dist/server/core-routes-plugin.js.map +1 -1
  227. package/dist/server/create-server.js +1 -1
  228. package/dist/server/create-server.js.map +1 -1
  229. package/dist/server/credential-provider.d.ts.map +1 -1
  230. package/dist/server/credential-provider.js +2 -0
  231. package/dist/server/credential-provider.js.map +1 -1
  232. package/dist/server/framework-request-handler.d.ts.map +1 -1
  233. package/dist/server/framework-request-handler.js +33 -1
  234. package/dist/server/framework-request-handler.js.map +1 -1
  235. package/dist/server/index.d.ts +1 -0
  236. package/dist/server/index.d.ts.map +1 -1
  237. package/dist/server/index.js +1 -0
  238. package/dist/server/index.js.map +1 -1
  239. package/dist/templates/workspace-core/.agents/skills/external-agents/SKILL.md +17 -4
  240. package/dist/templates/workspace-core/.agents/skills/harness-agents/SKILL.md +20 -0
  241. package/dist/templates/workspace-core/.agents/skills/observability/SKILL.md +20 -0
  242. package/docs/content/agent-teams.md +32 -0
  243. package/docs/content/blueprint-installer.md +73 -0
  244. package/docs/content/evals.md +141 -0
  245. package/docs/content/pr-visual-recap.md +7 -4
  246. package/docs/content/sandbox-adapters.md +134 -0
  247. package/docs/content/template-plan.md +20 -8
  248. package/package.json +5 -1
  249. package/src/templates/workspace-core/.agents/skills/external-agents/SKILL.md +17 -4
  250. package/src/templates/workspace-core/.agents/skills/harness-agents/SKILL.md +20 -0
  251. package/src/templates/workspace-core/.agents/skills/observability/SKILL.md +20 -0
@@ -19,7 +19,8 @@ import { createToolSearchEntry, TOOL_SEARCH_ACTION_NAME, } from "./tool-search.j
19
19
  import { getDefaultMaxIterations, normalizeMaxIterations, readAgentLoopSettings, } from "./loop-settings.js";
20
20
  import { isReasoningEffort, normalizeReasoningEffortForModel, } from "../shared/reasoning-effort.js";
21
21
  import { isAgentActionStopError } from "../action.js";
22
- import { writeLedgerEntry, readLedgerEntry, clearLedgerForThread, } from "./run-store.js";
22
+ import { writeLedgerEntry, readLedgerEntry, clearLedgerForThread, getCurrentTurnEventsForThread, } from "./run-store.js";
23
+ import { classifyToolCallJournal, findCompletedJournalEntry, } from "./tool-call-journal.js";
23
24
  import { preUploadAttachments } from "../file-upload/pre-upload-attachments.js";
24
25
  import { extensionIdFromPathname } from "../extensions/path.js";
25
26
  import { applyContextDirectives } from "./context-xray/apply-directives.js";
@@ -27,6 +28,7 @@ import { completeRun as completeProgressRun, startRun as startProgressRun, updat
27
28
  import { loadContextDirectives } from "./context-xray/directives-store.js";
28
29
  import { buildManifest, writeContextManifest, } from "./context-xray/manifest.js";
29
30
  import { computeProtectedSegmentIds } from "./context-xray/segments.js";
31
+ import { maybeCompactThread, buildObservationalContext, hasObservationalMemory, serializeObservationalMemoryBlock, } from "./observational-memory/index.js";
30
32
  // Register built-in engines on first import
31
33
  registerBuiltinEngines();
32
34
  export { PROVIDER_TO_ENV };
@@ -1065,6 +1067,84 @@ function findCurrentTurnStartForContinuation(messages) {
1065
1067
  }
1066
1068
  return 0;
1067
1069
  }
1070
+ /**
1071
+ * First message index that is safe to start a trimmed window on. A window must
1072
+ * not begin with a tool-result-only user message — that would orphan it from
1073
+ * the assistant tool-call turn it answers and break Anthropic's tool_use /
1074
+ * tool_result pairing. We walk forward from `desiredStart` to the first
1075
+ * non-orphaned boundary; if none exists we refuse to trim (return -1).
1076
+ */
1077
+ function findSafeWindowStart(messages, desiredStart) {
1078
+ for (let i = Math.max(0, desiredStart); i < messages.length; i++) {
1079
+ if (!isToolResultOnlyUserMessage(messages[i]))
1080
+ return i;
1081
+ }
1082
+ return -1;
1083
+ }
1084
+ /**
1085
+ * Observational Memory consumer (threshold-gated, conservative).
1086
+ *
1087
+ * Builds the three-tier OM context for a thread and, ONLY when the thread has
1088
+ * already crossed the compaction threshold (i.e. it has at least one persisted
1089
+ * observation/reflection), returns a rewritten message list that:
1090
+ * - prepends a single leading user-role "Observational Memory" message holding the
1091
+ * reflections + observations, and
1092
+ * - replaces the raw older history with just the recent-raw-message window,
1093
+ * keeping the current user turn and any pending tool results intact.
1094
+ *
1095
+ * For threads with NO OM entries (every short thread) it returns the input
1096
+ * array unchanged by reference, so the common path is byte-for-byte identical.
1097
+ *
1098
+ * Best-effort: any failure returns the input unchanged so OM can never break a
1099
+ * normal turn.
1100
+ */
1101
+ async function applyObservationalMemoryToContext(messages, opts) {
1102
+ if (!opts.ownerEmail)
1103
+ return messages;
1104
+ try {
1105
+ const context = await buildObservationalContext({
1106
+ threadId: opts.threadId,
1107
+ ownerEmail: opts.ownerEmail,
1108
+ orgId: opts.orgId ?? null,
1109
+ messages,
1110
+ });
1111
+ // No compacted memory yet → short thread, leave context untouched.
1112
+ if (!hasObservationalMemory(context))
1113
+ return messages;
1114
+ const block = serializeObservationalMemoryBlock(context);
1115
+ if (!block.trim())
1116
+ return messages;
1117
+ // EngineMessage has no "system" role; the framework injects auxiliary
1118
+ // context as leading user messages (same convention as the continuation
1119
+ // nudge and the resume journal note), and the serialized block is clearly
1120
+ // self-labeled "[Observational Memory]".
1121
+ const omMessage = {
1122
+ role: "user",
1123
+ content: [{ type: "text", text: block }],
1124
+ };
1125
+ // Trim the raw prefix to only the recent-raw window. The window is the tail
1126
+ // of `messages`, so it always contains the latest user turn and any pending
1127
+ // tool results. Guard the boundary so we never start mid tool_use/result
1128
+ // pair; if a safe boundary can't be found, additively inject the memory
1129
+ // block WITHOUT trimming (the conservative fallback) so we never drop a
1130
+ // pending tool result.
1131
+ const recentCount = context.recentMessages.length;
1132
+ if (recentCount === 0 || recentCount >= messages.length) {
1133
+ return [omMessage, ...messages];
1134
+ }
1135
+ const desiredStart = messages.length - recentCount;
1136
+ const safeStart = findSafeWindowStart(messages, desiredStart);
1137
+ if (safeStart < 0) {
1138
+ // Whole tail is tool-result-only (degenerate) — don't trim.
1139
+ return [omMessage, ...messages];
1140
+ }
1141
+ return [omMessage, ...messages.slice(safeStart)];
1142
+ }
1143
+ catch (err) {
1144
+ console.warn("[observational-memory] context injection skipped:", err instanceof Error ? err.message : String(err));
1145
+ return messages;
1146
+ }
1147
+ }
1068
1148
  function seedReadOnlyToolResultsFromHistory(messages, actions) {
1069
1149
  const cache = new Map();
1070
1150
  if (!isInternalContinuationTurn(messages))
@@ -1421,6 +1501,30 @@ export async function runAgentLoop(opts) {
1421
1501
  const readOnlyToolResultCache = seedReadOnlyToolResultsFromHistory(messages, actions);
1422
1502
  const duplicateReadOnlyToolCalls = new Map();
1423
1503
  const writeToolInterruptions = seedWriteToolInterruptionsFromHistory(messages, actions);
1504
+ // Tool-call journal hard-block (resume safety). Snapshot the per-turn journal
1505
+ // ONCE here, before any tool runs in this chunk, so it reflects only PRIOR
1506
+ // run chunks of this logical turn. A write tool whose exact call already
1507
+ // completed in an earlier interrupted chunk must not re-fire its side effect;
1508
+ // when matched, runToolCall returns the journaled result instead of executing.
1509
+ // Loaded eagerly (not lazily mid-loop) so the current chunk's own
1510
+ // asynchronously-persisted tool_done events can never leak in and make a
1511
+ // same-chunk call wrongly short-circuit. Best-effort: any ledger failure
1512
+ // leaves the journal empty and all calls run normally. Fresh first-turn calls
1513
+ // see an empty journal and are unaffected.
1514
+ let toolCallJournal = null;
1515
+ const consumedJournalKeys = new Set();
1516
+ if (opts.threadId) {
1517
+ try {
1518
+ const priorEvents = await getCurrentTurnEventsForThread(opts.threadId);
1519
+ if (priorEvents.length > 0) {
1520
+ toolCallJournal = classifyToolCallJournal(priorEvents);
1521
+ }
1522
+ }
1523
+ catch {
1524
+ // Journal is a hardening layer, never a gate — a failed ledger read just
1525
+ // means no hard-block this turn.
1526
+ }
1527
+ }
1424
1528
  const bufferTextUntilFinalGuard = Boolean(opts.finalResponseGuard);
1425
1529
  let finalGuardRetries = 0;
1426
1530
  let iterations = 0;
@@ -1465,6 +1569,21 @@ export async function runAgentLoop(opts) {
1465
1569
  catch (err) {
1466
1570
  console.warn("[context-xray] context transform skipped:", err instanceof Error ? err.message : String(err));
1467
1571
  }
1572
+ // Observational Memory (consumer): for long threads that have already been
1573
+ // compacted, fold the reflections+observations in as a leading context
1574
+ // block and prefer the recent-raw-message window over the full raw
1575
+ // history. No-op (returns the same array) for short threads with no OM
1576
+ // entries, so the common path is unchanged. Runs after the context-xray
1577
+ // transform so the two compose; best-effort inside the helper. Gated on an
1578
+ // authenticated owner so anonymous threads never read OM scoped to a
1579
+ // shared default identity.
1580
+ if (opts.ownerEmail) {
1581
+ contextMessages = await applyObservationalMemoryToContext(contextMessages, {
1582
+ threadId: opts.threadId,
1583
+ ownerEmail: opts.ownerEmail,
1584
+ orgId: opts.orgId ?? null,
1585
+ });
1586
+ }
1468
1587
  }
1469
1588
  for (let retry = 0;; retry++) {
1470
1589
  assistantContent = undefined;
@@ -1690,6 +1809,11 @@ export async function runAgentLoop(opts) {
1690
1809
  finalGuardRetries = 0;
1691
1810
  flushBufferedAssistantText();
1692
1811
  let requestedActionStop = null;
1812
+ // Human-in-the-loop approvals granted by the user for this turn (opt-in;
1813
+ // empty for the overwhelming majority of turns). Keyed by the stable
1814
+ // tool-call approval key so a re-issued continuation can let an approved
1815
+ // call run. The model cannot populate this — it comes from the request.
1816
+ const approvedToolCallKeys = new Set(opts.approvedToolCalls ?? []);
1693
1817
  const runToolCall = async (toolCall) => {
1694
1818
  const wireToolInput = JSON.stringify(toolCall.input ?? {});
1695
1819
  const normalizedToolInput = normalizeToolCallInputForHistory(toolCall.input);
@@ -1774,6 +1898,60 @@ export async function runAgentLoop(opts) {
1774
1898
  isError: true,
1775
1899
  };
1776
1900
  }
1901
+ // Human-in-the-loop approval gate (opt-in via defineAction
1902
+ // `needsApproval`; default off). When an action requires approval and
1903
+ // this specific call has NOT been approved by a human, pause the turn
1904
+ // instead of executing. The action's side effect never happens until a
1905
+ // human re-issues the turn approving this call's stable key.
1906
+ const approvalKey = toolCallCacheKey(toolCall.name, toolCall.input);
1907
+ if (actionEntry.needsApproval && !approvedToolCallKeys.has(approvalKey)) {
1908
+ let mustApprove = false;
1909
+ try {
1910
+ mustApprove =
1911
+ typeof actionEntry.needsApproval === "function"
1912
+ ? Boolean(await actionEntry.needsApproval(toolCall.input, {
1913
+ userEmail: getRequestUserEmail(),
1914
+ orgId: getRequestOrgId() ?? null,
1915
+ caller: "tool",
1916
+ }))
1917
+ : actionEntry.needsApproval === true;
1918
+ }
1919
+ catch {
1920
+ // Fail closed: a throwing predicate means we require approval rather
1921
+ // than silently running a high-consequence action.
1922
+ mustApprove = true;
1923
+ }
1924
+ if (mustApprove) {
1925
+ send({
1926
+ type: "tool_start",
1927
+ tool: toolCall.name,
1928
+ input: toolCall.input,
1929
+ });
1930
+ send({
1931
+ type: "approval_required",
1932
+ tool: toolCall.name,
1933
+ input: toolCall.input,
1934
+ approvalKey,
1935
+ ...(toolCall.id ? { toolCallId: toolCall.id } : {}),
1936
+ });
1937
+ const result = `Awaiting human approval to run "${toolCall.name}". This action did ` +
1938
+ `NOT execute — a human must approve this specific call before it ` +
1939
+ `can run. The turn is paused; do not retry.`;
1940
+ send({ type: "tool_done", tool: toolCall.name, result });
1941
+ recordToolResult(result, false);
1942
+ requestedActionStop ??= {
1943
+ message: `Waiting for your approval to run ${toolCall.name}.`,
1944
+ errorCode: "needs-approval",
1945
+ };
1946
+ return {
1947
+ type: "tool-result",
1948
+ toolCallId: toolCall.id,
1949
+ toolName: toolCall.name,
1950
+ toolInput: wireToolInput,
1951
+ content: result,
1952
+ };
1953
+ }
1954
+ }
1777
1955
  const cacheKey = actionEntry.readOnly === true
1778
1956
  ? toolCallCacheKey(toolCall.name, toolCall.input)
1779
1957
  : null;
@@ -1805,6 +1983,40 @@ export async function runAgentLoop(opts) {
1805
1983
  content: result,
1806
1984
  };
1807
1985
  }
1986
+ // TOOL-CALL JOURNAL HARD-BLOCK (resume safety, tool-layer enforcement).
1987
+ // The prompt-level resume journal already TELLS a resuming model not to
1988
+ // re-run completed tool calls; this enforces it at the tool layer so a
1989
+ // re-dispatched write call whose exact (tool name + input) already
1990
+ // completed in an earlier interrupted chunk of this turn does NOT execute
1991
+ // its side effect again — we return the journaled result instead and emit
1992
+ // the normal tool_start/tool_done so the transcript stays coherent.
1993
+ //
1994
+ // Gated on a non-readOnly tool + an existing prior-chunk journal (so fresh
1995
+ // calls with no completed journal entry are completely unaffected). The
1996
+ // snapshot was taken before this chunk's tools ran, so it can only match a
1997
+ // PRIOR completion, never one from the current chunk.
1998
+ if (!actionEntry.readOnly && toolCallJournal) {
1999
+ const journaled = findCompletedJournalEntry(toolCallJournal, toolCall.name, toolCall.input, consumedJournalKeys);
2000
+ if (journaled) {
2001
+ const recordedResult = journaled.result ?? "";
2002
+ const result = `(Already completed in an earlier interrupted attempt - not re-run to avoid a duplicate side effect.)\n\n` +
2003
+ recordedResult;
2004
+ send({
2005
+ type: "tool_start",
2006
+ tool: toolCall.name,
2007
+ input: toolCall.input,
2008
+ });
2009
+ send({ type: "tool_done", tool: toolCall.name, result });
2010
+ recordToolResult(result, false);
2011
+ return {
2012
+ type: "tool-result",
2013
+ toolCallId: toolCall.id,
2014
+ toolName: toolCall.name,
2015
+ toolInput: wireToolInput,
2016
+ content: result,
2017
+ };
2018
+ }
2019
+ }
1808
2020
  // Guard against write tools that have been interrupted too many times in
1809
2021
  // this turn (connection drop mid-execution → agent retries → repeat).
1810
2022
  // A write tool that keeps failing likely has a timeout / large-payload
@@ -2124,6 +2336,23 @@ export async function runAgentLoop(opts) {
2124
2336
  // intact so the next continuation chunk can still recover from it.
2125
2337
  if (opts.threadId) {
2126
2338
  void clearLedgerForThread(opts.threadId).catch(() => { });
2339
+ // Observational Memory (producer): after a clean turn, run a best-effort
2340
+ // compaction pass so long threads accrue observations/reflections that the
2341
+ // consumer above will surface on later turns. Both the Observer and the
2342
+ // Reflector no-op below their token thresholds, so this is cheap for short
2343
+ // threads. Fire-and-forget; any failure is swallowed so OM never affects
2344
+ // the user-visible turn.
2345
+ if (opts.ownerEmail) {
2346
+ const compactThreadId = opts.threadId;
2347
+ void maybeCompactThread({
2348
+ threadId: compactThreadId,
2349
+ ownerEmail: opts.ownerEmail,
2350
+ orgId: opts.orgId ?? null,
2351
+ messages,
2352
+ }).catch((err) => {
2353
+ console.warn("[observational-memory] post-turn compaction skipped:", err instanceof Error ? err.message : String(err));
2354
+ });
2355
+ }
2127
2356
  }
2128
2357
  }
2129
2358
  return usage;
@@ -2998,6 +3227,16 @@ export function createProductionAgentHandler(options) {
2998
3227
  ...(threadId
2999
3228
  ? { threadId: effectiveThreadId, turnId: effectiveTurnId }
3000
3229
  : {}),
3230
+ // Human-in-the-loop approval grants for this turn (sanitized — the
3231
+ // request is untrusted; accept only a bounded list of string keys).
3232
+ ...(Array.isArray(body.approvedToolCalls) &&
3233
+ body.approvedToolCalls.length
3234
+ ? {
3235
+ approvedToolCalls: body.approvedToolCalls
3236
+ .filter((k) => typeof k === "string")
3237
+ .slice(0, 200),
3238
+ }
3239
+ : {}),
3001
3240
  };
3002
3241
  send({ type: "activity", label: "Contacting model" });
3003
3242
  // loopUsage is always assigned — either via instrumentAgentLoop or