@namzu/sdk 0.4.4 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. package/CHANGELOG.md +241 -0
  2. package/dist/advisory/executor.d.ts.map +1 -1
  3. package/dist/advisory/executor.js +3 -2
  4. package/dist/advisory/executor.js.map +1 -1
  5. package/dist/advisory/executor.test.js +36 -14
  6. package/dist/advisory/executor.test.js.map +1 -1
  7. package/dist/agents/ReactiveAgent.d.ts.map +1 -1
  8. package/dist/agents/ReactiveAgent.js +1 -0
  9. package/dist/agents/ReactiveAgent.js.map +1 -1
  10. package/dist/agents/RouterAgent.d.ts.map +1 -1
  11. package/dist/agents/RouterAgent.js +3 -2
  12. package/dist/agents/RouterAgent.js.map +1 -1
  13. package/dist/agents/SupervisorAgent.d.ts.map +1 -1
  14. package/dist/agents/SupervisorAgent.js +2 -0
  15. package/dist/agents/SupervisorAgent.js.map +1 -1
  16. package/dist/bridge/a2a/mapper.d.ts.map +1 -1
  17. package/dist/bridge/a2a/mapper.js +23 -9
  18. package/dist/bridge/a2a/mapper.js.map +1 -1
  19. package/dist/bridge/a2a/mapper.test.js +35 -9
  20. package/dist/bridge/a2a/mapper.test.js.map +1 -1
  21. package/dist/bridge/sse/mapper.d.ts.map +1 -1
  22. package/dist/bridge/sse/mapper.js +60 -8
  23. package/dist/bridge/sse/mapper.js.map +1 -1
  24. package/dist/bridge/sse/mapper.test.js +123 -16
  25. package/dist/bridge/sse/mapper.test.js.map +1 -1
  26. package/dist/compaction/verifier.d.ts.map +1 -1
  27. package/dist/compaction/verifier.js +3 -2
  28. package/dist/compaction/verifier.js.map +1 -1
  29. package/dist/config/runtime.d.ts +14 -14
  30. package/dist/config/runtime.js +1 -1
  31. package/dist/config/runtime.js.map +1 -1
  32. package/dist/contracts/api.d.ts +1 -1
  33. package/dist/contracts/api.d.ts.map +1 -1
  34. package/dist/contracts/schemas.js +1 -1
  35. package/dist/contracts/schemas.js.map +1 -1
  36. package/dist/gateway/local.d.ts +1 -1
  37. package/dist/gateway/local.d.ts.map +1 -1
  38. package/dist/gateway/local.js +1 -0
  39. package/dist/gateway/local.js.map +1 -1
  40. package/dist/manager/agent/__tests__/lifecycle.test.js +2 -2
  41. package/dist/provider/collect.d.ts +25 -0
  42. package/dist/provider/collect.d.ts.map +1 -0
  43. package/dist/provider/collect.js +82 -0
  44. package/dist/provider/collect.js.map +1 -0
  45. package/dist/provider/collect.test.d.ts +22 -0
  46. package/dist/provider/collect.test.d.ts.map +1 -0
  47. package/dist/provider/collect.test.js +123 -0
  48. package/dist/provider/collect.test.js.map +1 -0
  49. package/dist/provider/instrumentation.d.ts.map +1 -1
  50. package/dist/provider/instrumentation.js +10 -43
  51. package/dist/provider/instrumentation.js.map +1 -1
  52. package/dist/provider/instrumentation.test.d.ts +15 -0
  53. package/dist/provider/instrumentation.test.d.ts.map +1 -1
  54. package/dist/provider/instrumentation.test.js +73 -87
  55. package/dist/provider/instrumentation.test.js.map +1 -1
  56. package/dist/provider/mock.d.ts +1 -2
  57. package/dist/provider/mock.d.ts.map +1 -1
  58. package/dist/provider/mock.js +2 -5
  59. package/dist/provider/mock.js.map +1 -1
  60. package/dist/public-runtime.d.ts +1 -0
  61. package/dist/public-runtime.d.ts.map +1 -1
  62. package/dist/public-runtime.js +5 -0
  63. package/dist/public-runtime.js.map +1 -1
  64. package/dist/run/LimitChecker.test.d.ts +2 -0
  65. package/dist/run/LimitChecker.test.d.ts.map +1 -0
  66. package/dist/run/LimitChecker.test.js +26 -0
  67. package/dist/run/LimitChecker.test.js.map +1 -0
  68. package/dist/run/reporter.d.ts.map +1 -1
  69. package/dist/run/reporter.js +10 -6
  70. package/dist/run/reporter.js.map +1 -1
  71. package/dist/runtime/query/__tests__/prompt.test.d.ts +2 -0
  72. package/dist/runtime/query/__tests__/prompt.test.d.ts.map +1 -0
  73. package/dist/runtime/query/__tests__/prompt.test.js +35 -0
  74. package/dist/runtime/query/__tests__/prompt.test.js.map +1 -0
  75. package/dist/runtime/query/context-cache.d.ts +2 -0
  76. package/dist/runtime/query/context-cache.d.ts.map +1 -1
  77. package/dist/runtime/query/context-cache.js +3 -0
  78. package/dist/runtime/query/context-cache.js.map +1 -1
  79. package/dist/runtime/query/events.d.ts +2 -0
  80. package/dist/runtime/query/events.d.ts.map +1 -1
  81. package/dist/runtime/query/events.js +48 -1
  82. package/dist/runtime/query/events.js.map +1 -1
  83. package/dist/runtime/query/executor.d.ts.map +1 -1
  84. package/dist/runtime/query/executor.js +55 -5
  85. package/dist/runtime/query/executor.js.map +1 -1
  86. package/dist/runtime/query/index.d.ts +2 -1
  87. package/dist/runtime/query/index.d.ts.map +1 -1
  88. package/dist/runtime/query/index.js +2 -0
  89. package/dist/runtime/query/index.js.map +1 -1
  90. package/dist/runtime/query/iteration/index.d.ts.map +1 -1
  91. package/dist/runtime/query/iteration/index.js +245 -13
  92. package/dist/runtime/query/iteration/index.js.map +1 -1
  93. package/dist/runtime/query/iteration/phases/compaction.d.ts.map +1 -1
  94. package/dist/runtime/query/iteration/phases/compaction.js +2 -0
  95. package/dist/runtime/query/iteration/phases/compaction.js.map +1 -1
  96. package/dist/runtime/query/prompt.d.ts +2 -0
  97. package/dist/runtime/query/prompt.d.ts.map +1 -1
  98. package/dist/runtime/query/prompt.js +35 -13
  99. package/dist/runtime/query/prompt.js.map +1 -1
  100. package/dist/session/__tests__/integration/e2e-spawn.test.js +2 -2
  101. package/dist/session/__tests__/integration/event-stream-ordering.test.d.ts +1 -1
  102. package/dist/session/__tests__/integration/event-stream-ordering.test.js +7 -7
  103. package/dist/streaming/coalesce.d.ts +28 -0
  104. package/dist/streaming/coalesce.d.ts.map +1 -0
  105. package/dist/streaming/coalesce.js +75 -0
  106. package/dist/streaming/coalesce.js.map +1 -0
  107. package/dist/streaming/coalesce.test.d.ts +19 -0
  108. package/dist/streaming/coalesce.test.d.ts.map +1 -0
  109. package/dist/streaming/coalesce.test.js +120 -0
  110. package/dist/streaming/coalesce.test.js.map +1 -0
  111. package/dist/tools/coordinator/index.d.ts +2 -0
  112. package/dist/tools/coordinator/index.d.ts.map +1 -1
  113. package/dist/tools/coordinator/index.js +1 -0
  114. package/dist/tools/coordinator/index.js.map +1 -1
  115. package/dist/types/agent/base.d.ts +7 -0
  116. package/dist/types/agent/base.d.ts.map +1 -1
  117. package/dist/types/agent/gateway.d.ts +2 -1
  118. package/dist/types/agent/gateway.d.ts.map +1 -1
  119. package/dist/types/ids/index.d.ts +10 -0
  120. package/dist/types/ids/index.d.ts.map +1 -1
  121. package/dist/types/ids/index.js.map +1 -1
  122. package/dist/types/provider/interface.d.ts +26 -2
  123. package/dist/types/provider/interface.d.ts.map +1 -1
  124. package/dist/types/provider/stream.d.ts +18 -0
  125. package/dist/types/provider/stream.d.ts.map +1 -1
  126. package/dist/types/run/events.d.ts +58 -8
  127. package/dist/types/run/events.d.ts.map +1 -1
  128. package/dist/types/run/events.js +23 -1
  129. package/dist/types/run/events.js.map +1 -1
  130. package/dist/types/run/schema-version.d.ts +7 -1
  131. package/dist/types/run/schema-version.d.ts.map +1 -1
  132. package/dist/types/run/schema-version.js +7 -1
  133. package/dist/types/run/schema-version.js.map +1 -1
  134. package/dist/types/run/stop-reason.d.ts +9 -0
  135. package/dist/types/run/stop-reason.d.ts.map +1 -1
  136. package/package.json +1 -1
  137. package/src/advisory/executor.test.ts +37 -15
  138. package/src/advisory/executor.ts +10 -7
  139. package/src/agents/ReactiveAgent.ts +1 -0
  140. package/src/agents/RouterAgent.ts +9 -6
  141. package/src/agents/SupervisorAgent.ts +2 -0
  142. package/src/bridge/a2a/mapper.test.ts +35 -9
  143. package/src/bridge/a2a/mapper.ts +23 -9
  144. package/src/bridge/sse/mapper.test.ts +152 -24
  145. package/src/bridge/sse/mapper.ts +66 -9
  146. package/src/compaction/verifier.ts +9 -6
  147. package/src/config/runtime.ts +1 -1
  148. package/src/contracts/api.ts +7 -0
  149. package/src/contracts/schemas.ts +1 -1
  150. package/src/gateway/local.ts +3 -2
  151. package/src/manager/agent/__tests__/lifecycle.test.ts +2 -2
  152. package/src/provider/collect.test.ts +142 -0
  153. package/src/provider/collect.ts +85 -0
  154. package/src/provider/instrumentation.test.ts +81 -100
  155. package/src/provider/instrumentation.ts +11 -53
  156. package/src/provider/mock.ts +2 -6
  157. package/src/public-runtime.ts +6 -0
  158. package/src/run/LimitChecker.test.ts +32 -0
  159. package/src/run/reporter.ts +10 -7
  160. package/src/runtime/query/__tests__/prompt.test.ts +38 -0
  161. package/src/runtime/query/context-cache.ts +5 -0
  162. package/src/runtime/query/events.ts +52 -1
  163. package/src/runtime/query/executor.ts +54 -5
  164. package/src/runtime/query/index.ts +5 -1
  165. package/src/runtime/query/iteration/index.ts +301 -26
  166. package/src/runtime/query/iteration/phases/compaction.ts +2 -0
  167. package/src/runtime/query/prompt.ts +45 -17
  168. package/src/session/__tests__/integration/e2e-spawn.test.ts +2 -2
  169. package/src/session/__tests__/integration/event-stream-ordering.test.ts +7 -7
  170. package/src/streaming/coalesce.test.ts +132 -0
  171. package/src/streaming/coalesce.ts +89 -0
  172. package/src/tools/coordinator/index.ts +3 -0
  173. package/src/types/agent/base.ts +9 -0
  174. package/src/types/agent/gateway.ts +3 -1
  175. package/src/types/ids/index.ts +10 -0
  176. package/src/types/provider/interface.ts +28 -3
  177. package/src/types/provider/stream.ts +18 -0
  178. package/src/types/run/events.ts +105 -9
  179. package/src/types/run/schema-version.ts +7 -1
  180. package/src/types/run/stop-reason.ts +17 -0
@@ -126,10 +126,27 @@ export class ToolExecutor {
126
126
  try {
127
127
  input = JSON.parse(toolCall.function.arguments)
128
128
  } catch {
129
- return {
130
- toolCallId: toolCall.id,
131
- output: `Error: Invalid JSON in tool arguments for "${toolName}"`,
132
- }
129
+ // Codex M2: malformed JSON args used to return without ever
130
+ // emitting tool_executing or tool_completed, leaving UI cards
131
+ // orphaned in `streaming_input`. Emit the executing→completed
132
+ // terminal pair so the card lifecycle closes.
133
+ const message = `Error: Invalid JSON in tool arguments for "${toolName}"`
134
+ await this.emitEvent({
135
+ type: 'tool_executing',
136
+ runId: this.config.runId,
137
+ toolUseId: toolCall.id,
138
+ toolName,
139
+ input: {},
140
+ })
141
+ await this.emitEvent({
142
+ type: 'tool_completed',
143
+ runId: this.config.runId,
144
+ toolUseId: toolCall.id,
145
+ toolName,
146
+ result: message,
147
+ isError: true,
148
+ })
149
+ return { toolCallId: toolCall.id, output: message }
133
150
  }
134
151
 
135
152
  const preOutcome = await this.runPreToolHook(toolName, input)
@@ -152,6 +169,7 @@ export class ToolExecutor {
152
169
  await this.emitEvent({
153
170
  type: 'tool_executing',
154
171
  runId: this.config.runId,
172
+ toolUseId: toolCall.id,
155
173
  toolName,
156
174
  input,
157
175
  })
@@ -160,6 +178,7 @@ export class ToolExecutor {
160
178
  {
161
179
  type: 'tool_executing',
162
180
  runId: this.config.runId,
181
+ toolUseId: toolCall.id,
163
182
  toolName,
164
183
  input,
165
184
  },
@@ -178,6 +197,16 @@ export class ToolExecutor {
178
197
  if (activity) {
179
198
  this.activityStore.fail(activity.id, veto.message)
180
199
  }
200
+ // Codex M1: probe veto used to skip tool_completed entirely.
201
+ // Emit the terminal event with isError so UI cards finalize.
202
+ await this.emitEvent({
203
+ type: 'tool_completed',
204
+ runId: this.config.runId,
205
+ toolUseId: toolCall.id,
206
+ toolName,
207
+ result: `Error: ${veto.message}`,
208
+ isError: true,
209
+ })
181
210
  return {
182
211
  toolCallId: toolCall.id,
183
212
  output: `Error: ${veto.message}`,
@@ -189,7 +218,22 @@ export class ToolExecutor {
189
218
  }
190
219
 
191
220
  const startMs = Date.now()
192
- const result = await this.config.tools.execute(toolName, input, toolContext)
221
+ // Codex M4: an unhandled throw from `tools.execute(...)` used to
222
+ // propagate up to `result.ts` as `run_failed` without emitting a
223
+ // terminal `tool_completed`, leaving UI cards stuck in `executing`.
224
+ // Wrap so any throw materialises as an error result.
225
+ let result: { success: boolean; output: string; error?: string }
226
+ try {
227
+ result = await this.config.tools.execute(toolName, input, toolContext)
228
+ } catch (err) {
229
+ const message = err instanceof Error ? err.message : String(err)
230
+ this.log.warn('Tool execution threw', {
231
+ runId: this.config.runId,
232
+ tool: toolName,
233
+ error: message,
234
+ })
235
+ result = { success: false, output: '', error: message }
236
+ }
193
237
  const durationMs = Date.now() - startMs
194
238
 
195
239
  const rawOutput = result.success
@@ -236,8 +280,10 @@ export class ToolExecutor {
236
280
  await this.emitEvent({
237
281
  type: 'tool_completed',
238
282
  runId: this.config.runId,
283
+ toolUseId: toolCall.id,
239
284
  toolName,
240
285
  result: output,
286
+ isError: effectiveIsError,
241
287
  })
242
288
 
243
289
  return { toolCallId: toolCall.id, output }
@@ -351,14 +397,17 @@ export class ToolExecutor {
351
397
  await this.emitEvent({
352
398
  type: 'tool_executing',
353
399
  runId: this.config.runId,
400
+ toolUseId: toolCallId,
354
401
  toolName,
355
402
  input,
356
403
  })
357
404
  await this.emitEvent({
358
405
  type: 'tool_completed',
359
406
  runId: this.config.runId,
407
+ toolUseId: toolCallId,
360
408
  toolName,
361
409
  result: outcome.output,
410
+ isError: outcome.kind === 'error',
362
411
  })
363
412
  return { toolCallId, output: outcome.output }
364
413
  }
@@ -14,7 +14,7 @@ import { buildAdvisoryTools } from '../../tools/advisory/index.js'
14
14
  import { SearchToolsTool } from '../../tools/builtins/search-tools.js'
15
15
  import { buildTaskTools } from '../../tools/task/index.js'
16
16
  import type { AdvisoryConfig } from '../../types/advisory/index.js'
17
- import type { RuntimeToolOverrides } from '../../types/agent/base.js'
17
+ import type { AgentRuntimeContext, RuntimeToolOverrides } from '../../types/agent/base.js'
18
18
  import type { AgentContextLevel } from '../../types/agent/factory.js'
19
19
  import {
20
20
  type CheckpointId,
@@ -108,6 +108,8 @@ export interface QueryParams {
108
108
 
109
109
  runtimeToolOverrides?: RuntimeToolOverrides
110
110
 
111
+ runtimeContext?: AgentRuntimeContext
112
+
111
113
  taskGateway?: import('../../types/agent/gateway.js').TaskGateway
112
114
 
113
115
  launchedTasks?: Map<
@@ -251,6 +253,7 @@ export async function* query(params: QueryParams): AsyncGenerator<RunEvent, Run>
251
253
  basePrompt: params.basePrompt,
252
254
  tools: params.tools,
253
255
  allowedTools: params.allowedTools,
256
+ runtimeContext: params.runtimeContext,
254
257
  })
255
258
 
256
259
  const guard = new GuardCoordinator({
@@ -368,6 +371,7 @@ export async function* query(params: QueryParams): AsyncGenerator<RunEvent, Run>
368
371
  basePrompt: contextLevel === 'full' ? params.basePrompt : undefined,
369
372
  tools: params.tools,
370
373
  allowedTools: params.allowedTools,
374
+ runtimeContext: params.runtimeContext,
371
375
  }
372
376
 
373
377
  const segments: PromptSegments = params.contextCache
@@ -5,15 +5,23 @@ import type { WorkingStateManager } from '../../../compaction/manager.js'
5
5
  import type { CompactionConfig } from '../../../config/runtime.js'
6
6
  import type { PlanManager } from '../../../manager/plan/lifecycle.js'
7
7
  import type { RunPersistence } from '../../../manager/run/persistence.js'
8
+ import { collect } from '../../../provider/collect.js'
8
9
  import type { ActivityStore } from '../../../store/activity/memory.js'
9
10
  import { GENAI, NAMZU, agentIterationSpanName } from '../../../telemetry/attributes.js'
10
11
  import { getTracer } from '../../../telemetry/runtime-accessors.js'
11
12
  import type { ResumeHandler } from '../../../types/hitl/index.js'
13
+ import type { ToolUseId } from '../../../types/ids/index.js'
12
14
  import { createAssistantMessage, createUserMessage } from '../../../types/message/index.js'
13
- import type { LLMProvider } from '../../../types/provider/index.js'
15
+ import type {
16
+ ChatCompletionResponse,
17
+ LLMProvider,
18
+ StreamChunk,
19
+ } from '../../../types/provider/index.js'
14
20
  import type { AgentRunConfig, RunEvent, StopReason } from '../../../types/run/index.js'
21
+ import type { MessageStopReason } from '../../../types/run/stop-reason.js'
15
22
  import type { ToolRegistryContract } from '../../../types/tool/index.js'
16
23
  import { toErrorMessage } from '../../../utils/error.js'
24
+ import { generateMessageId } from '../../../utils/id.js'
17
25
  import type { Logger } from '../../../utils/logger.js'
18
26
  import type { CheckpointManager } from '../checkpoint.js'
19
27
  import type { EmitEvent } from '../events.js'
@@ -50,6 +58,251 @@ export interface IterationConfig {
50
58
  pluginManager?: import('../../../plugin/lifecycle.js').PluginLifecycleManager
51
59
  }
52
60
 
61
+ /**
62
+ * Map a provider's coarse `finishReason` plus the orchestrator's
63
+ * `forceFinalize` flag onto the per-message {@link MessageStopReason}
64
+ * union the v3 `message_completed` event surfaces.
65
+ */
66
+ function synthesizeMessageStopReason(
67
+ finishReason: 'stop' | 'tool_calls' | 'length' | 'content_filter',
68
+ forceFinalize: boolean,
69
+ ): MessageStopReason {
70
+ if (forceFinalize) return 'forced_finalize'
71
+ switch (finishReason) {
72
+ case 'tool_calls':
73
+ return 'tool_use'
74
+ case 'length':
75
+ return 'max_tokens'
76
+ case 'content_filter':
77
+ return 'refusal'
78
+ default:
79
+ return 'end_turn'
80
+ }
81
+ }
82
+
83
+ interface StreamingTurnResult {
84
+ response: ChatCompletionResponse
85
+ messageId: import('../../../types/ids/index.js').MessageId
86
+ }
87
+
88
+ /**
89
+ * Consume a provider's streaming response and emit the v3 RunEvent
90
+ * lifecycle natively (message_started → text_delta* + tool_input_*
91
+ * → message_completed). Returns the aggregated `ChatCompletionResponse`
92
+ * for downstream code that still expects the legacy shape (assistant
93
+ * message construction, working-state extraction, telemetry attribute
94
+ * stamping).
95
+ *
96
+ * Per-delta `emitEvent` calls are followed by a `drainPending()`
97
+ * yield so SSE consumers see live progress instead of a burst at
98
+ * end-of-message. The bus's ephemeral filter (D1) ensures these
99
+ * deltas never hit transcript.jsonl.
100
+ *
101
+ * Edge cases (codex A3, A4, A5):
102
+ * - Stream ends without `finishReason` (anthropic-sdk-typescript#842
103
+ * dropped message_stop): we still emit `message_completed` from a
104
+ * finally-style fall-through path with `stopReason: 'refusal'`.
105
+ * - `tool_input_delta` with no `toolUseId` registered yet: we drop
106
+ * the fragment and log a warning (proxies seen to misorder events).
107
+ * - `chunk.error`: we surface as a thrown error after emitting the
108
+ * message_completed terminator so consumer cards still close.
109
+ */
110
+ async function* streamProviderTurn(
111
+ provider: LLMProvider,
112
+ params: import('../../../types/provider/index.js').ChatCompletionParams,
113
+ emitEvent: EmitEvent,
114
+ drainPending: () => Generator<RunEvent>,
115
+ runId: import('../../../types/ids/index.js').RunId,
116
+ iteration: number,
117
+ forceFinalize: boolean,
118
+ log: Logger,
119
+ ): AsyncGenerator<RunEvent, StreamingTurnResult> {
120
+ const messageId = generateMessageId()
121
+ await emitEvent({ type: 'message_started', runId, iteration, messageId })
122
+ yield* drainPending()
123
+
124
+ let id = ''
125
+ const model = ''
126
+ let textBuf = ''
127
+ let finishReason: ChatCompletionResponse['finishReason'] = 'stop'
128
+ let usage: ChatCompletionResponse['usage'] = {
129
+ promptTokens: 0,
130
+ completionTokens: 0,
131
+ totalTokens: 0,
132
+ cachedTokens: 0,
133
+ cacheWriteTokens: 0,
134
+ }
135
+ const toolBuckets = new Map<
136
+ number,
137
+ { id: string; name: string; argsBuf: string; started: boolean; completed: boolean }
138
+ >()
139
+ let streamError: string | undefined
140
+
141
+ const stream = provider.chatStream({ ...params, stream: true }) as AsyncIterable<StreamChunk>
142
+
143
+ try {
144
+ for await (const chunk of stream) {
145
+ if (chunk.error) {
146
+ streamError = chunk.error
147
+ break
148
+ }
149
+ if (!id && chunk.id) id = chunk.id
150
+
151
+ if (chunk.delta.content) {
152
+ textBuf += chunk.delta.content
153
+ await emitEvent({
154
+ type: 'text_delta',
155
+ runId,
156
+ iteration,
157
+ messageId,
158
+ text: chunk.delta.content,
159
+ })
160
+ yield* drainPending()
161
+ }
162
+
163
+ for (const tc of chunk.delta.toolCalls ?? []) {
164
+ let bucket = toolBuckets.get(tc.index)
165
+ if (!bucket) {
166
+ bucket = {
167
+ id: tc.id ?? '',
168
+ name: tc.function?.name ?? '',
169
+ argsBuf: '',
170
+ started: false,
171
+ completed: false,
172
+ }
173
+ toolBuckets.set(tc.index, bucket)
174
+ }
175
+ if (tc.id && !bucket.id) bucket.id = tc.id
176
+ if (tc.function?.name && !bucket.name) bucket.name = tc.function.name
177
+
178
+ if (!bucket.started && bucket.id && bucket.name) {
179
+ bucket.started = true
180
+ await emitEvent({
181
+ type: 'tool_input_started',
182
+ runId,
183
+ iteration,
184
+ messageId,
185
+ toolUseId: bucket.id as ToolUseId,
186
+ toolName: bucket.name,
187
+ })
188
+ yield* drainPending()
189
+ }
190
+
191
+ const fragment = tc.function?.arguments
192
+ if (fragment) {
193
+ if (!bucket.id) {
194
+ log.warn('tool_input_delta arrived before tool id was known; dropping fragment', {
195
+ runId,
196
+ index: tc.index,
197
+ length: fragment.length,
198
+ })
199
+ } else {
200
+ bucket.argsBuf += fragment
201
+ await emitEvent({
202
+ type: 'tool_input_delta',
203
+ runId,
204
+ toolUseId: bucket.id as ToolUseId,
205
+ partialJson: fragment,
206
+ })
207
+ yield* drainPending()
208
+ }
209
+ }
210
+ }
211
+
212
+ if (chunk.delta.toolCallEnd) {
213
+ const { index, id: endId } = chunk.delta.toolCallEnd
214
+ const bucket = toolBuckets.get(index)
215
+ if (bucket && !bucket.completed) {
216
+ bucket.completed = true
217
+ let parsed: unknown = {}
218
+ try {
219
+ parsed = bucket.argsBuf ? JSON.parse(bucket.argsBuf) : {}
220
+ } catch (err) {
221
+ log.warn('tool input JSON parse failed at content_block_stop', {
222
+ runId,
223
+ toolUseId: endId,
224
+ error: err instanceof Error ? err.message : String(err),
225
+ })
226
+ }
227
+ await emitEvent({
228
+ type: 'tool_input_completed',
229
+ runId,
230
+ toolUseId: endId as ToolUseId,
231
+ input: parsed,
232
+ })
233
+ yield* drainPending()
234
+ }
235
+ }
236
+
237
+ if (chunk.finishReason) finishReason = chunk.finishReason
238
+ if (chunk.usage) usage = chunk.usage
239
+ }
240
+ } catch (err) {
241
+ streamError = err instanceof Error ? err.message : String(err)
242
+ }
243
+
244
+ // Flush any tool buckets the provider failed to close (no toolCallEnd
245
+ // arrived — defensive against providers that don't yet emit it).
246
+ for (const bucket of toolBuckets.values()) {
247
+ if (bucket.started && !bucket.completed) {
248
+ bucket.completed = true
249
+ let parsed: unknown = {}
250
+ try {
251
+ parsed = bucket.argsBuf ? JSON.parse(bucket.argsBuf) : {}
252
+ } catch {
253
+ // leave parsed = {}
254
+ }
255
+ await emitEvent({
256
+ type: 'tool_input_completed',
257
+ runId,
258
+ toolUseId: bucket.id as ToolUseId,
259
+ input: parsed,
260
+ })
261
+ yield* drainPending()
262
+ }
263
+ }
264
+
265
+ const stopReason: MessageStopReason = streamError
266
+ ? 'refusal'
267
+ : synthesizeMessageStopReason(finishReason, forceFinalize)
268
+
269
+ await emitEvent({
270
+ type: 'message_completed',
271
+ runId,
272
+ iteration,
273
+ messageId,
274
+ stopReason,
275
+ usage,
276
+ content: textBuf || undefined,
277
+ })
278
+ yield* drainPending()
279
+
280
+ if (streamError) {
281
+ throw new Error(`Provider stream error: ${streamError}`)
282
+ }
283
+
284
+ const toolCalls = [...toolBuckets.entries()]
285
+ .sort(([a], [b]) => a - b)
286
+ .map(([, b]) => ({
287
+ id: b.id,
288
+ type: 'function' as const,
289
+ function: { name: b.name, arguments: b.argsBuf },
290
+ }))
291
+
292
+ const response: ChatCompletionResponse = {
293
+ id: id || messageId,
294
+ model: model || params.model,
295
+ message: {
296
+ role: 'assistant',
297
+ content: textBuf.length > 0 ? textBuf : null,
298
+ toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
299
+ },
300
+ finishReason,
301
+ usage,
302
+ }
303
+ return { response, messageId }
304
+ }
305
+
53
306
  export class IterationOrchestrator {
54
307
  private ctx: IterationContext
55
308
 
@@ -214,14 +467,31 @@ export class IterationOrchestrator {
214
467
  yield* this.ctx.drainPending()
215
468
  }
216
469
 
217
- const response = await this.ctx.provider.chat({
218
- model,
219
- messages,
220
- tools: openAITools && openAITools.length > 0 ? openAITools : undefined,
221
- temperature: runConfig.temperature,
222
- maxTokens: runConfig.maxResponseTokens,
223
- cacheControl: { type: 'auto' },
224
- })
470
+ // Phase 4 (ses_001-tool-stream-events): consume the
471
+ // streaming response natively, emitting message and
472
+ // tool-input lifecycle events as deltas arrive. The
473
+ // helper yields RunEvents through drainPending() so SSE
474
+ // consumers see live progress; its return value is the
475
+ // aggregated `ChatCompletionResponse` for the legacy
476
+ // downstream paths (assistantMsg construction, working
477
+ // state extraction, telemetry attribute stamping).
478
+ const { response } = yield* streamProviderTurn(
479
+ this.ctx.provider,
480
+ {
481
+ model,
482
+ messages,
483
+ tools: openAITools && openAITools.length > 0 ? openAITools : undefined,
484
+ temperature: runConfig.temperature,
485
+ maxTokens: runConfig.maxResponseTokens,
486
+ cacheControl: { type: 'auto' },
487
+ },
488
+ this.ctx.emitEvent,
489
+ this.ctx.drainPending,
490
+ runMgr.id,
491
+ iterationNum,
492
+ forceFinalize,
493
+ this.ctx.log,
494
+ )
225
495
 
226
496
  runMgr.accumulateUsage(response.usage)
227
497
 
@@ -268,13 +538,6 @@ export class IterationOrchestrator {
268
538
  )
269
539
  }
270
540
 
271
- await this.ctx.emitEvent({
272
- type: 'llm_response',
273
- runId: runMgr.id,
274
- content: response.message.content,
275
- hasToolCalls: forceFinalize ? false : !!response.message.toolCalls?.length,
276
- })
277
-
278
541
  yield* this.ctx.drainPending()
279
542
 
280
543
  iterSpan.setAttributes({
@@ -490,24 +753,36 @@ export class IterationOrchestrator {
490
753
  ),
491
754
  ]
492
755
 
493
- const response = await this.ctx.provider.chat({
494
- model,
495
- messages: finalMessages,
496
- temperature: this.ctx.runConfig.temperature,
497
- maxTokens: this.ctx.runConfig.maxResponseTokens,
498
- cacheControl: { type: 'auto' },
499
- })
756
+ const response = await collect(
757
+ this.ctx.provider.chatStream({
758
+ model,
759
+ messages: finalMessages,
760
+ temperature: this.ctx.runConfig.temperature,
761
+ maxTokens: this.ctx.runConfig.maxResponseTokens,
762
+ cacheControl: { type: 'auto' },
763
+ }),
764
+ )
500
765
 
501
766
  this.ctx.runMgr.accumulateUsage(response.usage)
502
767
 
503
768
  const assistantMsg = createAssistantMessage(response.message.content)
504
769
  this.ctx.runMgr.pushMessage(assistantMsg)
505
770
 
771
+ const finalMessageId = generateMessageId()
772
+ await this.ctx.emitEvent({
773
+ type: 'message_started',
774
+ runId: this.ctx.runMgr.id,
775
+ iteration: this.ctx.runMgr.currentIteration,
776
+ messageId: finalMessageId,
777
+ })
506
778
  await this.ctx.emitEvent({
507
- type: 'llm_response',
779
+ type: 'message_completed',
508
780
  runId: this.ctx.runMgr.id,
509
- content: response.message.content,
510
- hasToolCalls: false,
781
+ iteration: this.ctx.runMgr.currentIteration,
782
+ messageId: finalMessageId,
783
+ stopReason: 'forced_finalize',
784
+ usage: response.usage,
785
+ content: response.message.content ?? undefined,
511
786
  })
512
787
  } catch (err) {
513
788
  this.ctx.log.error('Failed to get final response', {
@@ -32,6 +32,8 @@ export async function runCompactionCheck(ctx: IterationContext): Promise<void> {
32
32
 
33
33
  const estimatedTokens = estimateTokens(ctx)
34
34
  const budget = ctx.runConfig.tokenBudget
35
+ if (budget <= 0) return
36
+
35
37
  const usage = estimatedTokens / budget
36
38
 
37
39
  if (usage < config.triggerThreshold) return
@@ -1,5 +1,6 @@
1
1
  import { FILESYSTEM_TOOLS } from '../../constants/tools/index.js'
2
2
  import { assembleSystemPrompt } from '../../persona/assembler.js'
3
+ import type { AgentRuntimeContext } from '../../types/agent/base.js'
3
4
  import type { AgentContextLevel } from '../../types/agent/factory.js'
4
5
  import type { AgentPersona } from '../../types/persona/index.js'
5
6
  import type { Skill } from '../../types/skills/index.js'
@@ -23,15 +24,40 @@ export interface PromptBuilderConfig {
23
24
 
24
25
  tools: ToolRegistryContract
25
26
  allowedTools?: string[]
27
+ runtimeContext?: AgentRuntimeContext
26
28
  }
27
29
 
28
- function buildEnvContext(workingDirectory: string): string {
29
- return `<env>
30
+ function buildEnvContext(workingDirectory: string, runtimeContext?: AgentRuntimeContext): string {
31
+ const lines = [
32
+ `<env>
30
33
  Working directory: ${workingDirectory}
31
- Platform: ${process.platform}
32
- </env>
34
+ Platform: ${process.platform}`,
35
+ ]
33
36
 
34
- IMPORTANT: Always use absolute paths based on the working directory above. Before reading a file, use the glob tool to discover actual file paths — never guess or hallucinate paths.`
37
+ if (runtimeContext?.label) {
38
+ lines.push(`Runtime: ${runtimeContext.label}`)
39
+ }
40
+
41
+ if (runtimeContext?.outputDirectory) {
42
+ lines.push(`Output directory: ${runtimeContext.outputDirectory}`)
43
+ }
44
+
45
+ if (runtimeContext?.outputFileMarker) {
46
+ lines.push(`Output file marker: ${runtimeContext.outputFileMarker}`)
47
+ }
48
+
49
+ if (runtimeContext?.notes?.length) {
50
+ lines.push('Runtime notes:')
51
+ for (const note of runtimeContext.notes) {
52
+ lines.push(`- ${note}`)
53
+ }
54
+ }
55
+
56
+ lines.push(`</env>
57
+
58
+ IMPORTANT: Always use absolute paths based on the working directory above. Before reading a file, use the glob tool to discover actual file paths — never guess or hallucinate paths.`)
59
+
60
+ return lines.join('\n')
35
61
  }
36
62
 
37
63
  function hasFilesystemTools(tools: ToolRegistryContract, allowedTools?: string[]): boolean {
@@ -71,12 +97,13 @@ export class PromptBuilder {
71
97
  }
72
98
  }
73
99
 
74
- if (
75
- contextLevel !== 'minimal' &&
76
- workingDirectory &&
77
- hasFilesystemTools(this.config.tools, this.config.allowedTools)
78
- ) {
79
- parts.push(buildEnvContext(workingDirectory))
100
+ if (contextLevel !== 'minimal' && workingDirectory) {
101
+ const shouldIncludeEnv =
102
+ hasFilesystemTools(this.config.tools, this.config.allowedTools) ||
103
+ Boolean(this.config.runtimeContext)
104
+ if (shouldIncludeEnv) {
105
+ parts.push(buildEnvContext(workingDirectory, this.config.runtimeContext))
106
+ }
80
107
  }
81
108
 
82
109
  return parts.join('\n\n---\n\n')
@@ -120,12 +147,13 @@ export class PromptBuilder {
120
147
  }
121
148
  }
122
149
 
123
- if (
124
- contextLevel !== 'minimal' &&
125
- workingDirectory &&
126
- hasFilesystemTools(this.config.tools, this.config.allowedTools)
127
- ) {
128
- dynamicParts.push(buildEnvContext(workingDirectory))
150
+ if (contextLevel !== 'minimal' && workingDirectory) {
151
+ const shouldIncludeEnv =
152
+ hasFilesystemTools(this.config.tools, this.config.allowedTools) ||
153
+ Boolean(this.config.runtimeContext)
154
+ if (shouldIncludeEnv) {
155
+ dynamicParts.push(buildEnvContext(workingDirectory, this.config.runtimeContext))
156
+ }
129
157
  }
130
158
 
131
159
  return {
@@ -182,12 +182,12 @@ describe('E2E — SubSession spawn → kernel summary → parent drill', () => {
182
182
  expect(spawned.lineage.parentSessionId).toBe(parentSession.id)
183
183
  expect(spawned.lineage.rootSessionId).toBe(parentSession.id)
184
184
  expect(spawned.lineage.depth).toBe(1)
185
- expect(spawned.schemaVersion).toBe(2)
185
+ expect(spawned.schemaVersion).toBe(3)
186
186
  }
187
187
  if (idled && 'lineage' in idled && 'schemaVersion' in idled) {
188
188
  expect(idled.lineage.rootSessionId).toBe(parentSession.id)
189
189
  expect(idled.lineage.depth).toBe(1)
190
- expect(idled.schemaVersion).toBe(2)
190
+ expect(idled.schemaVersion).toBe(3)
191
191
  }
192
192
 
193
193
  // --- Summary materialized by kernel ---