claude-agent-sdk 0.16.10 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,15 +22,31 @@ module ClaudeAgentSDK
22
22
 
23
23
  CONTROL_REQUEST_TIMEOUT_ENV_VAR = 'CLAUDE_AGENT_SDK_CONTROL_REQUEST_TIMEOUT_SECONDS'
24
24
  DEFAULT_CONTROL_REQUEST_TIMEOUT_SECONDS = 1200.0
25
- # NOTE: CLAUDE_CODE_STREAM_CLOSE_TIMEOUT is defined by the CLI in
26
- # MILLISECONDS (Python SDK uses `int(os.environ[...])/1000`); the SDK
27
- # divides by 1000 to obtain seconds. The default below is *seconds*
28
- # for direct use without env conversion (60 s = the CLI's 60000 ms).
29
- STREAM_CLOSE_TIMEOUT_ENV_VAR = 'CLAUDE_CODE_STREAM_CLOSE_TIMEOUT'
30
- DEFAULT_STREAM_CLOSE_TIMEOUT_SECONDS = 60.0
25
+
26
+ # Waiter for control responses awaited OFF the reactor — i.e. a control
27
+ # method called from inside a hook/can_use_tool/SDK-MCP callback, which
28
+ # runs on a FiberBoundary worker thread (Python supports this reentrancy
29
+ # natively: callbacks are event-loop tasks and anyio.Event is
30
+ # level-triggered). Duck-types Async::Condition#signal for the read
31
+ # loop's signal sites; the unconditional token push makes it
32
+ # level-triggered, closing the check-then-wait gap that an
33
+ # edge-triggered Condition would lose across threads.
34
+ class ThreadWaiter
35
+ def initialize
36
+ @queue = ::Queue.new
37
+ end
38
+
39
+ def signal(_value = nil)
40
+ @queue << true
41
+ end
42
+
43
+ def wait(timeout)
44
+ @queue.pop(timeout: timeout)
45
+ end
46
+ end
31
47
 
32
48
  def initialize(transport:, is_streaming_mode:, can_use_tool: nil, hooks: nil, sdk_mcp_servers: nil, agents: nil,
33
- exclude_dynamic_sections: nil)
49
+ exclude_dynamic_sections: nil, skills: nil)
34
50
  @transport = transport
35
51
  @is_streaming_mode = is_streaming_mode
36
52
  @can_use_tool = can_use_tool
@@ -38,6 +54,7 @@ module ClaudeAgentSDK
38
54
  @sdk_mcp_servers = sdk_mcp_servers || {}
39
55
  @agents = agents
40
56
  @exclude_dynamic_sections = exclude_dynamic_sections
57
+ @skills = skills
41
58
 
42
59
  # Control protocol state
43
60
  @pending_control_responses = {}
@@ -46,16 +63,20 @@ module ClaudeAgentSDK
46
63
  @hook_callback_timeouts = {}
47
64
  @next_callback_id = 0
48
65
  @request_counter = 0
66
+ @request_counter_mutex = Mutex.new
49
67
  @inflight_control_request_tasks = {}
50
68
 
51
69
  # Message stream
52
70
  @message_queue = Async::Queue.new
53
71
  @first_result_received = false
72
+ @last_error_result_text = nil
54
73
  @first_result_condition = Async::Condition.new
55
74
  @task = nil
75
+ @child_tasks = []
56
76
  @initialized = false
57
77
  @closed = false
58
78
  @initialization_result = nil
79
+ @transcript_mirror_batcher = nil
59
80
  end
60
81
 
61
82
  # Initialize control protocol if in streaming mode
@@ -79,10 +100,17 @@ module ClaudeAgentSDK
79
100
  @hook_callback_timeouts[callback_id] = matcher[:timeout] if matcher[:timeout]
80
101
  callback_ids << callback_id
81
102
  end
82
- hooks_config[event] << {
103
+ matcher_config = {
83
104
  matcher: matcher[:matcher],
84
105
  hookCallbackIds: callback_ids
85
106
  }
107
+ # Wire field is literal "timeout" in SECONDS, per matcher,
108
+ # omitted when absent (Python _internal/query.py parity — no
109
+ # camelCase, no ms conversion). Local enforcement via
110
+ # @hook_callback_timeouts stays as defense-in-depth for CLIs
111
+ # that ignore the field.
112
+ matcher_config[:timeout] = matcher[:timeout] if matcher[:timeout]
113
+ hooks_config[event] << matcher_config
86
114
  end
87
115
  end
88
116
  end
@@ -116,6 +144,9 @@ module ClaudeAgentSDK
116
144
  agents: agents_dict
117
145
  }
118
146
  request[:excludeDynamicSections] = @exclude_dynamic_sections unless @exclude_dynamic_sections.nil?
147
+ # 'all' and omitted are equivalent at the wire level (no filter), so
148
+ # only send the field when it's an explicit list (mirrors Python).
149
+ request[:skills] = @skills if @skills.is_a?(Array)
119
150
 
120
151
  response = send_control_request(request)
121
152
  @initialized = true
@@ -150,6 +181,47 @@ module ClaudeAgentSDK
150
181
  @task = parent.async { read_messages }
151
182
  end
152
183
 
184
+ # Spawn a child task that is stopped by #close (mirrors the Python SDK's
185
+ # Query#spawn_task / _child_tasks). Used for background input streaming so
186
+ # a dying read loop or #close can never strand the stream task and hang
187
+ # the enclosing Async reactor.
188
+ #
189
+ # NOTE: intentionally a partial mirror — Python prunes completed tasks via
190
+ # add_done_callback(_child_tasks.discard); here entries live until #close.
191
+ # Fine for the current one-shot call sites (max two tasks per Query); do
192
+ # not route per-request work (control handlers, per-turn streams) through
193
+ # this without adding completion-based removal.
194
+ def spawn_task(&block)
195
+ parent = Async::Task.current?
196
+ raise CLIConnectionError, 'Query#spawn_task must be called inside an Async{} block' unless parent
197
+
198
+ task = parent.async(&block)
199
+ @child_tasks << task
200
+ task
201
+ end
202
+
203
+ # Install the transcript-mirror batcher fed by `transcript_mirror` frames
204
+ # (Client mode with a session_store). nil disables mirroring.
205
+ def set_transcript_mirror_batcher(batcher)
206
+ @transcript_mirror_batcher = batcher
207
+ end
208
+
209
+ # Synthesize a `mirror_error` system message and put it on the SDK message
210
+ # stream so consumers learn a mirror batch was dropped after exhausting
211
+ # retries. Non-blocking: the message queue is unbounded, so unlike the
212
+ # Python SDK there is no buffer-full drop path.
213
+ def report_mirror_error(key, error)
214
+ session_id = key && (key['session_id'] || key[:session_id])
215
+ @message_queue.enqueue(
216
+ type: 'system',
217
+ subtype: 'mirror_error',
218
+ error: error,
219
+ key: key,
220
+ uuid: SecureRandom.uuid,
221
+ session_id: session_id || ''
222
+ )
223
+ end
224
+
153
225
  private
154
226
 
155
227
  def control_request_timeout_seconds
@@ -162,16 +234,6 @@ module ClaudeAgentSDK
162
234
  DEFAULT_CONTROL_REQUEST_TIMEOUT_SECONDS
163
235
  end
164
236
 
165
- def stream_close_timeout_seconds
166
- raw_value = ENV.fetch(STREAM_CLOSE_TIMEOUT_ENV_VAR, nil)
167
- return DEFAULT_STREAM_CLOSE_TIMEOUT_SECONDS if raw_value.nil? || raw_value.strip.empty?
168
-
169
- value = Float(raw_value) / 1000.0
170
- value.positive? ? value : DEFAULT_STREAM_CLOSE_TIMEOUT_SECONDS
171
- rescue ArgumentError
172
- DEFAULT_STREAM_CLOSE_TIMEOUT_SECONDS
173
- end
174
-
175
237
  def read_messages
176
238
  @transport.read_messages do |message|
177
239
  break if @closed
@@ -200,50 +262,100 @@ module ClaudeAgentSDK
200
262
  task = request_id ? @inflight_control_request_tasks[request_id] : nil
201
263
  task&.stop
202
264
  next
265
+ when 'transcript_mirror'
266
+ # session_store mirror frame — fed to the batcher, never surfaced to
267
+ # consumers. camelCase on the wire; transport symbolizes keys.
268
+ @transcript_mirror_batcher&.enqueue(message[:filePath] || message[:file_path], message[:entries] || [])
269
+ next
203
270
  else
204
- if message[:type] == 'result' && !@first_result_received
205
- @first_result_received = true
206
- @first_result_condition.signal
271
+ if message[:type] == 'result'
272
+ # Flush the mirror before signaling/yielding the result so a
273
+ # consumer observing the result sees an up-to-date store for the turn.
274
+ flush_transcript_mirror
275
+ unless @first_result_received
276
+ @first_result_received = true
277
+ @first_result_condition.signal
278
+ end
279
+ if message[:is_error]
280
+ errors = (message[:errors] || []).join('; ')
281
+ @last_error_result_text = errors.empty? ? (message[:subtype] || 'unknown error').to_s : errors
282
+ else
283
+ @last_error_result_text = nil
284
+ end
285
+ elsif !(msg_type == 'system' && message[:subtype] == 'session_state_changed')
286
+ # Anything other than the post-turn session_state_changed marker
287
+ # means the conversation moved on; a ProcessError now is a fresh
288
+ # crash, not the expected exit from a prior error result. Mirrors
289
+ # the Python/TypeScript SDK reset logic.
290
+ @last_error_result_text = nil
207
291
  end
208
292
  # Regular SDK messages go to the queue
209
293
  @message_queue.enqueue(message)
210
294
  end
211
295
  end
212
- rescue ProcessError => e
213
- # The CLI can exit non-zero after delivering a valid result (e.g.,
214
- # StructuredOutput tool_use triggers exit code 1). When we already
215
- # received a result message, treat the process error as non-fatal.
216
- if @first_result_received
217
- warn "Claude SDK: Process exited with code #{e.exit_code} after result — ignoring"
218
- else
219
- @pending_control_responses.dup.each do |request_id, condition|
220
- @pending_control_results[request_id] ||= e
221
- condition.signal
222
- end
223
- @message_queue.enqueue({ type: 'error', error: e })
224
- end
225
296
  rescue StandardError => e
226
- # Unblock pending control requests (e.g., initialize) so callers don't hang until timeout.
297
+ # Unblock pending control requests (e.g., initialize) so callers don't
298
+ # hang until timeout. INVARIANT: store the result before signaling —
299
+ # senders check the slot before waiting (level-trigger).
227
300
  @pending_control_responses.dup.each do |request_id, condition|
228
301
  @pending_control_results[request_id] ||= e
229
302
  condition.signal
230
303
  end
231
304
 
305
+ # When the CLI emits a result with is_error=true (e.g. error_max_turns,
306
+ # error_during_execution, a StructuredOutput error) it then exits
307
+ # non-zero on purpose, for shell-script consumers. The trailing
308
+ # ProcessError carries no information beyond "exit code 1" — replace it
309
+ # with the structured error the CLI already reported so the exception is
310
+ # actionable. Mirrors the Python SDK (_read_messages) and the TypeScript
311
+ # SDK (Query.ts readMessages).
312
+ error = if e.is_a?(ProcessError) && @last_error_result_text
313
+ ProcessError.new("Claude Code returned an error result: #{@last_error_result_text}",
314
+ exit_code: e.exit_code, stderr: e.stderr)
315
+ else
316
+ e
317
+ end
318
+
232
319
  # Put error in queue so iterators can handle it
233
- @message_queue.enqueue({ type: 'error', error: e })
320
+ @message_queue.enqueue({ type: 'error', error: error })
234
321
  ensure
235
- unless @first_result_received
236
- @first_result_received = true
237
- @first_result_condition.signal
322
+ # Catch entries from a turn that ended without a `result` (early EOF /
323
+ # transport error) so they aren't dropped. The flush can suspend (lock
324
+ # acquire / thread join), so Async::Stop delivered mid-flush would skip
325
+ # the rest of this block — the nested ensure guarantees the signal and
326
+ # the end sentinel (which have no suspension points) are still delivered,
327
+ # mirroring the Python port's shielded flush + send_nowait sentinel.
328
+ begin
329
+ flush_transcript_mirror
330
+ ensure
331
+ unless @first_result_received
332
+ @first_result_received = true
333
+ @first_result_condition.signal
334
+ end
335
+ # Always signal end of stream
336
+ @message_queue.enqueue({ type: 'end' })
238
337
  end
239
- # Always signal end of stream
240
- @message_queue.enqueue({ type: 'end' })
338
+ end
339
+
340
+ # Flush the transcript-mirror batcher, swallowing errors — a mirror failure
341
+ # must never propagate into the read loop or its teardown.
342
+ def flush_transcript_mirror
343
+ @transcript_mirror_batcher&.flush
344
+ rescue StandardError => e
345
+ warn "Claude SDK: transcript mirror flush failed: #{e.message}"
241
346
  end
242
347
 
243
348
  def handle_control_response(message)
244
349
  response = message[:response] || {}
245
350
  request_id = response[:request_id] || response[:requestId] || message[:request_id] || message[:requestId]
246
- return unless @pending_control_responses.key?(request_id)
351
+ # Capture the waiter ONCE: a worker-thread caller can satisfy its
352
+ # level-trigger check and evict the entries between our key? check and
353
+ # a re-lookup, so `@pending_control_responses[request_id].signal` could
354
+ # call signal on nil — a NoMethodError the read loop would treat as a
355
+ # fatal transport error, tearing down the whole session. Signaling an
356
+ # already-evicted waiter is harmless (orphan token push / no-op).
357
+ waiter = @pending_control_responses[request_id]
358
+ return unless waiter
247
359
 
248
360
  if response[:subtype] == 'error'
249
361
  @pending_control_results[request_id] = StandardError.new(response[:error] || 'Unknown error')
@@ -251,8 +363,10 @@ module ClaudeAgentSDK
251
363
  @pending_control_results[request_id] = response
252
364
  end
253
365
 
254
- # Signal that response is ready
255
- @pending_control_responses[request_id].signal
366
+ # Signal that response is ready. INVARIANT: the result slot above
367
+ # MUST be written before this signal — senders check the slot before
368
+ # waiting (level-trigger).
369
+ waiter.signal
256
370
  end
257
371
 
258
372
  def handle_control_request(request)
@@ -315,11 +429,20 @@ module ClaudeAgentSDK
315
429
 
316
430
  original_input = request_data[:input]
317
431
 
432
+ # Field order mirrors Python _internal/query.py's can_use_tool branch.
433
+ # Suggestions are hydrated into PermissionUpdate (Python #920); a
434
+ # malformed entry raises here, on the reactor, and becomes an error
435
+ # control_response — same observable behavior as Python.
318
436
  context = ToolPermissionContext.new(
319
437
  signal: nil,
320
- suggestions: request_data[:permission_suggestions] || [],
438
+ suggestions: (request_data[:permission_suggestions] || []).map { |s| PermissionUpdate.new(s) },
321
439
  tool_use_id: request_data[:tool_use_id],
322
- agent_id: request_data[:agent_id]
440
+ agent_id: request_data[:agent_id],
441
+ blocked_path: request_data[:blocked_path],
442
+ decision_reason: request_data[:decision_reason],
443
+ title: request_data[:title],
444
+ display_name: request_data[:display_name],
445
+ description: request_data[:description]
323
446
  )
324
447
 
325
448
  # User-supplied permission callback runs on a plain thread, not the
@@ -594,8 +717,10 @@ module ClaudeAgentSDK
594
717
  **base_args
595
718
  )
596
719
  else
597
- # Return base input for unknown event types
598
- BaseHookInput.new(**base_args)
720
+ # Unknown event: preserve the wire event name and full raw payload
721
+ # rather than dropping event-specific fields (Python passes the raw
722
+ # dict through, so nothing is lost there).
723
+ UnknownHookInput.new(hook_event_name: event_name, raw_input: input_data, **base_args)
599
724
  end
600
725
  end
601
726
 
@@ -648,15 +773,25 @@ module ClaudeAgentSDK
648
773
 
649
774
  timeout_seconds = control_request_timeout_seconds
650
775
 
651
- # Generate unique request ID
652
- @request_counter += 1
653
- request_id = "req_#{@request_counter}_#{SecureRandom.hex(4)}"
776
+ # Detect the execution mode BEFORE any write: a control method called
777
+ # from inside a hook/permission/SDK-MCP callback runs on a
778
+ # FiberBoundary worker thread with no reactor. Detecting after the
779
+ # write left a half-executed request (written to the CLI, then
780
+ # RuntimeError; the eventual response dropped by the key? guard).
781
+ task = Async::Task.current?
782
+
783
+ # Generate unique request ID (callbacks may issue requests from
784
+ # worker threads concurrently with the reactor)
785
+ request_id = @request_counter_mutex.synchronize do
786
+ @request_counter += 1
787
+ "req_#{@request_counter}_#{SecureRandom.hex(4)}"
788
+ end
654
789
 
655
- # Create condition for response
656
- condition = Async::Condition.new
657
- @pending_control_responses[request_id] = condition
790
+ # Reactor callers wait on an Async::Condition; worker-thread callers
791
+ # on a ThreadWaiter. Registration must precede the write.
792
+ waiter = task ? Async::Condition.new : ThreadWaiter.new
793
+ @pending_control_responses[request_id] = waiter
658
794
 
659
- # Build and send request
660
795
  control_request = {
661
796
  type: 'control_request',
662
797
  request_id: request_id,
@@ -666,24 +801,12 @@ module ClaudeAgentSDK
666
801
 
667
802
  writeln(JSON.generate(control_request))
668
803
 
669
- # Wait for response with timeout. Use the current task's timeout so we
670
- # stay in the caller's fiber (a nested `Async do ... end.wait` spawned a
671
- # separate task and could leak the pending entries when an Async::Stop
672
- # propagated through `.wait` before either the success-path or the
673
- # timeout-path cleanup ran). Control requests must run inside an Async
674
- # reactor — `Query#start` already enforces this precondition, so the
675
- # cleanest place to surface the contract is the start hand-off; here we
676
- # assume an active task is present.
677
804
  begin
678
- Async::Task.current.with_timeout(timeout_seconds) do
679
- condition.wait
680
- end
805
+ await_control_response(request_id, waiter, task, timeout_seconds, request[:subtype])
681
806
  result = @pending_control_results[request_id]
682
807
  raise result if result.is_a?(Exception)
683
808
 
684
809
  result&.[](:response) || {}
685
- rescue Async::TimeoutError
686
- raise ControlRequestTimeoutError, "Control request timeout: #{request[:subtype]}"
687
810
  ensure
688
811
  # Always evict the entries so a late control_response (after timeout)
689
812
  # or an Async::Stop propagating through wait does not leak state.
@@ -692,6 +815,37 @@ module ClaudeAgentSDK
692
815
  end
693
816
  end
694
817
 
818
+ # Level-triggered wait: every signal site stores the result BEFORE
819
+ # signaling, so checking the result slot before (and between) waits
820
+ # cannot lose a wakeup — Async::Condition is edge-triggered and a signal
821
+ # arriving before the sender reaches wait would otherwise be dropped
822
+ # (reachable when a custom transport's #write suspends after delivery,
823
+ # or when the read loop's rescue broadcast fires mid-write). Mirrors
824
+ # anyio.Event's level-trigger semantics in Python.
825
+ #
826
+ # Do NOT reimplement the reactor wait as a nested `Async do ... end.wait`
827
+ # — that spawned a separate task and leaked the pending entries when an
828
+ # Async::Stop propagated through `.wait` before cleanup ran.
829
+ def await_control_response(request_id, waiter, task, timeout_seconds, subtype)
830
+ if task
831
+ begin
832
+ task.with_timeout(timeout_seconds) do
833
+ waiter.wait until @pending_control_results.key?(request_id)
834
+ end
835
+ rescue Async::TimeoutError
836
+ raise ControlRequestTimeoutError, "Control request timeout: #{subtype}"
837
+ end
838
+ else
839
+ deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + timeout_seconds
840
+ until @pending_control_results.key?(request_id)
841
+ remaining = deadline - Process.clock_gettime(Process::CLOCK_MONOTONIC)
842
+ raise ControlRequestTimeoutError, "Control request timeout: #{subtype}" if remaining <= 0
843
+
844
+ waiter.wait(remaining)
845
+ end
846
+ end
847
+ end
848
+
695
849
  def handle_sdk_mcp_request(server_name, message)
696
850
  # Convert server_name to symbol if needed for hash lookup
697
851
  server_key = @sdk_mcp_servers.key?(server_name) ? server_name : server_name.to_sym
@@ -773,27 +927,15 @@ module ClaudeAgentSDK
773
927
  }
774
928
  end
775
929
 
776
- def handle_mcp_tools_call(server, message, params)
777
- # Execute tool on the SDK MCP server
778
- tool_name = params[:name]
779
- arguments = params[:arguments] || {}
780
-
781
- # Call the tool
782
- result = server.call_tool(tool_name, arguments)
783
- content = ClaudeAgentSDK.flexible_fetch(result, 'content', 'content') || []
784
- response_data = { content: content }
785
-
786
- is_error = ClaudeAgentSDK.flexible_fetch(result, 'isError', 'is_error')
787
- response_data[:isError] = !!is_error unless is_error.nil?
788
-
789
- structured_content = ClaudeAgentSDK.flexible_fetch(result, 'structuredContent', 'structured_content')
790
- response_data[:structuredContent] = structured_content unless structured_content.nil?
791
-
792
- {
793
- jsonrpc: '2.0',
794
- id: message[:id],
795
- result: response_data
796
- }
930
+ def handle_mcp_tools_call(server, message, _params)
931
+ # Route through the official MCP::Server (Python parity: its lowlevel
932
+ # server validates arguments against the tool's inputSchema BEFORE the
933
+ # handler runs and reports validation failures, unknown tools, and
934
+ # handler exceptions as in-band isError results). tools/list,
935
+ # initialize, resources/* and prompts/* stay on the SDK paths — the
936
+ # gem drops annotations/_meta from tools/list and negotiates newer
937
+ # protocol versions.
938
+ server.handle_message(message)
797
939
  end
798
940
 
799
941
  def handle_mcp_resources_list(server, message)
@@ -925,31 +1067,60 @@ module ClaudeAgentSDK
925
1067
 
926
1068
  # Wait for the first result before closing stdin when hooks or SDK MCP
927
1069
  # servers may still need to exchange control messages with the CLI.
1070
+ # The control protocol requires stdin to stay open for the entire turn
1071
+ # (hook replies, can_use_tool replies and SDK MCP tool results are all
1072
+ # written to stdin), so no timeout is applied — closing stdin mid-turn
1073
+ # silently broke hooks/MCP on turns longer than the old 60s bound
1074
+ # (mirrors Python SDK commit c3d96cb). The condition is guaranteed to be
1075
+ # signaled: by the result branch in read_messages, or by its ensure block
1076
+ # when the process exits early.
928
1077
  def wait_for_result_and_end_input
929
1078
  if !@first_result_received &&
930
1079
  ((@sdk_mcp_servers && !@sdk_mcp_servers.empty?) || (@hooks && !@hooks.empty?))
931
- Async::Task.current.with_timeout(stream_close_timeout_seconds) do
932
- @first_result_condition.wait unless @first_result_received
933
- end
1080
+ @first_result_condition.wait
934
1081
  end
935
- rescue Async::TimeoutError
936
- nil
937
1082
  ensure
938
1083
  @transport.end_input
939
1084
  end
940
1085
 
941
- # Stream input messages to transport
1086
+ # Stream input messages to transport. NOTE: iteration runs on the
1087
+ # reactor (the deliberate FiberBoundary carve-out — see
1088
+ # fiber_boundary.rb): scheduler-aware blocking (Thread::Queue#pop,
1089
+ # sleep, socket IO) parks only this task; CPU-bound or scheduler-opaque
1090
+ # work in the enumerator must be moved to a producer Thread by the user.
942
1091
  def stream_input(stream)
1092
+ wrote_message = false
943
1093
  stream.each do |message|
944
1094
  break if @closed
945
1095
  serialized = message.is_a?(Hash) ? JSON.generate(message) : message.to_s
946
1096
  writeln(serialized)
1097
+ wrote_message = true
947
1098
  end
948
1099
  rescue StandardError => e
949
1100
  # Log error but don't raise
950
1101
  warn "Error streaming input: #{e.message}"
951
1102
  ensure
952
- wait_for_result_and_end_input
1103
+ # Three teardown shapes:
1104
+ # - #close in progress (@closed, Async::Stop unwinding): do nothing —
1105
+ # the transport is about to be closed, and waiting on
1106
+ # @first_result_condition inside a stopping fiber could suspend
1107
+ # teardown. Mirrors Python, where cancellation skips this entirely.
1108
+ # - A turn is in flight (some message reached the CLI): hold stdin
1109
+ # open until its first result so hooks/SDK MCP control replies can
1110
+ # still be written (no timeout — the result or process exit is
1111
+ # guaranteed to signal).
1112
+ # - No complete message ever reached the CLI (empty stream, or the
1113
+ # stream raised before the first write): no result can ever arrive,
1114
+ # so waiting would park query() forever beside an idle CLI. Close
1115
+ # stdin so the CLI sees EOF and exits. Deliberate improvement over
1116
+ # Python, which leaves stdin open and hangs on this path.
1117
+ unless @closed
1118
+ if wrote_message
1119
+ wait_for_result_and_end_input
1120
+ else
1121
+ @transport.end_input
1122
+ end
1123
+ end
953
1124
  end
954
1125
 
955
1126
  def writeln(string)
@@ -976,6 +1147,24 @@ module ClaudeAgentSDK
976
1147
  # Close the query and transport
977
1148
  def close
978
1149
  @closed = true
1150
+ # Wake pending control-request waiters (same shape as the read-loop
1151
+ # rescue broadcast): close stops the read task with Async::Stop, which
1152
+ # bypasses that broadcast — a worker-thread caller parked in
1153
+ # ThreadWaiter#wait would otherwise leak its OS thread for the full
1154
+ # control-request timeout (up to 1200s) in long-lived processes.
1155
+ # INVARIANT: store the result before signaling (level-trigger).
1156
+ @pending_control_responses.dup.each do |request_id, waiter|
1157
+ @pending_control_results[request_id] ||= CLIConnectionError.new('Query closed')
1158
+ waiter.signal
1159
+ end
1160
+ # Final mirror flush BEFORE stopping the read task, so the last turn's
1161
+ # entries reach the store. #close on the batcher never raises.
1162
+ @transcript_mirror_batcher&.close
1163
+ # Stop tracked child tasks (e.g. stream_input) before the read task and
1164
+ # transport so a parked input stream can never keep the reactor alive
1165
+ # (mirrors Python close() cancelling _child_tasks).
1166
+ @child_tasks.each(&:stop)
1167
+ @child_tasks.clear
979
1168
  @task&.stop
980
1169
  @transport.close
981
1170
  end