claude-agent-sdk 0.17.0 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +56 -0
- data/README.md +4 -2
- data/docs/configuration.md +13 -2
- data/docs/observability.md +28 -4
- data/docs/sessions.md +15 -2
- data/lib/claude_agent_sdk/command_builder.rb +69 -22
- data/lib/claude_agent_sdk/fiber_boundary.rb +39 -1
- data/lib/claude_agent_sdk/instrumentation/otel.rb +97 -23
- data/lib/claude_agent_sdk/message_parser.rb +4 -1
- data/lib/claude_agent_sdk/observer.rb +23 -3
- data/lib/claude_agent_sdk/query.rb +223 -88
- data/lib/claude_agent_sdk/sdk_mcp_server.rb +232 -181
- data/lib/claude_agent_sdk/session_store.rb +4 -0
- data/lib/claude_agent_sdk/sessions.rb +144 -24
- data/lib/claude_agent_sdk/subprocess_cli_transport.rb +184 -50
- data/lib/claude_agent_sdk/testing/session_store_conformance.rb +15 -1
- data/lib/claude_agent_sdk/types.rb +43 -5
- data/lib/claude_agent_sdk/version.rb +1 -1
- data/lib/claude_agent_sdk.rb +359 -93
- metadata +12 -6
|
@@ -22,15 +22,31 @@ module ClaudeAgentSDK
|
|
|
22
22
|
|
|
23
23
|
CONTROL_REQUEST_TIMEOUT_ENV_VAR = 'CLAUDE_AGENT_SDK_CONTROL_REQUEST_TIMEOUT_SECONDS'
|
|
24
24
|
DEFAULT_CONTROL_REQUEST_TIMEOUT_SECONDS = 1200.0
|
|
25
|
-
|
|
26
|
-
#
|
|
27
|
-
#
|
|
28
|
-
#
|
|
29
|
-
|
|
30
|
-
|
|
25
|
+
|
|
26
|
+
# Waiter for control responses awaited OFF the reactor — i.e. a control
|
|
27
|
+
# method called from inside a hook/can_use_tool/SDK-MCP callback, which
|
|
28
|
+
# runs on a FiberBoundary worker thread (Python supports this reentrancy
|
|
29
|
+
# natively: callbacks are event-loop tasks and anyio.Event is
|
|
30
|
+
# level-triggered). Duck-types Async::Condition#signal for the read
|
|
31
|
+
# loop's signal sites; the unconditional token push makes it
|
|
32
|
+
# level-triggered, closing the check-then-wait gap in which an
|
|
33
|
+
# edge-triggered Condition would lose a wakeup across threads.
|
|
34
|
+
class ThreadWaiter
|
|
35
|
+
def initialize
|
|
36
|
+
@queue = ::Queue.new
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def signal(_value = nil)
|
|
40
|
+
@queue << true
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
def wait(timeout)
|
|
44
|
+
@queue.pop(timeout: timeout)
|
|
45
|
+
end
|
|
46
|
+
end
|
|
31
47
|
|
|
32
48
|
def initialize(transport:, is_streaming_mode:, can_use_tool: nil, hooks: nil, sdk_mcp_servers: nil, agents: nil,
|
|
33
|
-
exclude_dynamic_sections: nil)
|
|
49
|
+
exclude_dynamic_sections: nil, skills: nil)
|
|
34
50
|
@transport = transport
|
|
35
51
|
@is_streaming_mode = is_streaming_mode
|
|
36
52
|
@can_use_tool = can_use_tool
|
|
@@ -38,6 +54,7 @@ module ClaudeAgentSDK
|
|
|
38
54
|
@sdk_mcp_servers = sdk_mcp_servers || {}
|
|
39
55
|
@agents = agents
|
|
40
56
|
@exclude_dynamic_sections = exclude_dynamic_sections
|
|
57
|
+
@skills = skills
|
|
41
58
|
|
|
42
59
|
# Control protocol state
|
|
43
60
|
@pending_control_responses = {}
|
|
@@ -46,13 +63,16 @@ module ClaudeAgentSDK
|
|
|
46
63
|
@hook_callback_timeouts = {}
|
|
47
64
|
@next_callback_id = 0
|
|
48
65
|
@request_counter = 0
|
|
66
|
+
@request_counter_mutex = Mutex.new
|
|
49
67
|
@inflight_control_request_tasks = {}
|
|
50
68
|
|
|
51
69
|
# Message stream
|
|
52
70
|
@message_queue = Async::Queue.new
|
|
53
71
|
@first_result_received = false
|
|
72
|
+
@last_error_result_text = nil
|
|
54
73
|
@first_result_condition = Async::Condition.new
|
|
55
74
|
@task = nil
|
|
75
|
+
@child_tasks = []
|
|
56
76
|
@initialized = false
|
|
57
77
|
@closed = false
|
|
58
78
|
@initialization_result = nil
|
|
@@ -80,10 +100,17 @@ module ClaudeAgentSDK
|
|
|
80
100
|
@hook_callback_timeouts[callback_id] = matcher[:timeout] if matcher[:timeout]
|
|
81
101
|
callback_ids << callback_id
|
|
82
102
|
end
|
|
83
|
-
|
|
103
|
+
matcher_config = {
|
|
84
104
|
matcher: matcher[:matcher],
|
|
85
105
|
hookCallbackIds: callback_ids
|
|
86
106
|
}
|
|
107
|
+
# Wire field is literal "timeout" in SECONDS, per matcher,
|
|
108
|
+
# omitted when absent (Python _internal/query.py parity — no
|
|
109
|
+
# camelCase, no ms conversion). Local enforcement via
|
|
110
|
+
# @hook_callback_timeouts stays as defense-in-depth for CLIs
|
|
111
|
+
# that ignore the field.
|
|
112
|
+
matcher_config[:timeout] = matcher[:timeout] if matcher[:timeout]
|
|
113
|
+
hooks_config[event] << matcher_config
|
|
87
114
|
end
|
|
88
115
|
end
|
|
89
116
|
end
|
|
@@ -117,6 +144,9 @@ module ClaudeAgentSDK
|
|
|
117
144
|
agents: agents_dict
|
|
118
145
|
}
|
|
119
146
|
request[:excludeDynamicSections] = @exclude_dynamic_sections unless @exclude_dynamic_sections.nil?
|
|
147
|
+
# 'all' and omitted are equivalent at the wire level (no filter), so
|
|
148
|
+
# only send the field when it's an explicit list (mirrors Python).
|
|
149
|
+
request[:skills] = @skills if @skills.is_a?(Array)
|
|
120
150
|
|
|
121
151
|
response = send_control_request(request)
|
|
122
152
|
@initialized = true
|
|
@@ -151,6 +181,25 @@ module ClaudeAgentSDK
|
|
|
151
181
|
@task = parent.async { read_messages }
|
|
152
182
|
end
|
|
153
183
|
|
|
184
|
+
# Spawn a child task that is stopped by #close (mirrors the Python SDK's
|
|
185
|
+
# Query#spawn_task / _child_tasks). Used for background input streaming so
|
|
186
|
+
# a dying read loop or #close can never strand the stream task and hang
|
|
187
|
+
# the enclosing Async reactor.
|
|
188
|
+
#
|
|
189
|
+
# NOTE: intentionally a partial mirror — Python prunes completed tasks via
|
|
190
|
+
# add_done_callback(_child_tasks.discard); here entries live until #close.
|
|
191
|
+
# Fine for the current one-shot call sites (max two tasks per Query); do
|
|
192
|
+
# not route per-request work (control handlers, per-turn streams) through
|
|
193
|
+
# this without adding completion-based removal.
|
|
194
|
+
def spawn_task(&block)
|
|
195
|
+
parent = Async::Task.current?
|
|
196
|
+
raise CLIConnectionError, 'Query#spawn_task must be called inside an Async{} block' unless parent
|
|
197
|
+
|
|
198
|
+
task = parent.async(&block)
|
|
199
|
+
@child_tasks << task
|
|
200
|
+
task
|
|
201
|
+
end
|
|
202
|
+
|
|
154
203
|
# Install the transcript-mirror batcher fed by `transcript_mirror` frames
|
|
155
204
|
# (Client mode with a session_store). nil disables mirroring.
|
|
156
205
|
def set_transcript_mirror_batcher(batcher)
|
|
@@ -185,16 +234,6 @@ module ClaudeAgentSDK
|
|
|
185
234
|
DEFAULT_CONTROL_REQUEST_TIMEOUT_SECONDS
|
|
186
235
|
end
|
|
187
236
|
|
|
188
|
-
def stream_close_timeout_seconds
|
|
189
|
-
raw_value = ENV.fetch(STREAM_CLOSE_TIMEOUT_ENV_VAR, nil)
|
|
190
|
-
return DEFAULT_STREAM_CLOSE_TIMEOUT_SECONDS if raw_value.nil? || raw_value.strip.empty?
|
|
191
|
-
|
|
192
|
-
value = Float(raw_value) / 1000.0
|
|
193
|
-
value.positive? ? value : DEFAULT_STREAM_CLOSE_TIMEOUT_SECONDS
|
|
194
|
-
rescue ArgumentError
|
|
195
|
-
DEFAULT_STREAM_CLOSE_TIMEOUT_SECONDS
|
|
196
|
-
end
|
|
197
|
-
|
|
198
237
|
def read_messages
|
|
199
238
|
@transport.read_messages do |message|
|
|
200
239
|
break if @closed
|
|
@@ -237,33 +276,48 @@ module ClaudeAgentSDK
|
|
|
237
276
|
@first_result_received = true
|
|
238
277
|
@first_result_condition.signal
|
|
239
278
|
end
|
|
279
|
+
if message[:is_error]
|
|
280
|
+
errors = (message[:errors] || []).join('; ')
|
|
281
|
+
@last_error_result_text = errors.empty? ? (message[:subtype] || 'unknown error').to_s : errors
|
|
282
|
+
else
|
|
283
|
+
@last_error_result_text = nil
|
|
284
|
+
end
|
|
285
|
+
elsif !(msg_type == 'system' && message[:subtype] == 'session_state_changed')
|
|
286
|
+
# Anything other than the post-turn session_state_changed marker
|
|
287
|
+
# means the conversation moved on; a ProcessError now is a fresh
|
|
288
|
+
# crash, not the expected exit from a prior error result. Mirrors
|
|
289
|
+
# the Python/TypeScript SDK reset logic.
|
|
290
|
+
@last_error_result_text = nil
|
|
240
291
|
end
|
|
241
292
|
# Regular SDK messages go to the queue
|
|
242
293
|
@message_queue.enqueue(message)
|
|
243
294
|
end
|
|
244
295
|
end
|
|
245
|
-
rescue ProcessError => e
|
|
246
|
-
# The CLI can exit non-zero after delivering a valid result (e.g.,
|
|
247
|
-
# StructuredOutput tool_use triggers exit code 1). When we already
|
|
248
|
-
# received a result message, treat the process error as non-fatal.
|
|
249
|
-
if @first_result_received
|
|
250
|
-
warn "Claude SDK: Process exited with code #{e.exit_code} after result — ignoring"
|
|
251
|
-
else
|
|
252
|
-
@pending_control_responses.dup.each do |request_id, condition|
|
|
253
|
-
@pending_control_results[request_id] ||= e
|
|
254
|
-
condition.signal
|
|
255
|
-
end
|
|
256
|
-
@message_queue.enqueue({ type: 'error', error: e })
|
|
257
|
-
end
|
|
258
296
|
rescue StandardError => e
|
|
259
|
-
# Unblock pending control requests (e.g., initialize) so callers don't
|
|
297
|
+
# Unblock pending control requests (e.g., initialize) so callers don't
|
|
298
|
+
# hang until timeout. INVARIANT: store the result before signaling —
|
|
299
|
+
# senders check the slot before waiting (level-trigger).
|
|
260
300
|
@pending_control_responses.dup.each do |request_id, condition|
|
|
261
301
|
@pending_control_results[request_id] ||= e
|
|
262
302
|
condition.signal
|
|
263
303
|
end
|
|
264
304
|
|
|
305
|
+
# When the CLI emits a result with is_error=true (e.g. error_max_turns,
|
|
306
|
+
# error_during_execution, a StructuredOutput error) it then exits
|
|
307
|
+
# non-zero on purpose, for shell-script consumers. The trailing
|
|
308
|
+
# ProcessError carries no information beyond "exit code 1" — replace it
|
|
309
|
+
# with the structured error the CLI already reported so the exception is
|
|
310
|
+
# actionable. Mirrors the Python SDK (_read_messages) and the TypeScript
|
|
311
|
+
# SDK (Query.ts readMessages).
|
|
312
|
+
error = if e.is_a?(ProcessError) && @last_error_result_text
|
|
313
|
+
ProcessError.new("Claude Code returned an error result: #{@last_error_result_text}",
|
|
314
|
+
exit_code: e.exit_code, stderr: e.stderr)
|
|
315
|
+
else
|
|
316
|
+
e
|
|
317
|
+
end
|
|
318
|
+
|
|
265
319
|
# Put error in queue so iterators can handle it
|
|
266
|
-
@message_queue.enqueue({ type: 'error', error: e })
|
|
320
|
+
@message_queue.enqueue({ type: 'error', error: error })
|
|
267
321
|
ensure
|
|
268
322
|
# Catch entries from a turn that ended without a `result` (early EOF /
|
|
269
323
|
# transport error) so they aren't dropped. The flush can suspend (lock
|
|
@@ -294,7 +348,14 @@ module ClaudeAgentSDK
|
|
|
294
348
|
def handle_control_response(message)
|
|
295
349
|
response = message[:response] || {}
|
|
296
350
|
request_id = response[:request_id] || response[:requestId] || message[:request_id] || message[:requestId]
|
|
297
|
-
|
|
351
|
+
# Capture the waiter ONCE: a worker-thread caller can satisfy its
|
|
352
|
+
# level-trigger check and evict the entries between our key? check and
|
|
353
|
+
# a re-lookup, so `@pending_control_responses[request_id].signal` could
|
|
354
|
+
# call signal on nil — a NoMethodError the read loop would treat as a
|
|
355
|
+
# fatal transport error, tearing down the whole session. Signaling an
|
|
356
|
+
# already-evicted waiter is harmless (orphan token push / no-op).
|
|
357
|
+
waiter = @pending_control_responses[request_id]
|
|
358
|
+
return unless waiter
|
|
298
359
|
|
|
299
360
|
if response[:subtype] == 'error'
|
|
300
361
|
@pending_control_results[request_id] = StandardError.new(response[:error] || 'Unknown error')
|
|
@@ -302,8 +363,10 @@ module ClaudeAgentSDK
|
|
|
302
363
|
@pending_control_results[request_id] = response
|
|
303
364
|
end
|
|
304
365
|
|
|
305
|
-
# Signal that response is ready
|
|
306
|
-
|
|
366
|
+
# Signal that response is ready. INVARIANT: the result slot above
|
|
367
|
+
# MUST be written before this signal — senders check the slot before
|
|
368
|
+
# waiting (level-trigger).
|
|
369
|
+
waiter.signal
|
|
307
370
|
end
|
|
308
371
|
|
|
309
372
|
def handle_control_request(request)
|
|
@@ -366,11 +429,20 @@ module ClaudeAgentSDK
|
|
|
366
429
|
|
|
367
430
|
original_input = request_data[:input]
|
|
368
431
|
|
|
432
|
+
# Field order mirrors Python _internal/query.py's can_use_tool branch.
|
|
433
|
+
# Suggestions are hydrated into PermissionUpdate (Python #920); a
|
|
434
|
+
# malformed entry raises here, on the reactor, and becomes an error
|
|
435
|
+
# control_response — same observable behavior as Python.
|
|
369
436
|
context = ToolPermissionContext.new(
|
|
370
437
|
signal: nil,
|
|
371
|
-
suggestions: request_data[:permission_suggestions] || [],
|
|
438
|
+
suggestions: (request_data[:permission_suggestions] || []).map { |s| PermissionUpdate.new(s) },
|
|
372
439
|
tool_use_id: request_data[:tool_use_id],
|
|
373
|
-
agent_id: request_data[:agent_id]
|
|
440
|
+
agent_id: request_data[:agent_id],
|
|
441
|
+
blocked_path: request_data[:blocked_path],
|
|
442
|
+
decision_reason: request_data[:decision_reason],
|
|
443
|
+
title: request_data[:title],
|
|
444
|
+
display_name: request_data[:display_name],
|
|
445
|
+
description: request_data[:description]
|
|
374
446
|
)
|
|
375
447
|
|
|
376
448
|
# User-supplied permission callback runs on a plain thread, not the
|
|
@@ -645,8 +717,10 @@ module ClaudeAgentSDK
|
|
|
645
717
|
**base_args
|
|
646
718
|
)
|
|
647
719
|
else
|
|
648
|
-
#
|
|
649
|
-
|
|
720
|
+
# Unknown event: preserve the wire event name and full raw payload
|
|
721
|
+
# rather than dropping event-specific fields (Python passes the raw
|
|
722
|
+
# dict through, so nothing is lost there).
|
|
723
|
+
UnknownHookInput.new(hook_event_name: event_name, raw_input: input_data, **base_args)
|
|
650
724
|
end
|
|
651
725
|
end
|
|
652
726
|
|
|
@@ -699,15 +773,25 @@ module ClaudeAgentSDK
|
|
|
699
773
|
|
|
700
774
|
timeout_seconds = control_request_timeout_seconds
|
|
701
775
|
|
|
702
|
-
#
|
|
703
|
-
|
|
704
|
-
|
|
776
|
+
# Detect the execution mode BEFORE any write: a control method called
|
|
777
|
+
# from inside a hook/permission/SDK-MCP callback runs on a
|
|
778
|
+
# FiberBoundary worker thread with no reactor. Detecting after the
|
|
779
|
+
# write left a half-executed request (written to the CLI, then
|
|
780
|
+
# RuntimeError; the eventual response dropped by the key? guard).
|
|
781
|
+
task = Async::Task.current?
|
|
782
|
+
|
|
783
|
+
# Generate unique request ID (callbacks may issue requests from
|
|
784
|
+
# worker threads concurrently with the reactor)
|
|
785
|
+
request_id = @request_counter_mutex.synchronize do
|
|
786
|
+
@request_counter += 1
|
|
787
|
+
"req_#{@request_counter}_#{SecureRandom.hex(4)}"
|
|
788
|
+
end
|
|
705
789
|
|
|
706
|
-
#
|
|
707
|
-
|
|
708
|
-
|
|
790
|
+
# Reactor callers wait on an Async::Condition; worker-thread callers
|
|
791
|
+
# on a ThreadWaiter. Registration must precede the write.
|
|
792
|
+
waiter = task ? Async::Condition.new : ThreadWaiter.new
|
|
793
|
+
@pending_control_responses[request_id] = waiter
|
|
709
794
|
|
|
710
|
-
# Build and send request
|
|
711
795
|
control_request = {
|
|
712
796
|
type: 'control_request',
|
|
713
797
|
request_id: request_id,
|
|
@@ -717,24 +801,12 @@ module ClaudeAgentSDK
|
|
|
717
801
|
|
|
718
802
|
writeln(JSON.generate(control_request))
|
|
719
803
|
|
|
720
|
-
# Wait for response with timeout. Use the current task's timeout so we
|
|
721
|
-
# stay in the caller's fiber (a nested `Async do ... end.wait` spawned a
|
|
722
|
-
# separate task and could leak the pending entries when an Async::Stop
|
|
723
|
-
# propagated through `.wait` before either the success-path or the
|
|
724
|
-
# timeout-path cleanup ran). Control requests must run inside an Async
|
|
725
|
-
# reactor — `Query#start` already enforces this precondition, so the
|
|
726
|
-
# cleanest place to surface the contract is the start hand-off; here we
|
|
727
|
-
# assume an active task is present.
|
|
728
804
|
begin
|
|
729
|
-
|
|
730
|
-
condition.wait
|
|
731
|
-
end
|
|
805
|
+
await_control_response(request_id, waiter, task, timeout_seconds, request[:subtype])
|
|
732
806
|
result = @pending_control_results[request_id]
|
|
733
807
|
raise result if result.is_a?(Exception)
|
|
734
808
|
|
|
735
809
|
result&.[](:response) || {}
|
|
736
|
-
rescue Async::TimeoutError
|
|
737
|
-
raise ControlRequestTimeoutError, "Control request timeout: #{request[:subtype]}"
|
|
738
810
|
ensure
|
|
739
811
|
# Always evict the entries so a late control_response (after timeout)
|
|
740
812
|
# or an Async::Stop propagating through wait does not leak state.
|
|
@@ -743,6 +815,37 @@ module ClaudeAgentSDK
|
|
|
743
815
|
end
|
|
744
816
|
end
|
|
745
817
|
|
|
818
|
+
# Level-triggered wait: every signal site stores the result BEFORE
|
|
819
|
+
# signaling, so checking the result slot before (and between) waits
|
|
820
|
+
# cannot lose a wakeup — Async::Condition is edge-triggered and a signal
|
|
821
|
+
# arriving before the sender reaches wait would otherwise be dropped
|
|
822
|
+
# (reachable when a custom transport's #write suspends after delivery,
|
|
823
|
+
# or when the read loop's rescue broadcast fires mid-write). Mirrors
|
|
824
|
+
# anyio.Event's level-trigger semantics in Python.
|
|
825
|
+
#
|
|
826
|
+
# Do NOT reimplement the reactor wait as a nested `Async do ... end.wait`
|
|
827
|
+
# — that spawned a separate task and leaked the pending entries when an
|
|
828
|
+
# Async::Stop propagated through `.wait` before cleanup ran.
|
|
829
|
+
def await_control_response(request_id, waiter, task, timeout_seconds, subtype)
|
|
830
|
+
if task
|
|
831
|
+
begin
|
|
832
|
+
task.with_timeout(timeout_seconds) do
|
|
833
|
+
waiter.wait until @pending_control_results.key?(request_id)
|
|
834
|
+
end
|
|
835
|
+
rescue Async::TimeoutError
|
|
836
|
+
raise ControlRequestTimeoutError, "Control request timeout: #{subtype}"
|
|
837
|
+
end
|
|
838
|
+
else
|
|
839
|
+
deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + timeout_seconds
|
|
840
|
+
until @pending_control_results.key?(request_id)
|
|
841
|
+
remaining = deadline - Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
842
|
+
raise ControlRequestTimeoutError, "Control request timeout: #{subtype}" if remaining <= 0
|
|
843
|
+
|
|
844
|
+
waiter.wait(remaining)
|
|
845
|
+
end
|
|
846
|
+
end
|
|
847
|
+
end
|
|
848
|
+
|
|
746
849
|
def handle_sdk_mcp_request(server_name, message)
|
|
747
850
|
# Convert server_name to symbol if needed for hash lookup
|
|
748
851
|
server_key = @sdk_mcp_servers.key?(server_name) ? server_name : server_name.to_sym
|
|
@@ -824,27 +927,15 @@ module ClaudeAgentSDK
|
|
|
824
927
|
}
|
|
825
928
|
end
|
|
826
929
|
|
|
827
|
-
def handle_mcp_tools_call(server, message, params)
|
|
828
|
-
#
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
#
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
is_error = ClaudeAgentSDK.flexible_fetch(result, 'isError', 'is_error')
|
|
838
|
-
response_data[:isError] = !!is_error unless is_error.nil?
|
|
839
|
-
|
|
840
|
-
structured_content = ClaudeAgentSDK.flexible_fetch(result, 'structuredContent', 'structured_content')
|
|
841
|
-
response_data[:structuredContent] = structured_content unless structured_content.nil?
|
|
842
|
-
|
|
843
|
-
{
|
|
844
|
-
jsonrpc: '2.0',
|
|
845
|
-
id: message[:id],
|
|
846
|
-
result: response_data
|
|
847
|
-
}
|
|
930
|
+
def handle_mcp_tools_call(server, message, _params)
|
|
931
|
+
# Route through the official MCP::Server (Python parity: its lowlevel
|
|
932
|
+
# server validates arguments against the tool's inputSchema BEFORE the
|
|
933
|
+
# handler runs and reports validation failures, unknown tools, and
|
|
934
|
+
# handler exceptions as in-band isError results). tools/list,
|
|
935
|
+
# initialize, resources/* and prompts/* stay on the SDK paths — the
|
|
936
|
+
# gem drops annotations/_meta from tools/list and negotiates newer
|
|
937
|
+
# protocol versions.
|
|
938
|
+
server.handle_message(message)
|
|
848
939
|
end
|
|
849
940
|
|
|
850
941
|
def handle_mcp_resources_list(server, message)
|
|
@@ -976,31 +1067,60 @@ module ClaudeAgentSDK
|
|
|
976
1067
|
|
|
977
1068
|
# Wait for the first result before closing stdin when hooks or SDK MCP
|
|
978
1069
|
# servers may still need to exchange control messages with the CLI.
|
|
1070
|
+
# The control protocol requires stdin to stay open for the entire turn
|
|
1071
|
+
# (hook replies, can_use_tool replies and SDK MCP tool results are all
|
|
1072
|
+
# written to stdin), so no timeout is applied — closing stdin mid-turn
|
|
1073
|
+
# silently broke hooks/MCP on turns longer than the old 60s bound
|
|
1074
|
+
# (mirrors Python SDK commit c3d96cb). The condition is guaranteed to be
|
|
1075
|
+
# signaled: by the result branch in read_messages, or by its ensure block
|
|
1076
|
+
# when the process exits early.
|
|
979
1077
|
def wait_for_result_and_end_input
|
|
980
1078
|
if !@first_result_received &&
|
|
981
1079
|
((@sdk_mcp_servers && !@sdk_mcp_servers.empty?) || (@hooks && !@hooks.empty?))
|
|
982
|
-
|
|
983
|
-
@first_result_condition.wait unless @first_result_received
|
|
984
|
-
end
|
|
1080
|
+
@first_result_condition.wait
|
|
985
1081
|
end
|
|
986
|
-
rescue Async::TimeoutError
|
|
987
|
-
nil
|
|
988
1082
|
ensure
|
|
989
1083
|
@transport.end_input
|
|
990
1084
|
end
|
|
991
1085
|
|
|
992
|
-
# Stream input messages to transport
|
|
1086
|
+
# Stream input messages to transport. NOTE: iteration runs on the
|
|
1087
|
+
# reactor (the deliberate FiberBoundary carve-out — see
|
|
1088
|
+
# fiber_boundary.rb): scheduler-aware blocking (Thread::Queue#pop,
|
|
1089
|
+
# sleep, socket IO) parks only this task; CPU-bound or scheduler-opaque
|
|
1090
|
+
# work in the enumerator must be moved to a producer Thread by the user.
|
|
993
1091
|
def stream_input(stream)
|
|
1092
|
+
wrote_message = false
|
|
994
1093
|
stream.each do |message|
|
|
995
1094
|
break if @closed
|
|
996
1095
|
serialized = message.is_a?(Hash) ? JSON.generate(message) : message.to_s
|
|
997
1096
|
writeln(serialized)
|
|
1097
|
+
wrote_message = true
|
|
998
1098
|
end
|
|
999
1099
|
rescue StandardError => e
|
|
1000
1100
|
# Log error but don't raise
|
|
1001
1101
|
warn "Error streaming input: #{e.message}"
|
|
1002
1102
|
ensure
|
|
1003
|
-
|
|
1103
|
+
# Three teardown shapes:
|
|
1104
|
+
# - #close in progress (@closed, Async::Stop unwinding): do nothing —
|
|
1105
|
+
# the transport is about to be closed, and waiting on
|
|
1106
|
+
# @first_result_condition inside a stopping fiber could suspend
|
|
1107
|
+
# teardown. Mirrors Python, where cancellation skips this entirely.
|
|
1108
|
+
# - A turn is in flight (some message reached the CLI): hold stdin
|
|
1109
|
+
# open until its first result so hooks/SDK MCP control replies can
|
|
1110
|
+
# still be written (no timeout — the result or process exit is
|
|
1111
|
+
# guaranteed to signal).
|
|
1112
|
+
# - No complete message ever reached the CLI (empty stream, or the
|
|
1113
|
+
# stream raised before the first write): no result can ever arrive,
|
|
1114
|
+
# so waiting would park query() forever beside an idle CLI. Close
|
|
1115
|
+
# stdin so the CLI sees EOF and exits. Deliberate improvement over
|
|
1116
|
+
# Python, which leaves stdin open and hangs on this path.
|
|
1117
|
+
unless @closed
|
|
1118
|
+
if wrote_message
|
|
1119
|
+
wait_for_result_and_end_input
|
|
1120
|
+
else
|
|
1121
|
+
@transport.end_input
|
|
1122
|
+
end
|
|
1123
|
+
end
|
|
1004
1124
|
end
|
|
1005
1125
|
|
|
1006
1126
|
def writeln(string)
|
|
@@ -1027,9 +1147,24 @@ module ClaudeAgentSDK
|
|
|
1027
1147
|
# Close the query and transport
|
|
1028
1148
|
def close
|
|
1029
1149
|
@closed = true
|
|
1150
|
+
# Wake pending control-request waiters (same shape as the read-loop
|
|
1151
|
+
# rescue broadcast): close stops the read task with Async::Stop, which
|
|
1152
|
+
# bypasses that broadcast — a worker-thread caller parked in
|
|
1153
|
+
# ThreadWaiter#wait would otherwise leak its OS thread for the full
|
|
1154
|
+
# control-request timeout (up to 1200s) in long-lived processes.
|
|
1155
|
+
# INVARIANT: store the result before signaling (level-trigger).
|
|
1156
|
+
@pending_control_responses.dup.each do |request_id, waiter|
|
|
1157
|
+
@pending_control_results[request_id] ||= CLIConnectionError.new('Query closed')
|
|
1158
|
+
waiter.signal
|
|
1159
|
+
end
|
|
1030
1160
|
# Final mirror flush BEFORE stopping the read task, so the last turn's
|
|
1031
1161
|
# entries reach the store. #close on the batcher never raises.
|
|
1032
1162
|
@transcript_mirror_batcher&.close
|
|
1163
|
+
# Stop tracked child tasks (e.g. stream_input) before the read task and
|
|
1164
|
+
# transport so a parked input stream can never keep the reactor alive
|
|
1165
|
+
# (mirrors Python close() cancelling _child_tasks).
|
|
1166
|
+
@child_tasks.each(&:stop)
|
|
1167
|
+
@child_tasks.clear
|
|
1033
1168
|
@task&.stop
|
|
1034
1169
|
@transport.close
|
|
1035
1170
|
end
|