claude-agent-sdk 0.17.0 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +56 -0
- data/README.md +4 -2
- data/docs/configuration.md +13 -2
- data/docs/observability.md +28 -4
- data/docs/sessions.md +15 -2
- data/lib/claude_agent_sdk/command_builder.rb +69 -22
- data/lib/claude_agent_sdk/fiber_boundary.rb +39 -1
- data/lib/claude_agent_sdk/instrumentation/otel.rb +97 -23
- data/lib/claude_agent_sdk/message_parser.rb +4 -1
- data/lib/claude_agent_sdk/observer.rb +23 -3
- data/lib/claude_agent_sdk/query.rb +223 -88
- data/lib/claude_agent_sdk/sdk_mcp_server.rb +232 -181
- data/lib/claude_agent_sdk/session_store.rb +4 -0
- data/lib/claude_agent_sdk/sessions.rb +144 -24
- data/lib/claude_agent_sdk/subprocess_cli_transport.rb +184 -50
- data/lib/claude_agent_sdk/testing/session_store_conformance.rb +15 -1
- data/lib/claude_agent_sdk/types.rb +43 -5
- data/lib/claude_agent_sdk/version.rb +1 -1
- data/lib/claude_agent_sdk.rb +359 -93
- metadata +12 -6
|
@@ -81,6 +81,11 @@ module ClaudeAgentSDK
|
|
|
81
81
|
end
|
|
82
82
|
end
|
|
83
83
|
|
|
84
|
+
# Recording-only by design: a Client session can survive an error (the
|
|
85
|
+
# user may rescue one bad message and keep receiving), so finishing here
|
|
86
|
+
# would orphan later spans and break the next turn. Finish ownership
|
|
87
|
+
# stays with end_trace/on_close; start_trace also finishes any dangling
|
|
88
|
+
# span from a previous trace as the never-disconnected backstop.
|
|
84
89
|
def on_error(error)
|
|
85
90
|
return unless @root_span
|
|
86
91
|
|
|
@@ -89,16 +94,25 @@ module ClaudeAgentSDK
|
|
|
89
94
|
end
|
|
90
95
|
|
|
91
96
|
def on_close
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
@root_span&.finish
|
|
95
|
-
@root_span = nil
|
|
96
|
-
@root_context = nil
|
|
97
|
+
finish_open_spans
|
|
98
|
+
reset_session_buffers
|
|
97
99
|
end
|
|
98
100
|
|
|
99
101
|
private
|
|
100
102
|
|
|
101
103
|
def start_trace(message)
|
|
104
|
+
# A new init without an intervening ResultMessage (e.g. /clear or an
|
|
105
|
+
# interrupted turn) supersedes the current trace; finish it so it is
|
|
106
|
+
# exported instead of leaking as a never-ended span, and reset the
|
|
107
|
+
# buffers so the superseded turn's prompt/output cannot mislabel the
|
|
108
|
+
# new trace. The reset is conditional on an actual supersede — in the
|
|
109
|
+
# normal flow the buffered pre-init prompt belongs to THIS trace.
|
|
110
|
+
# finish_open_spans itself stays unconditional: end_trace nils
|
|
111
|
+
# @root_span but can leave @tool_spans populated.
|
|
112
|
+
superseding = !@root_span.nil?
|
|
113
|
+
finish_open_spans
|
|
114
|
+
reset_session_buffers if superseding
|
|
115
|
+
|
|
102
116
|
attrs = {
|
|
103
117
|
# gen_ai semantic conventions (recognized by Langfuse, Datadog, etc.)
|
|
104
118
|
'gen_ai.system' => 'anthropic',
|
|
@@ -146,20 +160,15 @@ module ClaudeAgentSDK
|
|
|
146
160
|
@last_assistant_text = combined_text unless combined_text.empty?
|
|
147
161
|
|
|
148
162
|
# Create generation span
|
|
149
|
-
usage = message.usage || {}
|
|
150
|
-
input_tokens = usage[:input_tokens] || usage['input_tokens']
|
|
151
|
-
output_tokens = usage[:output_tokens] || usage['output_tokens']
|
|
152
163
|
attrs = {
|
|
153
164
|
'openinference.span.kind' => 'LLM',
|
|
154
165
|
'langfuse.observation.type' => 'generation',
|
|
155
166
|
'gen_ai.response.model' => message.model,
|
|
156
167
|
'llm.model_name' => message.model,
|
|
157
|
-
'gen_ai.usage.input_tokens' => input_tokens,
|
|
158
|
-
'gen_ai.usage.output_tokens' => output_tokens,
|
|
159
168
|
'gen_ai.completion' => truncate(combined_text),
|
|
160
169
|
# OpenInference: Langfuse maps output.value to the Preview Output field
|
|
161
170
|
'output.value' => truncate(combined_text)
|
|
162
|
-
}
|
|
171
|
+
}.merge(usage_token_attrs(message.usage || {}))
|
|
163
172
|
|
|
164
173
|
OpenTelemetry::Context.with_current(@root_context) do
|
|
165
174
|
span = @tracer.start_span('claude_agent.generation', attributes: compact_attrs(attrs))
|
|
@@ -188,9 +197,17 @@ module ClaudeAgentSDK
|
|
|
188
197
|
return unless @root_span
|
|
189
198
|
|
|
190
199
|
usage = message.usage || {}
|
|
191
|
-
input_tokens = usage[:input_tokens] || usage['input_tokens']
|
|
192
|
-
output_tokens = usage[:output_tokens] || usage['output_tokens']
|
|
193
|
-
|
|
200
|
+
input_tokens = usage_value(usage, :input_tokens)
|
|
201
|
+
output_tokens = usage_value(usage, :output_tokens)
|
|
202
|
+
cache_creation_tokens = usage_value(usage, :cache_creation_input_tokens)
|
|
203
|
+
cache_read_tokens = usage_value(usage, :cache_read_input_tokens)
|
|
204
|
+
# OpenInference subset semantics: prompt_details.* break down
|
|
205
|
+
# llm.token_count.prompt, so the prompt count must INCLUDE cache
|
|
206
|
+
# tokens (Anthropic's input_tokens excludes them; OpenInference's own
|
|
207
|
+
# Anthropic instrumentation sums them in). gen_ai.usage.* keys keep
|
|
208
|
+
# the raw exclusive values — Langfuse prices those additively.
|
|
209
|
+
prompt_tokens = (input_tokens || 0) + (cache_creation_tokens || 0) + (cache_read_tokens || 0) if input_tokens || cache_creation_tokens || cache_read_tokens
|
|
210
|
+
total_tokens = (prompt_tokens || 0) + (output_tokens || 0) if prompt_tokens || output_tokens
|
|
194
211
|
|
|
195
212
|
# Set trace output (last assistant response — shown in Langfuse UI)
|
|
196
213
|
# ResultMessage.result has the final text; fall back to last tracked assistant text
|
|
@@ -199,12 +216,14 @@ module ClaudeAgentSDK
|
|
|
199
216
|
attrs = {
|
|
200
217
|
# gen_ai conventions
|
|
201
218
|
'gen_ai.usage.cost' => message.total_cost_usd,
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
'llm.token_count.prompt' => input_tokens,
|
|
219
|
+
# OpenInference conventions (Langfuse maps these to usage/cost);
|
|
220
|
+
# prompt includes cache tokens so prompt_details.* are true subsets
|
|
221
|
+
'llm.token_count.prompt' => prompt_tokens,
|
|
206
222
|
'llm.token_count.completion' => output_tokens,
|
|
207
223
|
'llm.token_count.total' => total_tokens,
|
|
224
|
+
# OpenInference prompt-cache breakdown (cache_read/cache_write details)
|
|
225
|
+
'llm.token_count.prompt_details.cache_read' => cache_read_tokens,
|
|
226
|
+
'llm.token_count.prompt_details.cache_write' => cache_creation_tokens,
|
|
208
227
|
'llm.cost.total' => message.total_cost_usd,
|
|
209
228
|
# Trace output (Langfuse shows this in the trace detail view)
|
|
210
229
|
'output.value' => truncate(trace_output),
|
|
@@ -213,14 +232,17 @@ module ClaudeAgentSDK
|
|
|
213
232
|
'claude_agent.duration_api_ms' => message.duration_api_ms,
|
|
214
233
|
'claude_agent.num_turns' => message.num_turns,
|
|
215
234
|
'claude_agent.stop_reason' => message.stop_reason
|
|
216
|
-
}
|
|
235
|
+
}.merge(usage_token_attrs(usage))
|
|
217
236
|
|
|
218
237
|
@root_span.status = OpenTelemetry::Trace::Status.error(message.stop_reason || 'error') if message.is_error
|
|
219
238
|
|
|
220
239
|
@root_span.add_attributes(compact_attrs(attrs))
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
240
|
+
# A tool span still open at the ResultMessage means the tool never
|
|
241
|
+
# completed (interrupt/error/denial) — finish it with the trace so a
|
|
242
|
+
# later same-id tool_result cannot mutate a dead-trace span. Buffer
|
|
243
|
+
# reset must come AFTER trace_output consumed @last_assistant_text.
|
|
244
|
+
finish_open_spans
|
|
245
|
+
reset_session_buffers
|
|
224
246
|
end
|
|
225
247
|
|
|
226
248
|
def start_tool_span(block)
|
|
@@ -247,11 +269,34 @@ module ClaudeAgentSDK
|
|
|
247
269
|
return unless span
|
|
248
270
|
|
|
249
271
|
# OpenInference: Langfuse maps output.value to the Preview Output field
|
|
250
|
-
|
|
272
|
+
value, mime = serialize_tool_output(block.content)
|
|
273
|
+
if value
|
|
274
|
+
span.set_attribute('output.value', truncate(value))
|
|
275
|
+
span.set_attribute('output.mime_type', mime)
|
|
276
|
+
end
|
|
251
277
|
span.status = OpenTelemetry::Trace::Status.error('tool error') if block.is_error
|
|
252
278
|
span.finish
|
|
253
279
|
end
|
|
254
280
|
|
|
281
|
+
# Finish any spans still open (unfinished spans are never exported by
|
|
282
|
+
# OTel batch processors) and drop references to them. Tool spans are
|
|
283
|
+
# finished before the root span (children before parent). Idempotent.
|
|
284
|
+
def finish_open_spans
|
|
285
|
+
@tool_spans.each_value(&:finish)
|
|
286
|
+
@tool_spans.clear
|
|
287
|
+
@root_span&.finish
|
|
288
|
+
@root_span = nil
|
|
289
|
+
@root_context = nil
|
|
290
|
+
end
|
|
291
|
+
|
|
292
|
+
# Clear per-trace buffers so a reused observer instance (sequential
|
|
293
|
+
# query() calls or multi-turn Client sessions) does not stamp stale
|
|
294
|
+
# input/output onto later traces.
|
|
295
|
+
def reset_session_buffers
|
|
296
|
+
@first_user_input = nil
|
|
297
|
+
@last_assistant_text = nil
|
|
298
|
+
end
|
|
299
|
+
|
|
255
300
|
def record_retry_event(message)
|
|
256
301
|
return unless @root_span
|
|
257
302
|
|
|
@@ -286,6 +331,26 @@ module ClaudeAgentSDK
|
|
|
286
331
|
))
|
|
287
332
|
end
|
|
288
333
|
|
|
334
|
+
# gen_ai.usage.* attributes from a CLI usage hash. Cache tokens are
|
|
335
|
+
# emitted alongside input/output because Anthropic's input_tokens
|
|
336
|
+
# EXCLUDES cached tokens; Langfuse maps every gen_ai.usage.* key into
|
|
337
|
+
# usage details and natively prices cache_read_input_tokens /
|
|
338
|
+
# cache_creation_input_tokens.
|
|
339
|
+
def usage_token_attrs(usage)
|
|
340
|
+
{
|
|
341
|
+
'gen_ai.usage.input_tokens' => usage_value(usage, :input_tokens),
|
|
342
|
+
'gen_ai.usage.output_tokens' => usage_value(usage, :output_tokens),
|
|
343
|
+
'gen_ai.usage.cache_creation_input_tokens' => usage_value(usage, :cache_creation_input_tokens),
|
|
344
|
+
'gen_ai.usage.cache_read_input_tokens' => usage_value(usage, :cache_read_input_tokens)
|
|
345
|
+
}
|
|
346
|
+
end
|
|
347
|
+
|
|
348
|
+
# Usage hashes arrive symbol-keyed from the live CLI (symbolize_names:
|
|
349
|
+
# true) and string-keyed from session transcripts.
|
|
350
|
+
def usage_value(usage, key)
|
|
351
|
+
usage[key] || usage[key.to_s]
|
|
352
|
+
end
|
|
353
|
+
|
|
289
354
|
# Remove nil values from attributes hash (OTel rejects nil attribute values)
|
|
290
355
|
def compact_attrs(attrs)
|
|
291
356
|
attrs.compact
|
|
@@ -302,6 +367,15 @@ module ClaudeAgentSDK
|
|
|
302
367
|
rescue StandardError
|
|
303
368
|
obj.to_s
|
|
304
369
|
end
|
|
370
|
+
|
|
371
|
+
# Tool result content is a String or an Array of content-block hashes;
|
|
372
|
+
# serialize structured content as JSON, consistent with input.value.
|
|
373
|
+
def serialize_tool_output(content)
|
|
374
|
+
return nil if content.nil?
|
|
375
|
+
return [content, 'text/plain'] if content.is_a?(String)
|
|
376
|
+
|
|
377
|
+
[safe_json(content), 'application/json']
|
|
378
|
+
end
|
|
305
379
|
end
|
|
306
380
|
end
|
|
307
381
|
end
|
|
@@ -156,7 +156,10 @@ module ClaudeAgentSDK
|
|
|
156
156
|
)
|
|
157
157
|
when 'server_tool_use'
|
|
158
158
|
ServerToolUseBlock.new(id: get.call(:id), name: get.call(:name), input: get.call(:input))
|
|
159
|
-
when 'server_tool_result'
|
|
159
|
+
when 'advisor_tool_result'
|
|
160
|
+
# The CLI's wire type for server-side tool results is
|
|
161
|
+
# advisor_tool_result (the old 'server_tool_result' branch was dead
|
|
162
|
+
# code — no CLI version emits it; Python parses advisor_tool_result).
|
|
160
163
|
ServerToolResultBlock.new(
|
|
161
164
|
tool_use_id: get.call(:tool_use_id),
|
|
162
165
|
content: get.call(:content),
|
|
@@ -20,7 +20,14 @@ module ClaudeAgentSDK
|
|
|
20
20
|
# end
|
|
21
21
|
# end
|
|
22
22
|
module Observer
|
|
23
|
-
# Called with the user's prompt text (not echoed back by CLI in streaming
|
|
23
|
+
# Called with the user's prompt text (not echoed back by CLI in streaming
|
|
24
|
+
# mode): the verbatim string for String prompts (query() / Client#query),
|
|
25
|
+
# and once per `type: 'user'` message for Enumerator/streaming input with
|
|
26
|
+
# extracted text (string content, or newline-joined non-empty top-level
|
|
27
|
+
# text blocks). User messages with no extractable text (tool_result-only,
|
|
28
|
+
# image-only, empty text) are skipped; only Hash or JSON-string stream
|
|
29
|
+
# items are inspected. In streaming mode, ordering relative to on_message
|
|
30
|
+
# is not guaranteed.
|
|
24
31
|
# @param prompt [String] The user's prompt string
|
|
25
32
|
def on_user_prompt(prompt); end
|
|
26
33
|
|
|
@@ -28,11 +35,24 @@ module ClaudeAgentSDK
|
|
|
28
35
|
# @param message [Object] A typed message (AssistantMessage, ResultMessage, etc.)
|
|
29
36
|
def on_message(message); end
|
|
30
37
|
|
|
31
|
-
# Called
|
|
32
|
-
#
|
|
38
|
+
# Called once per error that surfaces from query() or from
|
|
39
|
+
# Client#query/#receive_messages/#receive_response/#connect (after
|
|
40
|
+
# argument/configuration validation — usage errors such as 'Not
|
|
41
|
+
# connected' or invalid options do not notify) — including errors raised
|
|
42
|
+
# by the user's own message block. When both fire, on_error runs before on_close. query() fires on_close even for connect-phase failures (its
|
|
43
|
+
# ensure always runs); a Client#connect failure before the handshake
|
|
44
|
+
# completes fires on_error WITHOUT on_close (the session never opened).
|
|
45
|
+
# Not notified (by design): errors raised by control-request methods
|
|
46
|
+
# (interrupt, set_model, …) — the same error also reaches the message
|
|
47
|
+
# stream where it is notified once; errors during query()'s own teardown;
|
|
48
|
+
# and input-stream errors swallowed by streaming input (warn only,
|
|
49
|
+
# matching the Python SDK).
|
|
50
|
+
# @param error [StandardError] The error that occurred
|
|
33
51
|
def on_error(error); end
|
|
34
52
|
|
|
35
53
|
# Called when the query or client disconnects. Use this to flush buffers.
|
|
54
|
+
# In Client mode call disconnect (ideally in an ensure block) so on_close
|
|
55
|
+
# runs and instrumentation (e.g. OTel spans) is flushed/exported.
|
|
36
56
|
def on_close; end
|
|
37
57
|
end
|
|
38
58
|
end
|