claude-agent-sdk 0.17.0 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -81,6 +81,11 @@ module ClaudeAgentSDK
81
81
  end
82
82
  end
83
83
 
84
+ # Recording-only by design: a Client session can survive an error (the
85
+ # user may rescue one bad message and keep receiving), so finishing here
86
+ # would orphan later spans and break the next turn. Finish ownership
87
+ # stays with end_trace/on_close; start_trace also finishes any dangling
88
+ # span from a previous trace, as a backstop for sessions that never disconnect.
84
89
  def on_error(error)
85
90
  return unless @root_span
86
91
 
@@ -89,16 +94,25 @@ module ClaudeAgentSDK
89
94
  end
90
95
 
91
96
  def on_close
92
- @tool_spans.each_value(&:finish)
93
- @tool_spans.clear
94
- @root_span&.finish
95
- @root_span = nil
96
- @root_context = nil
97
+ finish_open_spans
98
+ reset_session_buffers
97
99
  end
98
100
 
99
101
  private
100
102
 
101
103
  def start_trace(message)
104
+ # A new init without an intervening ResultMessage (e.g. /clear or an
105
+ # interrupted turn) supersedes the current trace; finish it so it is
106
+ # exported instead of leaking as a never-ended span, and reset the
107
+ # buffers so the superseded turn's prompt/output cannot mislabel the
108
+ # new trace. The reset is conditional on an actual supersede — in the
109
+ # normal flow the buffered pre-init prompt belongs to THIS trace.
110
+ # finish_open_spans itself stays unconditional: end_trace nils
111
+ # @root_span but can leave @tool_spans populated.
112
+ superseding = !@root_span.nil?
113
+ finish_open_spans
114
+ reset_session_buffers if superseding
115
+
102
116
  attrs = {
103
117
  # gen_ai semantic conventions (recognized by Langfuse, Datadog, etc.)
104
118
  'gen_ai.system' => 'anthropic',
@@ -146,20 +160,15 @@ module ClaudeAgentSDK
146
160
  @last_assistant_text = combined_text unless combined_text.empty?
147
161
 
148
162
  # Create generation span
149
- usage = message.usage || {}
150
- input_tokens = usage[:input_tokens] || usage['input_tokens']
151
- output_tokens = usage[:output_tokens] || usage['output_tokens']
152
163
  attrs = {
153
164
  'openinference.span.kind' => 'LLM',
154
165
  'langfuse.observation.type' => 'generation',
155
166
  'gen_ai.response.model' => message.model,
156
167
  'llm.model_name' => message.model,
157
- 'gen_ai.usage.input_tokens' => input_tokens,
158
- 'gen_ai.usage.output_tokens' => output_tokens,
159
168
  'gen_ai.completion' => truncate(combined_text),
160
169
  # OpenInference: Langfuse maps output.value to the Preview Output field
161
170
  'output.value' => truncate(combined_text)
162
- }
171
+ }.merge(usage_token_attrs(message.usage || {}))
163
172
 
164
173
  OpenTelemetry::Context.with_current(@root_context) do
165
174
  span = @tracer.start_span('claude_agent.generation', attributes: compact_attrs(attrs))
@@ -188,9 +197,17 @@ module ClaudeAgentSDK
188
197
  return unless @root_span
189
198
 
190
199
  usage = message.usage || {}
191
- input_tokens = usage[:input_tokens] || usage['input_tokens']
192
- output_tokens = usage[:output_tokens] || usage['output_tokens']
193
- total_tokens = (input_tokens || 0) + (output_tokens || 0) if input_tokens || output_tokens
200
+ input_tokens = usage_value(usage, :input_tokens)
201
+ output_tokens = usage_value(usage, :output_tokens)
202
+ cache_creation_tokens = usage_value(usage, :cache_creation_input_tokens)
203
+ cache_read_tokens = usage_value(usage, :cache_read_input_tokens)
204
+ # OpenInference subset semantics: prompt_details.* break down
205
+ # llm.token_count.prompt, so the prompt count must INCLUDE cache
206
+ # tokens (Anthropic's input_tokens excludes them; OpenInference's own
207
+ # Anthropic instrumentation sums them in). gen_ai.usage.* keys keep
208
+ # the raw exclusive values — Langfuse prices those additively.
209
+ prompt_tokens = (input_tokens || 0) + (cache_creation_tokens || 0) + (cache_read_tokens || 0) if input_tokens || cache_creation_tokens || cache_read_tokens
210
+ total_tokens = (prompt_tokens || 0) + (output_tokens || 0) if prompt_tokens || output_tokens
194
211
 
195
212
  # Set trace output (last assistant response — shown in Langfuse UI)
196
213
  # ResultMessage.result has the final text; fall back to last tracked assistant text
@@ -199,12 +216,14 @@ module ClaudeAgentSDK
199
216
  attrs = {
200
217
  # gen_ai conventions
201
218
  'gen_ai.usage.cost' => message.total_cost_usd,
202
- 'gen_ai.usage.input_tokens' => input_tokens,
203
- 'gen_ai.usage.output_tokens' => output_tokens,
204
- # OpenInference conventions (Langfuse maps these to usage/cost)
205
- 'llm.token_count.prompt' => input_tokens,
219
+ # OpenInference conventions (Langfuse maps these to usage/cost);
220
+ # prompt includes cache tokens so prompt_details.* are true subsets
221
+ 'llm.token_count.prompt' => prompt_tokens,
206
222
  'llm.token_count.completion' => output_tokens,
207
223
  'llm.token_count.total' => total_tokens,
224
+ # OpenInference prompt-cache breakdown (cache_read/cache_write details)
225
+ 'llm.token_count.prompt_details.cache_read' => cache_read_tokens,
226
+ 'llm.token_count.prompt_details.cache_write' => cache_creation_tokens,
208
227
  'llm.cost.total' => message.total_cost_usd,
209
228
  # Trace output (Langfuse shows this in the trace detail view)
210
229
  'output.value' => truncate(trace_output),
@@ -213,14 +232,17 @@ module ClaudeAgentSDK
213
232
  'claude_agent.duration_api_ms' => message.duration_api_ms,
214
233
  'claude_agent.num_turns' => message.num_turns,
215
234
  'claude_agent.stop_reason' => message.stop_reason
216
- }
235
+ }.merge(usage_token_attrs(usage))
217
236
 
218
237
  @root_span.status = OpenTelemetry::Trace::Status.error(message.stop_reason || 'error') if message.is_error
219
238
 
220
239
  @root_span.add_attributes(compact_attrs(attrs))
221
- @root_span.finish
222
- @root_span = nil
223
- @root_context = nil
240
+ # A tool span still open at the ResultMessage means the tool never
241
+ # completed (interrupt/error/denial) — finish it with the trace so a
242
+ # later same-id tool_result cannot mutate a dead-trace span. Buffer
243
+ # reset must come AFTER trace_output consumed @last_assistant_text.
244
+ finish_open_spans
245
+ reset_session_buffers
224
246
  end
225
247
 
226
248
  def start_tool_span(block)
@@ -247,11 +269,34 @@ module ClaudeAgentSDK
247
269
  return unless span
248
270
 
249
271
  # OpenInference: Langfuse maps output.value to the Preview Output field
250
- span.set_attribute('output.value', truncate(block.content.to_s))
272
+ value, mime = serialize_tool_output(block.content)
273
+ if value
274
+ span.set_attribute('output.value', truncate(value))
275
+ span.set_attribute('output.mime_type', mime)
276
+ end
251
277
  span.status = OpenTelemetry::Trace::Status.error('tool error') if block.is_error
252
278
  span.finish
253
279
  end
254
280
 
281
+ # Finish any spans still open (unfinished spans are never exported by
282
+ # OTel batch processors) and drop references to them. Tool spans are
283
+ # finished before the root span (children before parent). Idempotent.
284
+ def finish_open_spans
285
+ @tool_spans.each_value(&:finish)
286
+ @tool_spans.clear
287
+ @root_span&.finish
288
+ @root_span = nil
289
+ @root_context = nil
290
+ end
291
+
292
+ # Clear per-trace buffers so a reused observer instance (sequential
293
+ # query() calls or multi-turn Client sessions) does not stamp stale
294
+ # input/output onto later traces.
295
+ def reset_session_buffers
296
+ @first_user_input = nil
297
+ @last_assistant_text = nil
298
+ end
299
+
255
300
  def record_retry_event(message)
256
301
  return unless @root_span
257
302
 
@@ -286,6 +331,26 @@ module ClaudeAgentSDK
286
331
  ))
287
332
  end
288
333
 
334
+ # gen_ai.usage.* attributes from a CLI usage hash. Cache tokens are
335
+ # emitted alongside input/output because Anthropic's input_tokens
336
+ # EXCLUDES cached tokens; Langfuse maps every gen_ai.usage.* key into
337
+ # usage details and natively prices cache_read_input_tokens /
338
+ # cache_creation_input_tokens.
339
+ def usage_token_attrs(usage)
340
+ {
341
+ 'gen_ai.usage.input_tokens' => usage_value(usage, :input_tokens),
342
+ 'gen_ai.usage.output_tokens' => usage_value(usage, :output_tokens),
343
+ 'gen_ai.usage.cache_creation_input_tokens' => usage_value(usage, :cache_creation_input_tokens),
344
+ 'gen_ai.usage.cache_read_input_tokens' => usage_value(usage, :cache_read_input_tokens)
345
+ }
346
+ end
347
+
348
+ # Usage hashes arrive symbol-keyed from the live CLI (symbolize_names:
349
+ # true) and string-keyed from session transcripts.
350
+ def usage_value(usage, key)
351
+ usage[key] || usage[key.to_s]
352
+ end
353
+
289
354
  # Remove nil values from attributes hash (OTel rejects nil attribute values)
290
355
  def compact_attrs(attrs)
291
356
  attrs.compact
@@ -302,6 +367,15 @@ module ClaudeAgentSDK
302
367
  rescue StandardError
303
368
  obj.to_s
304
369
  end
370
+
371
+ # Tool result content is a String or an Array of content-block hashes;
372
+ # serialize structured content as JSON, consistent with input.value.
373
+ def serialize_tool_output(content)
374
+ return nil if content.nil?
375
+ return [content, 'text/plain'] if content.is_a?(String)
376
+
377
+ [safe_json(content), 'application/json']
378
+ end
305
379
  end
306
380
  end
307
381
  end
@@ -156,7 +156,10 @@ module ClaudeAgentSDK
156
156
  )
157
157
  when 'server_tool_use'
158
158
  ServerToolUseBlock.new(id: get.call(:id), name: get.call(:name), input: get.call(:input))
159
- when 'server_tool_result'
159
+ when 'advisor_tool_result'
160
+ # The CLI's wire type for server-side tool results is
161
+ # advisor_tool_result (the old 'server_tool_result' branch was dead
162
+ # code — no CLI version emits it; the Python SDK parses advisor_tool_result).
160
163
  ServerToolResultBlock.new(
161
164
  tool_use_id: get.call(:tool_use_id),
162
165
  content: get.call(:content),
@@ -20,7 +20,14 @@ module ClaudeAgentSDK
20
20
  # end
21
21
  # end
22
22
  module Observer
23
- # Called with the user's prompt text (not echoed back by CLI in streaming mode).
23
+ # Called with the user's prompt text (not echoed back by CLI in streaming
24
+ # mode): the verbatim string for String prompts (query() / Client#query),
25
+ # and once per `type: 'user'` message for Enumerator/streaming input with
26
+ # extracted text (string content, or newline-joined non-empty top-level
27
+ # text blocks). User messages with no extractable text (tool_result-only,
28
+ # image-only, empty text) are skipped; only Hash or JSON-string stream
29
+ # items are inspected. In streaming mode, ordering relative to on_message
30
+ # is not guaranteed.
24
31
  # @param prompt [String] The user's prompt string
25
32
  def on_user_prompt(prompt); end
26
33
 
@@ -28,11 +35,24 @@ module ClaudeAgentSDK
28
35
  # @param message [Object] A typed message (AssistantMessage, ResultMessage, etc.)
29
36
  def on_message(message); end
30
37
 
31
- # Called when a transport or parse error occurs.
32
- # @param error [Exception] The error that occurred
38
+ # Called once per error that surfaces from query() or from
39
+ # Client#query/#receive_messages/#receive_response/#connect (after
40
+ # argument/configuration validation — usage errors such as 'Not
41
+ # connected' or invalid options do not notify) — including errors raised
42
+ # by the user's own message block — and, when both fire, always before on_close. query() fires on_close even for connect-phase failures (its
43
+ # ensure always runs); a Client#connect failure before the handshake
44
+ # completes fires on_error WITHOUT on_close (the session never opened).
45
+ # Not notified (by design): errors raised by control-request methods
46
+ # (interrupt, set_model, …) — the same error also reaches the message
47
+ # stream where it is notified once; errors during query()'s own teardown;
48
+ # and input-stream errors swallowed by streaming input (warn only,
49
+ # matching the Python SDK).
50
+ # @param error [StandardError] The error that occurred
33
51
  def on_error(error); end
34
52
 
35
53
  # Called when the query or client disconnects. Use this to flush buffers.
54
+ # In Client mode call disconnect (ideally in an ensure block) so on_close
55
+ # runs and instrumentation (e.g. OTel spans) is flushed/exported.
36
56
  def on_close; end
37
57
  end
38
58
  end