claude-agent-sdk 0.16.10 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,444 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'fileutils'
5
+ require 'tmpdir'
6
+ require 'open3'
7
+ require 'rbconfig'
8
+ require_relative 'fiber_boundary'
9
+ require_relative 'sessions'
10
+ require_relative 'session_store'
11
+ require_relative 'transcript_mirror_batcher'
12
+
13
+ module ClaudeAgentSDK
14
+ # Result of SessionResume.materialize_resume_session.
15
+ #
16
+ # +config_dir+ is a temp directory laid out like ~/.claude/ — point the
17
+ # subprocess at it via CLAUDE_CONFIG_DIR. +resume_session_id+ is passed as
18
+ # --resume. Call #cleanup after the subprocess exits to remove the temp dir.
19
+ class MaterializedResume
20
+ attr_reader :config_dir, :resume_session_id
21
+
22
+ def initialize(config_dir:, resume_session_id:)
23
+ @config_dir = config_dir
24
+ @resume_session_id = resume_session_id
25
+ end
26
+
27
+ # Best-effort removal of the temp config dir (never raises).
28
+ def cleanup
29
+ SessionResume.rmtree_with_retry(@config_dir)
30
+ end
31
+ end
32
+
33
+ # Materialize a SessionStore-backed resume into a temp CLAUDE_CONFIG_DIR.
34
+ #
35
+ # When `resume` (or `continue_conversation`) is paired with a `session_store`,
36
+ # the session JSONL usually doesn't exist on local disk — it lives in the
37
+ # store. The CLI only resumes from a local file. This module loads the session
38
+ # from the store, writes it to a temp dir laid out like ~/.claude/, and returns
39
+ # the path so the caller can point the subprocess at it via CLAUDE_CONFIG_DIR.
40
+ module SessionResume # rubocop:disable Metrics/ModuleLength
41
+ KEYCHAIN_SERVICE_NAME = 'Claude Code-credentials'
42
+ KEYCHAIN_TIMEOUT_SECONDS = 5
43
+
44
+ # SystemCallError classes that indicate a transiently-held handle (Windows
45
+ # AV/indexer scanning a freshly-written file) or a recoverable resource
46
+ # shortage (file-table exhaustion) rather than a permanent failure. EMFILE/
47
+ # ENFILE are treated as transient so the backoff loop can succeed once
48
+ # descriptors free up, matching the Python SDK's retryable errno set.
49
+ RETRYABLE_RMTREE_ERRORS = [
50
+ Errno::EBUSY, Errno::ENOTEMPTY, Errno::EPERM, Errno::EACCES, Errno::EMFILE, Errno::ENFILE
51
+ ].freeze
52
+
53
+ module_function
54
+
55
+ # Return a copy of +options+ repointed at a materialized temp config dir:
56
+ # CLAUDE_CONFIG_DIR in env, resume set to the materialized session id, and
57
+ # continue_conversation cleared (already resolved to a concrete session id).
58
+ def apply_materialized_options(options, materialized)
59
+ options.dup_with(
60
+ env: options.env.merge('CLAUDE_CONFIG_DIR' => materialized.config_dir.to_s),
61
+ resume: materialized.resume_session_id,
62
+ continue_conversation: false
63
+ )
64
+ end
65
+
66
+ # Build a TranscriptMirrorBatcher for a configured session_store. Shared by
67
+ # both entry points (Client#install_transcript_mirror and the one-shot
68
+ # query()) so projects_dir resolution and the eager/batched threshold choice
69
+ # live in one place. +env+ supplies the CLAUDE_CONFIG_DIR override used to
70
+ # locate the projects dir (already repointed at the temp dir when resuming
71
+ # from a store). Eager flush mode zeroes the buffer thresholds so every
72
+ # transcript_mirror frame triggers a background flush.
73
+ def build_mirror_batcher(store:, env:, on_error:, eager: false)
74
+ TranscriptMirrorBatcher.new(
75
+ store: store,
76
+ projects_dir: SessionStores.projects_dir(env),
77
+ on_error: on_error,
78
+ max_pending_entries: eager ? 0 : TranscriptMirrorBatcher::MAX_PENDING_ENTRIES,
79
+ max_pending_bytes: eager ? 0 : TranscriptMirrorBatcher::MAX_PENDING_BYTES
80
+ )
81
+ end
82
+
83
+ # Load a session from options.session_store and write it to a temp dir.
84
+ # Returns a MaterializedResume, or nil when no materialization is needed
85
+ # (no store, no resume/continue, store has no entries, or the resolved
86
+ # session id is not a valid UUID) — the caller then falls through to the
87
+ # normal spawn path. Raises RuntimeError if a store call fails or times out.
88
+ def materialize_resume_session(options)
89
+ store = options.session_store
90
+ return nil if store.nil?
91
+ return nil if options.resume.nil? && !options.continue_conversation
92
+
93
+ timeout_s = options.load_timeout_ms / 1000.0
94
+ project_key = Sessions.project_key_for_directory(options.cwd)
95
+
96
+ resolved =
97
+ if options.resume
98
+ # session_id is used as a path component below; reject non-UUIDs to
99
+ # prevent traversal and match every other resume path.
100
+ return nil unless options.resume.match?(Sessions::UUID_RE)
101
+
102
+ load_candidate(store, project_key, options.resume, timeout_s)
103
+ else
104
+ resolve_continue_candidate(store, project_key, timeout_s)
105
+ end
106
+ return nil if resolved.nil?
107
+
108
+ session_id, entries = resolved
109
+ tmp_base = Dir.mktmpdir('claude-resume-')
110
+ begin
111
+ project_dir = File.join(tmp_base, 'projects', project_key)
112
+ FileUtils.mkdir_p(project_dir)
113
+ write_jsonl(File.join(project_dir, "#{session_id}.jsonl"), entries)
114
+
115
+ # The subprocess runs with CLAUDE_CONFIG_DIR=tmp_base; copy auth config
116
+ # so it can authenticate. Missing files are fine (API-key auth, etc.).
117
+ copy_auth_files(tmp_base, options.env)
118
+
119
+ materialize_subkeys(store, project_dir, project_key, session_id, timeout_s) if SessionStore.implements?(store, :list_subkeys)
120
+ rescue Exception # rubocop:disable Lint/RescueException
121
+ # Any failure after mkdtemp leaves tmp_base (which may already hold a
122
+ # .credentials.json copy) on disk with no path for the caller to clean
123
+ # up. Remove it before re-raising. Rescue Exception (not StandardError)
124
+ # so reactor stop/cancel also triggers cleanup.
125
+ rmtree_with_retry(tmp_base)
126
+ raise
127
+ end
128
+
129
+ MaterializedResume.new(config_dir: tmp_base, resume_session_id: session_id)
130
+ end
131
+
132
+ # -- Helpers --
133
+
134
+ # Load entries for session_id; return [session_id, entries] or nil if empty.
135
+ def load_candidate(store, project_key, session_id, timeout_s)
136
+ entries = with_timeout(timeout_s, "SessionStore#load for session #{session_id}") do
137
+ store.load('project_key' => project_key, 'session_id' => session_id)
138
+ end
139
+ return nil if entries.nil? || entries.empty?
140
+
141
+ [session_id, entries]
142
+ end
143
+
144
+ # Pick the most-recently-modified non-sidechain session. Sidechain
145
+ # transcripts are mirrored as ordinary top-level keys and often have the
146
+ # highest mtime, so walk newest->oldest and skip them so --continue resumes
147
+ # the user's conversation, not a subagent's.
148
+ def resolve_continue_candidate(store, project_key, timeout_s)
149
+ sessions = with_timeout(timeout_s, 'SessionStore#list_sessions') do
150
+ store.list_sessions(project_key)
151
+ end
152
+ return nil if sessions.nil? || sessions.empty?
153
+
154
+ sessions.sort_by { |s| -(s['mtime'] || 0) }.each do |cand|
155
+ sid = cand['session_id']
156
+ next unless sid.is_a?(String) && sid.match?(Sessions::UUID_RE)
157
+
158
+ loaded = load_candidate(store, project_key, sid, timeout_s)
159
+ next if loaded.nil?
160
+
161
+ first = loaded[1][0]
162
+ next if first.is_a?(Hash) && first['isSidechain'] == true
163
+
164
+ return loaded
165
+ end
166
+ nil
167
+ end
168
+
169
+ # Run a store call (user code) on a plain thread bounded by timeout_s,
170
+ # re-raising failures/timeouts as RuntimeError with context. The thread hop
171
+ # (FiberBoundary with a timeout always hops) both keeps the async scheduler
172
+ # out of the user's store code AND enforces load_timeout_ms unconditionally
173
+ # — including when materialization runs outside an Async reactor, where a
174
+ # direct call would let a hung adapter block connect forever. A timed-out
175
+ # worker is left running (not killed) since it may still complete.
176
+ def with_timeout(timeout_s, what, &block)
177
+ FiberBoundary.invoke(timeout: timeout_s, &block)
178
+ rescue FiberBoundary::JoinTimeout
179
+ raise "#{what} timed out after #{(timeout_s * 1000).to_i}ms during resume materialization"
180
+ rescue RuntimeError
181
+ raise
182
+ rescue StandardError => e
183
+ raise "#{what} failed during resume materialization: #{e}"
184
+ end
185
+
186
+ # Stream-write entries as one compact JSON line each (mode 0600).
187
+ def write_jsonl(path, entries)
188
+ FileUtils.mkdir_p(File.dirname(path))
189
+ File.open(path, 'w') do |f|
190
+ entries.each do |entry|
191
+ f.write(JSON.generate(entry))
192
+ f.write("\n")
193
+ end
194
+ end
195
+ chmod_owner_only(path)
196
+ end
197
+
198
+ # Copy .credentials.json (refreshToken redacted) and .claude.json from the
199
+ # caller's effective config locations so the resumed subprocess can auth.
200
+ def copy_auth_files(tmp_base, opt_env)
201
+ caller_config_dir = env_value(opt_env, 'CLAUDE_CONFIG_DIR')
202
+ source_config_dir = caller_config_dir || File.join(Dir.home, '.claude')
203
+
204
+ creds_json = read_file_if_present(File.join(source_config_dir, '.credentials.json'))
205
+
206
+ # macOS default keeps OAuth tokens in the Keychain, not a file. Redirecting
207
+ # CLAUDE_CONFIG_DIR changes the Keychain service suffix so the subprocess's
208
+ # lookup misses; populate the plaintext file from the parent's Keychain.
209
+ # Skipped when env-based auth or a custom config dir is already in play.
210
+ if caller_config_dir.nil? && env_value(opt_env, 'ANTHROPIC_API_KEY').nil? &&
211
+ env_value(opt_env, 'CLAUDE_CODE_OAUTH_TOKEN').nil?
212
+ keychain = read_keychain_credentials
213
+ creds_json = keychain unless keychain.nil?
214
+ end
215
+
216
+ write_redacted_credentials(creds_json, File.join(tmp_base, '.credentials.json'))
217
+
218
+ claude_json_src = caller_config_dir ? File.join(caller_config_dir, '.claude.json') : File.join(Dir.home, '.claude.json')
219
+ copy_if_present(claude_json_src, File.join(tmp_base, '.claude.json'))
220
+ end
221
+
222
+ # Write creds_json with claudeAiOauth.refreshToken removed. The resumed
223
+ # subprocess runs under a redirected CLAUDE_CONFIG_DIR; if it refreshed, the
224
+ # single-use refresh token would be consumed and the new tokens written
225
+ # somewhere the parent never reads — revoking the parent's creds. Stripping
226
+ # refreshToken short-circuits the subprocess's refresh check.
227
+ def write_redacted_credentials(creds_json, dst)
228
+ return if creds_json.nil?
229
+
230
+ out = creds_json
231
+ begin
232
+ data = JSON.parse(creds_json)
233
+ oauth = data.is_a?(Hash) ? data['claudeAiOauth'] : nil
234
+ if oauth.is_a?(Hash) && oauth.key?('refreshToken')
235
+ oauth.delete('refreshToken')
236
+ out = JSON.generate(data)
237
+ end
238
+ rescue JSON::ParserError
239
+ # Unparseable — write through; the subprocess will fail to parse it too.
240
+ end
241
+ File.write(dst, out)
242
+ chmod_owner_only(dst)
243
+ end
244
+
245
+ # Read OAuth credentials JSON from the macOS Keychain (default service name).
246
+ # Best-effort — returns nil on any error or non-macOS platforms.
247
+ def read_keychain_credentials
248
+ return nil unless RbConfig::CONFIG['host_os'].match?(/darwin/)
249
+
250
+ user = (ENV['USER'] && !ENV['USER'].empty? ? ENV['USER'] : nil) || begin
251
+ require 'etc'
252
+ Etc.getlogin
253
+ rescue StandardError
254
+ 'claude-code-user'
255
+ end
256
+
257
+ stdout, status = capture_with_timeout(
258
+ ['security', 'find-generic-password', '-a', user, '-w', '-s', KEYCHAIN_SERVICE_NAME],
259
+ KEYCHAIN_TIMEOUT_SECONDS
260
+ )
261
+ return nil if status.nil? || !status.success?
262
+
263
+ out = stdout.to_s.strip
264
+ out.empty? ? nil : out
265
+ rescue StandardError
266
+ nil
267
+ end
268
+
269
+ # Run a command with a hard timeout, draining stdout on a side thread and
270
+ # SIGKILL-ing on deadline (Timeout.timeout is unsafe under the fiber
271
+ # scheduler). Returns [stdout, status] or [nil, nil] on timeout/error.
272
+ def capture_with_timeout(argv, timeout_s)
273
+ stdin, stdout, stderr, wait_thr = Open3.popen3(*argv)
274
+ stdin.close
275
+ out_buf = +''
276
+ out_reader = Thread.new { out_buf << stdout.read.to_s }
277
+ err_reader = Thread.new { stderr.read }
278
+ # Closing the pipes in `ensure` while a reader is mid-read raises IOError
279
+ # in that thread; silence it (this is a best-effort credential bridge).
280
+ out_reader.report_on_exception = false
281
+ err_reader.report_on_exception = false
282
+
283
+ if wait_thr.join(timeout_s)
284
+ # The child has exited, so stdout has hit EOF: join with no short timeout
285
+ # to fully drain out_buf before returning it, avoiding a truncated /
286
+ # concurrently-mutated buffer (which would yield unparseable credentials).
287
+ out_reader.join
288
+ [out_buf, wait_thr.value]
289
+ else
290
+ begin
291
+ Process.kill('KILL', wait_thr.pid)
292
+ rescue Errno::ESRCH
293
+ nil
294
+ end
295
+ wait_thr.join
296
+ [nil, nil]
297
+ end
298
+ rescue StandardError
299
+ [nil, nil]
300
+ ensure
301
+ out_reader&.kill if out_reader&.alive?
302
+ err_reader&.kill if err_reader&.alive?
303
+ [stdout, stderr].each { |io| io&.close rescue nil } # rubocop:disable Style/RescueModifier
304
+ end
305
+
306
+ # Load and write all subagent transcripts/metadata under session_id.
307
+ def materialize_subkeys(store, project_dir, project_key, session_id, timeout_s)
308
+ session_dir = File.join(project_dir, session_id)
309
+ subkeys = with_timeout(timeout_s, "SessionStore#list_subkeys for session #{session_id}") do
310
+ store.list_subkeys('project_key' => project_key, 'session_id' => session_id)
311
+ end
312
+
313
+ Array(subkeys).each do |subpath|
314
+ # Subpaths come from an external store and become filesystem path
315
+ # components — reject anything that would escape the session directory.
316
+ unless safe_subpath?(subpath, session_dir)
317
+ warn "Claude SDK: [SessionStore] skipping unsafe subpath from list_subkeys: #{subpath.inspect}"
318
+ next
319
+ end
320
+
321
+ sub_entries = with_timeout(timeout_s, "SessionStore#load for session #{session_id} subpath #{subpath}") do
322
+ store.load('project_key' => project_key, 'session_id' => session_id, 'subpath' => subpath)
323
+ end
324
+ next if sub_entries.nil? || sub_entries.empty?
325
+
326
+ write_subagent_files(session_dir, subpath, sub_entries)
327
+ end
328
+ end
329
+
330
+ # Partition entries into transcript vs agent_metadata and write the
331
+ # <subpath>.jsonl transcript and, if present, the <subpath>.meta.json sidecar.
332
+ def write_subagent_files(session_dir, subpath, entries)
333
+ metadata, transcript = entries.partition { |e| e.is_a?(Hash) && e['type'] == 'agent_metadata' }
334
+ sub_file = File.join(session_dir, "#{subpath}.jsonl")
335
+
336
+ write_jsonl(sub_file, transcript) unless transcript.empty?
337
+
338
+ return if metadata.empty?
339
+
340
+ # Last metadata entry wins; strip the synthetic type field.
341
+ meta_content = metadata.last.except('type')
342
+ meta_file = "#{sub_file.delete_suffix('.jsonl')}.meta.json"
343
+ FileUtils.mkdir_p(File.dirname(meta_file))
344
+ File.write(meta_file, JSON.generate(meta_content))
345
+ chmod_owner_only(meta_file)
346
+ end
347
+
348
+ # Reject subpaths that are empty, absolute, drive/UNC-prefixed, contain "."
349
+ # or ".." components or a NUL byte, or escape session_dir after resolution.
350
+ def safe_subpath?(subpath, session_dir)
351
+ return false if subpath.nil? || subpath.empty?
352
+ return false if subpath.start_with?('/', '\\')
353
+ return false if subpath.match?(/\A[a-zA-Z]:/) # drive-prefixed (C:foo) / UNC
354
+ return false if subpath.split(%r{[\\/]}).any? { |part| ['.', '..'].include?(part) }
355
+ return false if subpath.include?("\u0000")
356
+
357
+ base = resolve_dir(session_dir)
358
+ # Join BEFORE expanding: expand_path on a relative first argument performs
359
+ # tilde expansion, so a store-supplied "~nosuchuser/x" would raise
360
+ # ArgumentError (and "~root/x" would resolve outside base even though the
361
+ # literal path the writer uses is contained). The joined path is absolute,
362
+ # so expand_path only normalizes it.
363
+ target = File.expand_path(File.join(base, "#{subpath}.jsonl"))
364
+ target == base || target.start_with?("#{base}#{File::SEPARATOR}")
365
+ rescue ArgumentError
366
+ false
367
+ end
368
+
369
+ def resolve_dir(dir)
370
+ File.realpath(dir)
371
+ rescue SystemCallError
372
+ File.expand_path(dir)
373
+ end
374
+
375
+ # Best-effort recursive removal with retries on transient lock errors
376
+ # (Windows AV/indexer). Never raises. The temp dir holds an access token, so
377
+ # the final sweep matters for not leaking secrets.
378
+ def rmtree_with_retry(path, retries: 4, delay: 0.1)
379
+ return unless path && File.exist?(path)
380
+
381
+ retries.times do
382
+ begin
383
+ FileUtils.remove_entry(path)
384
+ return
385
+ rescue Errno::ENOENT
386
+ return
387
+ rescue SystemCallError => e
388
+ break unless RETRYABLE_RMTREE_ERRORS.any? { |klass| e.is_a?(klass) }
389
+ end
390
+ sleep(delay)
391
+ end
392
+ FileUtils.rm_rf(path)
393
+ end
394
+
395
+ def read_file_if_present(path)
396
+ File.read(path)
397
+ rescue SystemCallError
398
+ nil
399
+ end
400
+
401
+ # Best-effort lock of a freshly-written materialized file to owner-only
402
+ # (0600); silently ignores filesystems that reject chmod. These files can
403
+ # hold a redacted .credentials.json / MCP-header secrets, so default to
404
+ # owner-only rather than inheriting the umask.
405
+ def chmod_owner_only(path)
406
+ File.chmod(0o600, path)
407
+ rescue SystemCallError
408
+ nil
409
+ end
410
+
411
+ # Copy src to dst (locked to 0600) when src exists; no-op otherwise. The
412
+ # only caller copies .claude.json, which can hold MCP-header secrets and
413
+ # customApiKeyResponses, so it gets the same owner-only mode as the other
414
+ # materialized files rather than inheriting the source's (often 0644) mode.
415
+ def copy_if_present(src, dst)
416
+ FileUtils.copy_file(src, dst)
417
+ chmod_owner_only(dst)
418
+ rescue SystemCallError
419
+ nil
420
+ end
421
+
422
+ # Resolve the value the CHILD process will see for env var +name+. Presence
423
+ # in options.env is detected by KEY: an explicit nil (or empty) value means
424
+ # the transport unsets the var for the child, so resolve to nil rather than
425
+ # falling back to the parent's environment (which the child won't inherit
426
+ # for that key). Only an absent key consults the parent ENV.
427
+ def env_value(opt_env, name)
428
+ if opt_env.respond_to?(:key?) && (opt_env.key?(name) || opt_env.key?(name.to_sym))
429
+ value = opt_env[name] || opt_env[name.to_sym]
430
+ return nil if value.nil? || (value.respond_to?(:empty?) && value.empty?)
431
+
432
+ return value
433
+ end
434
+
435
+ value = ENV.fetch(name, nil)
436
+ value && (!value.respond_to?(:empty?) || !value.empty?) ? value : nil
437
+ end
438
+
439
+ private_class_method :load_candidate, :resolve_continue_candidate, :with_timeout, :write_jsonl,
440
+ :copy_auth_files, :write_redacted_credentials, :read_keychain_credentials,
441
+ :capture_with_timeout, :materialize_subkeys, :write_subagent_files,
442
+ :resolve_dir, :read_file_if_present, :chmod_owner_only, :copy_if_present, :env_value
443
+ end
444
+ end