claude-agent-sdk 0.16.9 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,9 @@
3
3
  require 'json'
4
4
  require 'open3'
5
5
  require 'pathname'
6
+ require_relative 'session_store'
7
+ require_relative 'session_summary'
8
+ require_relative 'transcript_mirror_batcher'
6
9
 
7
10
  module ClaudeAgentSDK
8
11
  # Session info returned by list_sessions
@@ -77,6 +80,12 @@ module ClaudeAgentSDK
77
80
 
78
81
  UUID_RE = /\A[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\z/i
79
82
 
83
+ # Transcript entry types that participate in conversation reads. One shared
84
+ # constant for the disk (parse_jsonl_entries) and store
85
+ # (filter_transcript_entries) paths so the two read paths can't drift when
86
+ # the CLI adds a new entry type (mirrors Python's _TRANSCRIPT_ENTRY_TYPES).
87
+ TRANSCRIPT_ENTRY_TYPES = %w[user assistant progress system attachment].freeze
88
+
80
89
  SKIP_FIRST_PROMPT_PATTERN = %r{\A(?:<local-command-stdout>|<session-start-hook>|<tick>|<goal>|
81
90
  \[Request\ interrupted\ by\ user[^\]]*\]|
82
91
  \s*<ide_opened_file>[\s\S]*</ide_opened_file>\s*\z|
@@ -120,6 +129,30 @@ module ClaudeAgentSDK
120
129
  "#{sanitized[0, MAX_SANITIZED_LENGTH]}-#{simple_hash(name)}"
121
130
  end
122
131
 
132
# Canonical form of +dir+: symlinks resolved with File.realpath, then the
# result is Unicode NFC-normalized. When realpath raises a SystemCallError
# (for example because the directory does not exist yet), the path is made
# absolute with File.expand_path instead and normalized the same way.
#
# @param dir [String] directory path to canonicalize
# @return [String] NFC-normalized real (or, as a fallback, absolute) path
def canonicalize_path(dir)
  resolved =
    begin
      File.realpath(dir)
    rescue SystemCallError
      # realpath could not resolve the path; fall back to a lexical absolute path.
      File.expand_path(dir)
    end
  resolved.unicode_normalize(:nfc)
end
142
+
143
# Derive the SessionStore +project_key+ for a directory.
#
# The directory (or '.' when nil) is canonicalized with canonicalize_path and
# the result is passed through sanitize_path, i.e. the same two steps the
# store-backed readers in this module use to build their keys.
#
# @param directory [String, Pathname, nil] directory to key (nil = '.')
# @return [String] the project key
def project_key_for_directory(directory = nil)
  target = directory.nil? ? '.' : directory.to_s
  canonical = canonicalize_path(target)
  sanitize_path(canonical)
end
155
+
123
156
  # Get the Claude config directory
124
157
  def config_dir
125
158
  ENV.fetch('CLAUDE_CONFIG_DIR', File.expand_path('~/.claude'))
@@ -309,6 +342,12 @@ module ClaudeAgentSDK
309
342
 
310
343
  # Parse an ISO 8601 timestamp string into epoch milliseconds
311
344
  def parse_iso_timestamp_ms(timestamp_str)
345
+ # Entries are opaque external blobs: a non-String timestamp (e.g. an epoch
346
+ # integer) makes Time.iso8601 raise TypeError, which the ArgumentError
347
+ # rescue would NOT catch and which would escape callers like
348
+ # mtime_from_entries / get_session_info_from_store. Guard the type first.
349
+ return nil unless timestamp_str.is_a?(String)
350
+
312
351
  require 'time'
313
352
  (Time.iso8601(timestamp_str).to_f * 1000).to_i
314
353
  rescue ArgumentError
@@ -344,10 +383,13 @@ module ClaudeAgentSDK
344
383
  list_all_sessions
345
384
  end
346
385
 
347
- # Sort by last_modified descending, then apply offset and limit
386
+ # Sort by last_modified descending, then apply offset and limit.
387
+ # [limit, 0].max: limit <= 0 yields [] across the whole read-API family
388
+ # (a bare first(-1) would raise ArgumentError here but silently clamp on
389
+ # the store paths).
348
390
  sessions.sort_by! { |s| -s.last_modified }
349
391
  sessions = sessions[offset..] || [] if offset.positive?
350
- sessions = sessions.first(limit) if limit
392
+ sessions = sessions.first([limit, 0].max) if limit
351
393
  sessions
352
394
  end
353
395
 
@@ -395,14 +437,422 @@ module ClaudeAgentSDK
395
437
  chain = build_conversation_chain(entries)
396
438
  messages = filter_visible_messages(chain)
397
439
 
398
- # Apply offset and limit
440
+ # Apply offset and limit (limit <= 0 yields [], like every other reader)
399
441
  messages = messages[offset..] || []
400
- messages = messages.first(limit) if limit
442
+ messages = messages.first([limit, 0].max) if limit
401
443
  messages
402
444
  end
403
445
 
446
+ # ---- SessionStore-backed reads (store counterparts to the disk readers) ----
447
+
448
# List sessions from a SessionStore (store-backed counterpart to
# list_sessions).
#
# When the store implements list_session_summaries, the summaries fast path
# (list_sessions_via_summaries) is tried first; a nil result from it means
# "fall back". The fallback requires list_sessions: every listed entry with a
# session_id becomes a placeholder slot (info nil) carrying the listing mtime
# (0 when absent), slots are ordered newest-first, and
# paginate_resolving_gaps resolves them on demand while applying offset/limit.
#
# @param session_store [Object] store implementing list_session_summaries and/or list_sessions
# @param directory [String, Pathname, nil] project directory (nil = '.')
# @param limit [Integer, nil] maximum number of sessions (nil = no cap)
# @param offset [Integer, nil] number of sessions to skip (nil = 0)
# @return [Array] whatever the pagination helper yields for the page
# @raise [ArgumentError] if the store implements neither listing method
def list_sessions_from_store(session_store:, directory: nil, limit: nil, offset: 0)
  offset ||= 0
  project_path = canonicalize_path(directory.nil? ? '.' : directory.to_s)
  project_key = sanitize_path(project_path)

  if SessionStore.implements?(session_store, :list_session_summaries)
    fast = list_sessions_via_summaries(session_store, project_key, project_path, limit, offset)
    return fast unless fast.nil?
  end

  unless SessionStore.implements?(session_store, :list_sessions)
    raise ArgumentError,
          'session_store implements neither list_session_summaries nor list_sessions -- cannot list sessions'
  end

  # All-placeholder slots: the sort key (listing mtime) is known before any
  # transcript is loaded, so loads can be deferred to pagination.
  placeholders = []
  Array(session_store.list_sessions(project_key)).each do |entry|
    sid = entry['session_id']
    placeholders << { mtime: entry['mtime'] || 0, session_id: sid, info: nil } unless sid.nil?
  end
  placeholders.sort_by! { |slot| -slot[:mtime] }
  paginate_resolving_gaps(session_store, project_key, project_path, placeholders, limit, offset)
end
486
+
487
# Read metadata for a single session from a SessionStore (store-backed
# counterpart to get_session_info).
#
# Returns nil when +session_id+ does not match UUID_RE or when the store's
# load yields nil / an empty collection. Otherwise the loaded entries are
# folded by derive_info_from_entries, stamped with the mtime computed by
# mtime_from_entries; that helper's result (possibly nil) is returned as-is.
#
# @param session_store [Object] store responding to load(key_hash)
# @param session_id [String] session UUID
# @param directory [String, Pathname, nil] project directory (nil = '.')
# @return [Object, nil] session info, or nil
def get_session_info_from_store(session_store:, session_id:, directory: nil)
  return nil unless session_id.match?(UUID_RE)

  project_path = canonicalize_path(directory.nil? ? '.' : directory.to_s)
  store_key = { 'project_key' => sanitize_path(project_path), 'session_id' => session_id }
  entries = session_store.load(store_key)

  if entries.nil? || entries.empty?
    nil
  else
    last_modified = mtime_from_entries(entries)
    derive_info_from_entries(session_id, entries, last_modified, project_path)
  end
end
499
+
500
# Read a session's conversation messages from a SessionStore (store-backed
# counterpart to get_session_messages).
#
# Returns [] when +session_id+ does not match UUID_RE or the store's load
# yields nil / an empty collection. Otherwise the entries are narrowed with
# filter_transcript_entries and converted by entries_to_messages.
#
# @param session_store [Object] store responding to load(key_hash)
# @param session_id [String] session UUID
# @param directory [String, Pathname, nil] project directory (nil = '.')
# @param limit [Integer, nil] maximum number of messages (nil = no cap)
# @param offset [Integer, nil] number of messages to skip (nil = 0)
# @return [Array] the selected messages
def get_session_messages_from_store(session_store:, session_id:, directory: nil, limit: nil, offset: 0)
  return [] unless session_id.match?(UUID_RE)

  offset ||= 0
  store_key = { 'project_key' => project_key_for_directory(directory), 'session_id' => session_id }
  entries = session_store.load(store_key)

  if entries.nil? || entries.empty?
    []
  else
    entries_to_messages(filter_transcript_entries(entries), limit, offset)
  end
end
511
+
512
# List subagent IDs for a session from a SessionStore.
#
# Only subkeys that start with 'subagents/' and whose final path segment
# starts with 'agent-' contribute; the ID is that segment with the 'agent-'
# prefix removed. IDs are returned in first-seen order without duplicates.
#
# @param session_store [Object] store implementing list_subkeys
# @param session_id [String] session UUID
# @param directory [String, Pathname, nil] project directory (nil = '.')
# @return [Array<String>] unique agent IDs; [] when session_id is not a UUID
# @raise [ArgumentError] if the store does not implement list_subkeys
def list_subagents_from_store(session_store:, session_id:, directory: nil)
  return [] unless session_id.match?(UUID_RE)

  unless SessionStore.implements?(session_store, :list_subkeys)
    raise ArgumentError,
          'session_store does not implement list_subkeys -- cannot list subagents'
  end

  store_key = { 'project_key' => project_key_for_directory(directory), 'session_id' => session_id }
  agent_ids = Array(session_store.list_subkeys(store_key)).filter_map do |subpath|
    next unless subpath.start_with?('subagents/')

    leaf = subpath.rpartition('/').last
    leaf.delete_prefix('agent-') if leaf.start_with?('agent-')
  end
  # The same agent can appear under several subpaths; keep the first occurrence.
  agent_ids.uniq
end
538
+
539
# Read a subagent's conversation messages from a SessionStore.
#
# The subagent's subpath is looked up with resolve_subagent_subpath, its
# entries are loaded, synthetic 'agent_metadata' entries are dropped, and the
# remainder is filtered by filter_transcript_entries and converted by
# entries_to_subagent_messages.
#
# @param session_store [Object] store responding to load(key_hash)
# @param session_id [String] session UUID
# @param agent_id [String, nil] subagent ID (without the 'agent-' prefix)
# @param directory [String, Pathname, nil] project directory (nil = '.')
# @param limit [Integer, nil] maximum number of messages (nil = no cap)
# @param offset [Integer, nil] number of messages to skip
# @return [Array] messages; [] for an invalid session_id, a nil/empty
#   agent_id, an unresolved subpath, or a transcript with nothing left after
#   dropping agent_metadata entries
def get_subagent_messages_from_store(session_store:, session_id:, agent_id:, directory: nil, limit: nil, offset: 0)
  return [] if !session_id.match?(UUID_RE) || agent_id.nil? || agent_id.empty?

  project_key = project_key_for_directory(directory)
  subpath = resolve_subagent_subpath(session_store, project_key, session_id, agent_id)
  return [] if subpath.nil?

  store_key = { 'project_key' => project_key, 'session_id' => session_id, 'subpath' => subpath }
  loaded = session_store.load(store_key)
  return [] if loaded.nil? || loaded.empty?

  # agent_metadata entries are synthetic markers, not transcript lines.
  transcript = loaded.reject { |entry| entry.is_a?(Hash) && entry['type'] == 'agent_metadata' }
  if transcript.empty?
    []
  else
    entries_to_subagent_messages(filter_transcript_entries(transcript), limit, offset)
  end
end
561
+
562
# Replay a local on-disk session transcript into a SessionStore.
#
# The session's JSONL file is located with find_session_file and appended to
# the store in batches via append_jsonl_file_in_batches. The store key's
# project_key is the name of the directory that contains the resolved file.
# Unless +include_subagents+ is false, the session's subagent files are then
# imported by import_subagent_files.
#
# @param session_id [String] session UUID
# @param session_store [Object] destination store
# @param directory [String, Pathname, nil] passed through to find_session_file
# @param include_subagents [Boolean] also import subagent transcripts
# @param batch_size [Integer, nil] entries per append; nil or a non-positive
#   value selects TranscriptMirrorBatcher::MAX_PENDING_ENTRIES
# @return [Object, nil] the result of import_subagent_files, or nil when
#   subagents are not imported
# @raise [ArgumentError] if session_id is not a valid UUID
# @raise [Errno::ENOENT] if the session JSONL cannot be found
def import_session_to_store(session_id:, session_store:, directory: nil, include_subagents: true,
                            batch_size: TranscriptMirrorBatcher::MAX_PENDING_ENTRIES)
  raise ArgumentError, "Invalid session_id: #{session_id}" unless session_id.match?(UUID_RE)

  transcript_path = find_session_file(session_id, directory)
  if transcript_path.nil? || !File.exist?(transcript_path)
    raise Errno::ENOENT, "Session #{session_id} not found"
  end

  # Key under the on-disk project directory name, wherever the resolver found the file.
  project_key = File.basename(File.dirname(transcript_path))
  # Safe navigation lets an explicit batch_size: nil pick up the default too.
  effective_batch = batch_size&.positive? ? batch_size : TranscriptMirrorBatcher::MAX_PENDING_ENTRIES

  main_key = { 'project_key' => project_key, 'session_id' => session_id }
  append_jsonl_file_in_batches(transcript_path, main_key, session_store, effective_batch)

  import_subagent_files(transcript_path, project_key, session_id, session_store, effective_batch) if include_subagents
end
593
+
404
594
  # -- Private helpers --
405
595
 
596
# Summary fast path for list_sessions_from_store.
#
# Returns nil when the store's list_session_summaries raises
# NotImplementedError, so the caller can fall back. Otherwise builds
# pagination slots:
# - a summary becomes a resolved slot when the store cannot list sessions,
#   or when the session is still listed and the summary's mtime is not older
#   than the listed mtime; summaries converting to nil info are marked as
#   covered but add no slot;
# - every listed session not covered by such a summary becomes a
#   placeholder slot (info nil) so it is re-folded from source on demand.
# Slots are sorted newest-first and handed to paginate_resolving_gaps.
#
# @param store [Object] store implementing list_session_summaries
# @param project_key [String] store project key
# @param project_path [String] canonical project path
# @param limit [Integer, nil] page size
# @param offset [Integer, nil] number of sessions to skip
# @return [Array, nil] the paginated result, or nil to request fallback
def list_sessions_via_summaries(store, project_key, project_path, limit, offset)
  summaries =
    begin
      # Array(): a store returning nil degrades to gap-fill instead of crashing.
      Array(store.list_session_summaries(project_key))
    rescue NotImplementedError
      return nil
    end

  can_list = SessionStore.implements?(store, :list_sessions)
  listing = can_list ? Array(store.list_sessions(project_key)) : []
  listed_mtimes = listing.to_h { |item| [item['session_id'], item['mtime']] }

  slots = []
  covered = {}
  summaries.each do |summary|
    sid = summary['session_id']
    # A missing summary mtime counts as 0, which routes the session to gap-fill.
    summary_mtime = summary['mtime'] || 0
    if can_list
      listed = listed_mtimes[sid]
      # nil: not listed any more (drop). Older than the listing: stale (re-fold).
      next if listed.nil? || summary_mtime < listed
    end
    covered[sid] = true
    info = SessionSummary.summary_entry_to_sdk_info(summary, project_path)
    slots << { mtime: summary_mtime, info: info } unless info.nil?
  end

  gaps = listing.reject { |item| covered[item['session_id']] }
  slots.concat(gaps.map { |item| { mtime: item['mtime'] || 0, session_id: item['session_id'], info: nil } })

  slots.sort_by! { |slot| -slot[:mtime] }
  paginate_resolving_gaps(store, project_key, project_path, slots, limit, offset)
end
639
+
640
# Walk +slots+ in the given order, resolving placeholder slots (info nil)
# through resolve_gap_slot only when they are reached, dropping slots that
# resolve to nil, and applying offset/limit to the surviving results rather
# than to raw slots. Iteration stops as soon as the page is full, so no slot
# beyond that point is resolved.
#
# @param store [Object] store used to resolve placeholders
# @param project_key [String] store project key
# @param project_path [String] canonical project path
# @param slots [Array<Hash>] slots with :info and, for placeholders, :session_id
# @param limit [Integer, nil] page size; nil = unlimited, <= 0 yields []
# @param offset [Integer, nil] resolved results to skip; nil or <= 0 skips none
# @return [Array] resolved infos for the requested page
def paginate_resolving_gaps(store, project_key, project_path, slots, limit, offset)
  remaining_to_skip = offset&.positive? ? offset : 0
  page = []

  slots.each do |slot|
    # Checked before resolving so a full page never triggers another load.
    break if limit && page.length >= [limit, 0].max

    info = slot[:info] || resolve_gap_slot(store, project_key, project_path, slot)
    next if info.nil?

    if remaining_to_skip.positive?
      remaining_to_skip -= 1
      next
    end

    page << info
  end

  page
end
669
+
670
# Load and fold one placeholder slot.
#
# Returns nil when the slot has no :session_id or the store's load yields
# nil / an empty collection. When load raises a StandardError, a warning is
# written and an SDKSessionInfo with an empty summary and the slot's mtime is
# returned so a single failing load does not abort the listing. Otherwise
# the entries are folded by derive_info_from_entries with the slot's mtime.
#
# @param store [Object] store responding to load(key_hash)
# @param project_key [String] store project key
# @param project_path [String] canonical project path
# @param slot [Hash] slot with :session_id and :mtime
# @return [Object, nil] session info, or nil
def resolve_gap_slot(store, project_key, project_path, slot)
  sid = slot[:session_id]
  return nil if sid.nil?

  entries =
    begin
      store.load('project_key' => project_key, 'session_id' => sid)
    rescue StandardError => e
      # Degrade this row (kept, with its mtime) rather than failing the whole listing.
      warn "Claude SDK: [SessionStore] gap-fill load failed for session #{sid}: #{e.message}"
      return SDKSessionInfo.new(session_id: sid, summary: '', last_modified: slot[:mtime])
    end

  if entries.nil? || entries.empty?
    nil
  else
    derive_info_from_entries(sid, entries, slot[:mtime], project_path)
  end
end
689
+
690
# Fold store entries into a session info object, stamping the given mtime.
#
# The entries are folded with SessionSummary.fold_session_summary (starting
# from no previous summary), the resulting summary's 'mtime' is overwritten
# with +mtime+, and the summary is converted with
# SessionSummary.summary_entry_to_sdk_info.
#
# @param session_id [String] session UUID placed in the fold key
# @param entries [Array] transcript entries
# @param mtime [Integer] modification time to stamp on the summary
# @param project_path [String] canonical project path
# @return [Object, nil] whatever summary_entry_to_sdk_info returns
def derive_info_from_entries(session_id, entries, mtime, project_path)
  fold_key = { 'session_id' => session_id }
  stamped = SessionSummary.fold_session_summary(nil, fold_key, entries).tap do |summary|
    summary['mtime'] = mtime
  end
  SessionSummary.summary_entry_to_sdk_info(stamped, project_path)
end
696
+
697
# Timestamp (epoch ms) of the last entry whose 'timestamp' parses, scanning
# from the tail of +entries+; 0 when no entry yields one. Non-Hash entries
# and entries without a truthy 'timestamp' are skipped without parsing.
#
# @param entries [Array] transcript entries
# @return [Integer] epoch milliseconds, or 0
def mtime_from_entries(entries)
  # Lazy so parsing stops at the first entry (from the end) that yields a value.
  stamped = entries.reverse_each.lazy.select { |entry| entry.is_a?(Hash) && entry['timestamp'] }
  stamped.filter_map { |entry| parse_iso_timestamp_ms(entry['timestamp']) }.first || 0
end
707
+
708
# Sort +results+ by last_modified descending, then apply offset and limit.
#
# @param results [Array<#last_modified>] items exposing a numeric last_modified
# @param limit [Integer, nil] maximum number of items; nil = no cap, <= 0 yields []
# @param offset [Integer, nil] number of leading items to drop; nil or <= 0 drops none
# @return [Array] a new array holding the requested page (the input is not mutated)
def apply_sort_limit_offset(results, limit, offset)
  page = results.sort_by { |item| -item.last_modified }
  # Safe navigation: a nil offset means "no offset", matching the other
  # readers in this module, instead of raising NoMethodError on nil.positive?.
  page = page[offset..] || [] if offset&.positive?
  # The max keeps a negative limit from raising in Array#first.
  page = page.first([limit, 0].max) if limit
  page
end
717
+
718
# Keep only entries that are Hashes, whose 'type' is one of
# TRANSCRIPT_ENTRY_TYPES, and whose 'uuid' is a String.
#
# @param entries [Array] raw store entries
# @return [Array<Hash>] the qualifying entries, in their original order
def filter_transcript_entries(entries)
  entries.select do |entry|
    next false unless entry.is_a?(Hash)

    TRANSCRIPT_ENTRY_TYPES.include?(entry['type']) && entry['uuid'].is_a?(String)
  end
end
721
+
722
# Turn transcript entries into the visible conversation messages, then apply
# offset and limit. The chain is built by build_conversation_chain and
# reduced by filter_visible_messages.
#
# @param entries [Array<Hash>] filtered transcript entries
# @param limit [Integer, nil] maximum number of messages; nil = no cap, <= 0 yields []
# @param offset [Integer, nil] index of the first message to return (nil = 0)
# @return [Array] the selected messages
def entries_to_messages(entries, limit, offset)
  start = offset || 0
  visible = filter_visible_messages(build_conversation_chain(entries))
  page = visible[start..] || []
  limit ? page.first([limit, 0].max) : page
end
729
+
730
# Subagent counterpart to entries_to_messages.
#
# The chain comes from build_subagent_chain; only 'user' and 'assistant'
# entries become SessionMessage objects. Filtering is by type alone: no
# sidechain flag is consulted here (per the original note, CLI-written
# subagent entries all carry isSidechain: true, so the main pipeline would
# drop them). Offset and limit are applied afterwards.
#
# @param entries [Array<Hash>] filtered subagent transcript entries
# @param limit [Integer, nil] maximum number of messages; nil = no cap, <= 0 yields []
# @param offset [Integer, nil] index of the first message to return (nil = 0)
# @return [Array<SessionMessage>] the selected messages
def entries_to_subagent_messages(entries, limit, offset)
  start = offset || 0
  conversational = build_subagent_chain(entries).select do |entry|
    %w[user assistant].include?(entry['type'])
  end
  messages = conversational.map do |entry|
    SessionMessage.new(
      type: entry['type'],
      uuid: entry['uuid'],
      # Either key spelling may carry the session ID; fall back to ''.
      session_id: entry['sessionId'] || entry['session_id'] || '',
      message: entry['message']
    )
  end
  page = messages[start..] || []
  limit ? page.first([limit, 0].max) : page
end
753
+
754
# Build a subagent conversation chain: index the entries by 'uuid', find the
# last entry whose type is 'user' or 'assistant', and hand both to
# walk_to_root. Returns [] for empty input or when no such entry exists.
#
# @param entries [Array<Hash>] filtered subagent transcript entries
# @return [Array] the result of walk_to_root, or []
def build_subagent_chain(entries)
  return [] if entries.empty?

  # Later entries win when two share a uuid.
  index = entries.each_with_object({}) { |entry, acc| acc[entry['uuid']] = entry }
  tail = entries.reverse_each.find { |entry| %w[user assistant].include?(entry['type']) }
  return [] unless tail

  walk_to_root(index, tail)
end
764
+
765
# Find the store subpath for a subagent.
#
# Without list_subkeys support the direct path subagents/agent-<id> is
# returned unconditionally. Otherwise the session's subkeys are scanned for
# paths under 'subagents/' whose last segment is agent-<id>: the direct
# top-level path wins when present; failing that the shortest match (ties
# broken lexically) is chosen, so the answer does not depend on the order in
# which the store lists subkeys.
#
# @param store [Object] session store
# @param project_key [String] store project key
# @param session_id [String] session UUID
# @param agent_id [String] subagent ID (without the 'agent-' prefix)
# @return [String, nil] the subpath, or nil when no subkey matches
def resolve_subagent_subpath(store, project_key, session_id, agent_id)
  leaf = "agent-#{agent_id}"
  direct = "subagents/#{leaf}"
  return direct unless SessionStore.implements?(store, :list_subkeys)

  store_key = { 'project_key' => project_key, 'session_id' => session_id }
  candidates = Array(store.list_subkeys(store_key)).select do |subkey|
    subkey.start_with?('subagents/') && subkey.rpartition('/').last == leaf
  end

  if candidates.include?(direct)
    direct
  else
    candidates.min_by { |subkey| [subkey.length, subkey] }
  end
end
782
+
783
# Import subagent transcripts found under <session dir>/subagents/**, where
# the session dir is +resolved+ without its '.jsonl' suffix.
#
# Each *.jsonl file is appended under a subpath equal to its path relative to
# the session dir, minus '.jsonl', with '/' separators. If a sibling
# '<name>.meta.json' exists and parses to a Hash, it is appended as one extra
# entry whose 'type' is forced to 'agent_metadata' (a 'type' key inside the
# sidecar cannot override the marker). A missing sidecar is skipped; a
# sidecar that is not valid JSON raises from JSON.parse.
#
# @param resolved [String] path of the main session JSONL file
# @param project_key [String] store project key
# @param session_id [String] session UUID
# @param store [Object] destination store responding to append(key, entries)
# @param batch_size [Integer] entries per append batch
# @return [Array<String>] the subagent JSONL paths that were visited
def import_subagent_files(resolved, project_key, session_id, store, batch_size)
  session_dir = resolved.delete_suffix('.jsonl')
  dir_prefix = "#{session_dir}#{File::SEPARATOR}"

  collect_jsonl_files(File.join(session_dir, 'subagents')).each do |jsonl_path|
    relative = jsonl_path.delete_prefix(dir_prefix)
    subpath = relative.delete_suffix('.jsonl').split(File::SEPARATOR).join('/')
    sub_key = { 'project_key' => project_key, 'session_id' => session_id, 'subpath' => subpath }
    append_jsonl_file_in_batches(jsonl_path, sub_key, store, batch_size)

    sidecar_path = "#{jsonl_path.delete_suffix('.jsonl')}.meta.json"
    meta_text =
      begin
        File.read(sidecar_path, encoding: 'UTF-8')
      rescue Errno::ENOENT
        # No sidecar for this subagent: nothing more to append.
        next
      end

    meta = JSON.parse(meta_text)
    next unless meta.is_a?(Hash)

    # merge puts the synthetic marker last so it always wins over a sidecar 'type'.
    store.append(sub_key, [meta.merge('type' => 'agent_metadata')])
  end
end
808
+
809
# Stream a JSONL file into +store+, appending parsed entries in batches.
#
# A batch is flushed once it holds at least +batch_size+ entries or its lines
# total at least TranscriptMirrorBatcher::MAX_PENDING_BYTES bytes; a final
# partial batch is flushed at end of file. Lines that are empty or contain
# only JSON insignificant whitespace (space, tab, CR, LF) are skipped. Any
# other line that is not valid JSON raises from JSON.parse.
#
# @param file_path [String] path of the JSONL file to read
# @param key [Hash] store key passed to every append
# @param store [Object] destination store responding to append(key, entries)
# @param batch_size [Integer] maximum entries per append
# @return [Object, nil] the last append's result, or nil if nothing was pending at EOF
def append_jsonl_file_in_batches(file_path, key, store, batch_size)
  batch = []
  nbytes = 0
  # Pin the encoding: the file is read as UTF-8 regardless of the process locale.
  File.foreach(file_path, encoding: 'UTF-8') do |line|
    line = line.chomp
    # Skip blank lines, including whitespace-only ones: JSON.parse rejects
    # them, which would abort the import after earlier batches were already
    # appended. The byte scan also covers the empty line and stops at the
    # first non-whitespace byte.
    next if line.each_byte.all? { |byte| byte == 0x20 || byte == 0x09 || byte == 0x0d || byte == 0x0a }

    batch << JSON.parse(line)
    nbytes += line.bytesize
    next unless batch.length >= batch_size || nbytes >= TranscriptMirrorBatcher::MAX_PENDING_BYTES

    store.append(key, batch)
    batch = []
    nbytes = 0
  end
  store.append(key, batch) unless batch.empty?
end
829
+
830
# Recursively collect *.jsonl file paths under +base_dir+. Children are
# visited in sorted name order within each directory, and a subdirectory's
# files appear at the position of that subdirectory. Returns [] when
# base_dir is not a directory or listing it raises a SystemCallError, so an
# unreadable directory yields nothing instead of raising.
#
# @param base_dir [String] directory to scan
# @return [Array<String>] paths of the *.jsonl files found
def collect_jsonl_files(base_dir)
  return [] unless File.directory?(base_dir)

  names =
    begin
      Dir.children(base_dir).sort
    rescue SystemCallError
      return []
    end

  names.each_with_object([]) do |name, found|
    path = File.join(base_dir, name)
    if File.directory?(path)
      found.concat(collect_jsonl_files(path))
    elsif name.end_with?('.jsonl') && File.file?(path)
      found << path
    end
  end
end
855
+
406
856
  def get_session_info_for_directory(file_name, directory)
407
857
  canonical = File.realpath(directory).unicode_normalize(:nfc)
408
858
  project_dir = find_project_dir(canonical)
@@ -560,12 +1010,11 @@ module ClaudeAgentSDK
560
1010
 
561
1011
  def parse_jsonl_entries(file_path)
562
1012
  entries = []
563
- valid_types = %w[user assistant progress system attachment].freeze
564
1013
 
565
1014
  File.foreach(file_path) do |line|
566
1015
  entry = JSON.parse(line.strip, symbolize_names: false)
567
1016
  next unless entry.is_a?(Hash)
568
- next unless valid_types.include?(entry['type'])
1017
+ next unless TRANSCRIPT_ENTRY_TYPES.include?(entry['type'])
569
1018
  next unless entry['uuid'].is_a?(String)
570
1019
 
571
1020
  entries << entry
@@ -667,7 +1116,11 @@ module ClaudeAgentSDK
667
1116
  :find_session_file, :parse_jsonl_entries,
668
1117
  :build_conversation_chain, :walk_to_leaf, :walk_to_root,
669
1118
  :filter_visible_messages, :read_head_tail, :build_session_info,
670
- :parse_iso_timestamp_ms
1119
+ :list_sessions_via_summaries, :paginate_resolving_gaps, :resolve_gap_slot,
1120
+ :derive_info_from_entries, :mtime_from_entries, :apply_sort_limit_offset,
1121
+ :filter_transcript_entries, :entries_to_messages,
1122
+ :entries_to_subagent_messages, :build_subagent_chain, :resolve_subagent_subpath,
1123
+ :import_subagent_files, :append_jsonl_file_in_batches, :collect_jsonl_files
671
1124
 
672
1125
  # These remain accessible for SessionMutations:
673
1126
  # config_dir, sanitize_path, find_project_dir, detect_worktrees
@@ -2,6 +2,7 @@
2
2
 
3
3
  require 'json'
4
4
  require 'open3'
5
+ require 'set'
5
6
  require 'timeout'
6
7
  require_relative 'transport'
7
8
  require_relative 'errors'
@@ -15,6 +16,75 @@ module ClaudeAgentSDK
15
16
  MINIMUM_CLAUDE_CODE_VERSION = '2.0.0'
16
17
  RECENT_STDERR_LINES_LIMIT = 20
17
18
 
19
+ # Track live CLI subprocesses so we can terminate them when the parent Ruby
20
+ # process exits. Mirrors the Python (PR #916, a `set[Process]`) and
21
+ # TypeScript SDKs' parent-exit cleanup, preventing orphaned `claude`
22
+ # processes from leaking when callers crash or exit before reaching #close.
23
+ # A Set keyed by object identity (like Python's set) keeps the hot path
24
+ # off `#pid` — only #kill_active_processes touches `#pid`/`#alive?`, at exit.
25
+ # Guarded by a mutex because #close can run on a FiberBoundary worker thread
26
+ # while #connect runs on the reactor fiber.
27
+ # Stored in CONSTANTS (not class instance variables) so the registry is a
28
+ # single shared instance across this class and any subclass: constants
29
+ # resolve through the ancestor chain, whereas class ivars are NOT inherited
30
+ # — a `SubprocessCLITransport` subclass instance calling
31
+ # `self.class.register_active_process` would otherwise reach a nil mutex and
32
+ # raise mid-#connect, orphaning the just-spawned child. The base-class
33
+ # at_exit handler must be able to see every subprocess, a subclass's too.
34
+ ACTIVE_PROCESSES = Set.new
35
+ ACTIVE_PROCESSES_MUTEX = Mutex.new
36
+
37
class << self
  # Registry of live CLI subprocess handles. Returns the shared
  # ACTIVE_PROCESSES constant, so every caller sees the same Set.
  def active_processes
    ACTIVE_PROCESSES
  end

  # Mutex guarding registry mutation. Returns the shared
  # ACTIVE_PROCESSES_MUTEX constant.
  def active_processes_mutex
    ACTIVE_PROCESSES_MUTEX
  end

  # Add a subprocess handle to the registry under the mutex.
  #
  # @param wait_thr [#pid, #alive?, nil] handle for the child (per the
  #   original note, the Process::Waiter returned by Open3.popen3); a falsy
  #   value is ignored
  # @return [Set, nil] the registry, or nil when nothing was registered
  def register_active_process(wait_thr)
    active_processes_mutex.synchronize { active_processes.add(wait_thr) } if wait_thr
  end

  # Remove a subprocess handle from the registry under the mutex. Removing a
  # handle that is not registered is a no-op.
  #
  # @param wait_thr [Object, nil] handle to remove; a falsy value is ignored
  # @return [Set, nil] the registry, or nil when nothing was attempted
  def deregister_active_process(wait_thr)
    active_processes_mutex.synchronize { active_processes.delete(wait_thr) } if wait_thr
  end

  # Best-effort SIGTERM to every registered handle that still reports
  # alive?, followed by clearing the registry. Children are signalled but
  # never waited on.
  #
  # The registry is read from a snapshot and cleared WITHOUT taking
  # active_processes_mutex: the original design note says blocking on the
  # lock during interpreter shutdown could hang the process, and that a
  # missed or repeated SIGTERM is harmless. Every StandardError is swallowed,
  # both per handle (e.g. the process is already gone) and for the method as
  # a whole, so this never raises one.
  #
  # @return [Set, nil] the cleared registry, or nil if an error was swallowed
  def kill_active_processes
    snapshot = active_processes.to_a
    snapshot.each do |wait_thr|
      begin
        Process.kill('TERM', wait_thr.pid) if wait_thr.alive?
      rescue StandardError
        # Process already gone (Errno::ESRCH), not permitted, or invalid pid.
        nil
      end
    end
    active_processes.clear
  rescue StandardError
    # Never let cleanup interfere with interpreter shutdown.
    nil
  end
end
87
+
18
88
  def initialize(options_or_prompt = nil, options = nil)
19
89
  # Support both new single-arg form and legacy two-arg form
20
90
  @options = options.nil? ? options_or_prompt : options
@@ -104,6 +174,7 @@ module ClaudeAgentSDK
104
174
  opts = { chdir: @cwd&.to_s }.compact
105
175
 
106
176
  @stdin, @stdout, @stderr, @process = Open3.popen3(process_env, *cmd, opts)
177
+ self.class.register_active_process(@process)
107
178
 
108
179
  # Always drain stderr to prevent pipe buffer deadlock.
109
180
  # Without this, --verbose output fills the OS pipe buffer (~64KB),
@@ -273,6 +344,7 @@ module ClaudeAgentSDK
273
344
  warn "Claude SDK: Cleanup warnings: #{cleanup_errors.join(', ')}"
274
345
  end
275
346
 
347
+ self.class.deregister_active_process(@process)
276
348
  @process = nil
277
349
  @stdout = nil
278
350
  # @stdin already nilled under the mutex above.
@@ -403,6 +475,13 @@ module ClaudeAgentSDK
403
475
  returncode = nil
404
476
  end
405
477
 
478
+ # The child has exited and been reaped; drop it from the parent-exit
479
+ # registry now rather than waiting for #close, which a caller may never
480
+ # reach (e.g. a Client abandoned without #disconnect, or direct transport
481
+ # use). Idempotent — #close's own deregister becomes a harmless no-op, and
482
+ # #close still sees @process (left set here) for its termination logic.
483
+ self.class.deregister_active_process(@process)
484
+
406
485
  if returncode && returncode != 0
407
486
  # Wait briefly for stderr thread to finish draining
408
487
  @stderr_task&.join(1)
@@ -457,3 +536,8 @@ module ClaudeAgentSDK
457
536
  end
458
537
  end
459
538
  end
539
+
540
+ # Terminate any CLI subprocess still live when the parent Ruby process exits.
541
+ # Registered once at require time (require is idempotent). Best-effort: the
542
+ # handler swallows all errors so it never interferes with interpreter shutdown.
543
+ at_exit { ClaudeAgentSDK::SubprocessCLITransport.kill_active_processes }