dspy 0.34.3 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 01f38786c88d525a1031cf41931f578c3d2dcbfa29ee6a8dac1a381cafe47edf
4
- data.tar.gz: 6334bfb483b3011fa91e163f688127be763a126ea7cd0edc44f07b0557dc2a30
3
+ metadata.gz: 85ae2299dbddc20bfd710c68e9e8dfbeeb201b742e3fd18b768669ddc4443261
4
+ data.tar.gz: ba0ee2d5f637f499448e456fb58f0513aa58b5b258e754bf1b32839ea8c49b63
5
5
  SHA512:
6
- metadata.gz: 744087dd87e936b247d194539407f2a74b29d5e6a28b4ba872c4aa0ef77103c4a6957c97b6bed3ee7e8ef899824f3e6e0f40c2b429c47312aa10924bb1fbca3c
7
- data.tar.gz: 4e343687e84570d199ce9c7695d19d0a0a551cac66693fda131fe03268d3907e2d20f4648530d1e6a5de0a73092b03f3ec7bcec877d9c23662332193aaee0e31
6
+ metadata.gz: 310de5ce11b23d29bd168069eae4f5ce3ac154ba91974c96b9d70e5d02a0dcb45122dc4c83f80097a409ff448a0a30d45ebec3ff0883d80b2cdd8f1215d2dfff
7
+ data.tar.gz: 1ce5ff1bfe900bcedabab28efba1e8352fed81c6cbf458d5b7c54e8f7eea29fa6b7ee33a62dcc73b456ede6181a85bbf055541e4fb70037ac4de05e3be2b8ce9
data/README.md CHANGED
@@ -9,6 +9,7 @@
9
9
  **Build reliable LLM applications in idiomatic Ruby using composable, type-safe modules.**
10
10
 
11
11
  DSPy.rb is the Ruby port of Stanford's [DSPy](https://dspy.ai). Instead of wrestling with brittle prompt strings, you define typed signatures and let the framework handle the rest. Prompts become functions. LLM calls become predictable.
12
+ The `1.x` line is the stable release track for production Ruby LLM applications.
12
13
 
13
14
  ```ruby
14
15
  require 'dspy'
@@ -137,26 +138,18 @@ result.answer # => "60 km/h"
137
138
  Build agents that use tools to accomplish tasks:
138
139
 
139
140
  ```ruby
140
- class SearchTool < DSPy::Tools::Tool
141
+ class SearchTool < DSPy::Tools::Base
141
142
  tool_name "search"
142
- description "Search for information"
143
-
144
- input do
145
- const :query, String
146
- end
147
-
148
- output do
149
- const :results, T::Array[String]
150
- end
143
+ tool_description "Search for information"
151
144
 
145
+ sig { params(query: String).returns(String) }
152
146
  def call(query:)
153
147
  # Your search implementation
154
- { results: ["Result 1", "Result 2"] }
148
+ "Result 1, Result 2"
155
149
  end
156
150
  end
157
151
 
158
- toolset = DSPy::Tools::Toolset.new(tools: [SearchTool.new])
159
- agent = DSPy::ReAct.new(signature: ResearchTask, tools: toolset, max_iterations: 5)
152
+ agent = DSPy::ReAct.new(ResearchTask, tools: [SearchTool.new], max_iterations: 5)
160
153
  result = agent.call(question: "What's the latest on Ruby 3.4?")
161
154
  ```
162
155
 
@@ -185,8 +178,8 @@ result = agent.call(question: "What's the latest on Ruby 3.4?")
185
178
  A [Claude Skill](https://github.com/vicentereig/dspy-rb-skill) is available to help you build DSPy.rb applications:
186
179
 
187
180
  ```bash
188
- # Claude Code
189
- git clone https://github.com/vicentereig/dspy-rb-skill ~/.claude/skills/dspy-rb
181
+ # Claude Code — install from the vicentereig/engineering marketplace
182
+ claude install-skill vicentereig/engineering --skill dspy-rb
190
183
  ```
191
184
 
192
185
  For Claude.ai Pro/Max, download the [skill ZIP](https://github.com/vicentereig/dspy-rb-skill/archive/refs/heads/main.zip) and upload via Settings > Skills.
@@ -201,7 +194,7 @@ The [examples/](examples/) directory has runnable code for common patterns:
201
194
  - Prompt optimization
202
195
 
203
196
  ```bash
204
- bundle exec ruby examples/first_predictor.rb
197
+ bundle exec ruby examples/basic_search_agent.rb
205
198
  ```
206
199
 
207
200
  ## Optional Gems
data/lib/dspy/context.rb CHANGED
@@ -74,8 +74,9 @@ module DSPy
74
74
  # Prepare attributes and add trace name for root spans
75
75
  span_attributes = sanitized_attributes.transform_keys(&:to_s).reject { |k, v| v.nil? }
76
76
 
77
- # Set trace name if this is likely a root span (no parent in our stack)
78
- if current[:span_stack].length == 1 # This will be the first span
77
+ # Set trace name if this is likely a root span (no parent in our stack),
78
+ # unless callers already specified one explicitly.
79
+ if current[:span_stack].length == 1 && !span_attributes.key?('langfuse.trace.name')
79
80
  span_attributes['langfuse.trace.name'] = operation
80
81
  end
81
82
 
@@ -84,6 +85,12 @@ module DSPy
84
85
 
85
86
  # Get parent OpenTelemetry span for proper context propagation
86
87
  parent_otel_span = current[:otel_span_stack].last
88
+ if !parent_otel_span && defined?(OpenTelemetry::Trace)
89
+ current_span = OpenTelemetry::Trace.current_span
90
+ if current_span && current_span != OpenTelemetry::Trace::Span::INVALID
91
+ parent_otel_span = current_span
92
+ end
93
+ end
87
94
 
88
95
  # Create span with proper parent context
89
96
  if parent_otel_span
@@ -96,20 +103,18 @@ module DSPy
96
103
  ) do |span|
97
104
  # Add to our OpenTelemetry span stack
98
105
  current[:otel_span_stack].push(span)
106
+ succeeded = false
99
107
 
100
108
  begin
101
109
  result = yield(span)
102
-
103
- # Add explicit timing information to help Langfuse
104
- if span
105
- duration_ms = ((Time.now - otel_start_time) * 1000).round(3)
106
- span.set_attribute('duration.ms', duration_ms)
107
- span.set_attribute('langfuse.observation.startTime', otel_start_time.iso8601(3))
108
- span.set_attribute('langfuse.observation.endTime', Time.now.iso8601(3))
109
- end
110
-
110
+ succeeded = true
111
111
  result
112
+ rescue StandardError => e
113
+ set_span_error_attributes(span, e)
114
+ raise
112
115
  ensure
116
+ set_span_status_attribute(span, succeeded)
117
+ set_span_timing_attributes(span, otel_start_time)
113
118
  # Remove from our OpenTelemetry span stack
114
119
  current[:otel_span_stack].pop
115
120
  end
@@ -124,20 +129,18 @@ module DSPy
124
129
  ) do |span|
125
130
  # Add to our OpenTelemetry span stack
126
131
  current[:otel_span_stack].push(span)
132
+ succeeded = false
127
133
 
128
134
  begin
129
135
  result = yield(span)
130
-
131
- # Add explicit timing information to help Langfuse
132
- if span
133
- duration_ms = ((Time.now - otel_start_time) * 1000).round(3)
134
- span.set_attribute('duration.ms', duration_ms)
135
- span.set_attribute('langfuse.observation.startTime', otel_start_time.iso8601(3))
136
- span.set_attribute('langfuse.observation.endTime', Time.now.iso8601(3))
137
- end
138
-
136
+ succeeded = true
139
137
  result
138
+ rescue StandardError => e
139
+ set_span_error_attributes(span, e)
140
+ raise
140
141
  ensure
142
+ set_span_status_attribute(span, succeeded)
143
+ set_span_timing_attributes(span, otel_start_time)
141
144
  # Remove from our OpenTelemetry span stack
142
145
  current[:otel_span_stack].pop
143
146
  end
@@ -296,6 +299,36 @@ module DSPy
296
299
  label: explicit_label || (module_instance.respond_to?(:module_scope_label) ? module_instance.module_scope_label : nil)
297
300
  }
298
301
  end
302
+
303
# Records timing attributes on +span+: 'duration.ms' (elapsed milliseconds,
# rounded to 3 decimals) plus the Langfuse observation start/end timestamps
# in ISO 8601 with millisecond precision.
#
# Does nothing when +span+ is falsy. Any StandardError raised while writing
# the attributes is swallowed and nil is returned.
def set_span_timing_attributes(span, otel_start_time)
  if span
    finished_at = Time.now
    elapsed_seconds = finished_at - otel_start_time
    span.set_attribute('duration.ms', (elapsed_seconds * 1000).round(3))
    span.set_attribute('langfuse.observation.startTime', otel_start_time.iso8601(3))
    span.set_attribute('langfuse.observation.endTime', finished_at.iso8601(3))
  end
rescue StandardError
  nil
end
314
+
315
# Marks +span+ as failed by setting 'error' (true), 'error.type' and, when the
# error has a message, 'error.message' truncated to 2000 characters.
#
# Does nothing when +span+ is falsy. Any StandardError raised while writing
# the attributes is swallowed and nil is returned.
def set_span_error_attributes(span, error)
  return unless span

  error_class = error.class
  span.set_attribute('error', true)
  # Anonymous classes (Class.new(StandardError)) have a nil #name; fall back
  # to #to_s so the attribute is always a String.
  span.set_attribute('error.type', error_class.name || error_class.to_s)

  message = error.message
  span.set_attribute('error.message', message.to_s[0, 2000]) if message
rescue StandardError
  nil
end
324
+
325
# Writes the 'dspy.status' attribute on +span+: 'completed' when +succeeded+
# is truthy, 'error' otherwise.
#
# Does nothing when +span+ is falsy. Any StandardError raised while writing
# the attribute is swallowed and nil is returned.
def set_span_status_attribute(span, succeeded)
  if span
    status = succeeded ? 'completed' : 'error'
    span.set_attribute('dspy.status', status)
  end
rescue StandardError
  nil
end
299
332
  end
300
333
  end
301
334
  end
@@ -0,0 +1,153 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'base64'
4
+ require 'stringio'
5
+ require 'uri'
6
+
7
module DSPy
  # A PDF document supplied as a URL, a base64 string, or raw bytes, with
  # converters to the provider-specific payloads defined below.
  #
  # Exactly one of +url+, +base64+ or +data+ must be given. +data+ is packed
  # with 'C*', so it is expected to be an array of byte values.
  class Document
    # StringIO that also exposes a +path+; returned by #to_ruby_llm_attachment
    # for inline (non-URL) documents.
    class RubyLLMInlineAttachment < StringIO
      attr_reader :path

      def initialize(content, path:)
        super(content)
        @path = path
        binmode
      end
    end

    private_constant :RubyLLMInlineAttachment

    attr_reader :url, :base64, :data, :content_type

    SUPPORTED_FORMATS = %w[application/pdf].freeze
    MAX_SIZE_BYTES = 32 * 1024 * 1024 # 32MB limit

    # Error messages for providers that are known but cannot take documents.
    # Kept in one place so the format methods and #validate_for_provider! agree.
    UNSUPPORTED_PROVIDER_MESSAGES = {
      'openai' => "OpenAI document inputs are not supported in this release. Use Anthropic directly or Anthropic via RubyLLM.",
      'gemini' => "Gemini document inputs are not supported in this release. Use Anthropic directly or Anthropic via RubyLLM."
    }.freeze
    private_constant :UNSUPPORTED_PROVIDER_MESSAGES

    # @param url [String, nil] location of the PDF; content type is inferred
    #   from the ".pdf" extension unless +content_type+ is given
    # @param base64 [String, nil] base64-encoded document; requires +content_type+
    # @param data [Array<Integer>, nil] raw bytes; requires +content_type+
    # @param content_type [String, nil] must be one of SUPPORTED_FORMATS
    # @raise [ArgumentError] when zero or several sources are given, when
    #   +content_type+ is missing or unsupported, when a URL does not end in
    #   ".pdf", or when the payload exceeds MAX_SIZE_BYTES
    def initialize(url: nil, base64: nil, data: nil, content_type: nil)
      validate_input!(url, base64, data)

      if url
        @url = url
        @content_type = content_type || infer_content_type_from_url(url)
      elsif base64
        raise ArgumentError, "content_type is required when using base64" unless content_type

        @base64 = base64
        @content_type = content_type
        validate_size!(Base64.decode64(base64).bytesize)
      elsif data
        raise ArgumentError, "content_type is required when using data" unless content_type

        @data = data
        @content_type = content_type
        validate_size!(data.size)
      end

      validate_content_type!
    end

    # @raise [DSPy::LM::IncompatibleDocumentFeatureError] always
    def to_openai_format
      raise DSPy::LM::IncompatibleDocumentFeatureError, UNSUPPORTED_PROVIDER_MESSAGES.fetch('openai')
    end

    # @return [Hash] a 'document' content block whose source is either the URL
    #   or the base64 payload together with its media type
    def to_anthropic_format
      source =
        if url
          { type: 'url', url: url }
        else
          { type: 'base64', media_type: content_type, data: to_base64 }
        end

      { type: 'document', source: source }
    end

    # @raise [DSPy::LM::IncompatibleDocumentFeatureError] always
    def to_gemini_format
      raise DSPy::LM::IncompatibleDocumentFeatureError, UNSUPPORTED_PROVIDER_MESSAGES.fetch('gemini')
    end

    # @return [String, StringIO] the URL itself for URL documents, otherwise an
    #   in-memory binary IO whose +path+ is 'document.pdf'
    def to_ruby_llm_attachment
      return url if url

      RubyLLMInlineAttachment.new(to_binary, path: 'document.pdf')
    end

    # @return [String, nil] the base64 payload (encoding +data+ when needed),
    #   or nil for URL documents
    def to_base64
      return base64 if base64
      return Base64.strict_encode64(data.pack('C*')) if data

      nil
    end

    # Checks that +provider+ can accept document inputs.
    #
    # @param provider [String, Symbol] provider name; Symbols are accepted and
    #   compared by their string form
    # @return [true] for 'anthropic'
    # @raise [DSPy::LM::IncompatibleDocumentFeatureError] for any other provider
    def validate_for_provider!(provider)
      provider_name = provider.to_s
      return true if provider_name == 'anthropic'

      message = UNSUPPORTED_PROVIDER_MESSAGES.fetch(provider_name) do
        "Unknown provider '#{provider}'. Document inputs are currently supported only for Anthropic."
      end
      raise DSPy::LM::IncompatibleDocumentFeatureError, message
    end

    private

    # Ensures exactly one of the three sources was supplied.
    def validate_input!(url, base64, data)
      inputs = [url, base64, data].compact

      if inputs.empty?
        raise ArgumentError, "Must provide either url, base64, or data"
      elsif inputs.size > 1
        raise ArgumentError, "Only one of url, base64, or data can be provided"
      end
    end

    def validate_content_type!
      return if SUPPORTED_FORMATS.include?(content_type)

      raise ArgumentError, "Unsupported document format: #{content_type}. Supported formats: #{SUPPORTED_FORMATS.join(', ')}"
    end

    def validate_size!(size_bytes)
      return unless size_bytes > MAX_SIZE_BYTES

      raise ArgumentError, "Document size exceeds 32MB limit (got #{size_bytes} bytes)"
    end

    # Infers the content type from the URL's file extension.
    #
    # @raise [ArgumentError] when the URL path does not end in ".pdf"
    # @raise [URI::InvalidURIError] when +url+ cannot be parsed
    def infer_content_type_from_url(url)
      # URI#path is nil for opaque URIs (e.g. "mailto:..."); coerce to String
      # so File.extname does not raise TypeError and the ArgumentError below
      # is reported instead.
      extension = File.extname(URI.parse(url).path.to_s).downcase

      case extension
      when '.pdf'
        'application/pdf'
      else
        raise ArgumentError, "Document URL must point to a PDF (.pdf): #{url}"
      end
    end

    # @return [String] decoded bytes of the inline document
    # @raise [ArgumentError] when the document has neither base64 nor data
    def to_binary
      return Base64.decode64(base64) if base64
      return data.pack('C*') if data

      raise ArgumentError, "Document has no binary content"
    end
  end
end
@@ -58,6 +58,17 @@ module DSPy
58
58
  end
59
59
  end
60
60
 
61
# Returns true when any message carries array content that includes an item
# whose :type is 'document'.
#
# Content is read with msg[:content] first; the #content reader is used only
# as a fallback on messages that actually respond to it, so a plain Hash
# without :content is treated as "no documents" instead of raising
# NoMethodError.
def contains_documents?(messages)
  messages.any? do |msg|
    content = msg[:content]
    content ||= msg.content if msg.respond_to?(:content)
    content.is_a?(Array) && content.any? { |item| item[:type] == 'document' }
  end
end
67
+
68
# True when the messages contain images or documents. Images are checked
# first; documents are only inspected when no image is found.
def contains_media?(messages)
  has_images = contains_images?(messages)
  return has_images if has_images

  contains_documents?(messages)
end
71
+
61
72
  # Format multimodal messages for a specific provider
62
73
  # @param messages [Array<Hash>] Array of message hashes
63
74
  # @param provider_name [String] Provider name for image validation and formatting
@@ -71,6 +82,8 @@ module DSPy
71
82
  { type: 'text', text: item[:text] }
72
83
  when 'image'
73
84
  format_image_for_provider(item[:image], provider_name)
85
+ when 'document'
86
+ format_document_for_provider(item[:document], provider_name)
74
87
  else
75
88
  item
76
89
  end
@@ -96,6 +109,16 @@ module DSPy
96
109
  { type: 'image', image: image }
97
110
  end
98
111
  end
112
+
113
# Validates +document+ against +provider_name+ and converts it with the
# document's "to_<provider>_format" method. When the document has no such
# method, a generic { type: 'document', document: ... } hash is returned.
def format_document_for_provider(document, provider_name)
  document.validate_for_provider!(provider_name)

  formatter = "to_#{provider_name}_format"
  return { type: 'document', document: document } unless document.respond_to?(formatter)

  document.send(formatter)
end
99
122
  end
100
123
  end
101
124
  end
@@ -7,8 +7,6 @@ module DSPy
7
7
  class UnsupportedProviderError < Error; end
8
8
  class ConfigurationError < Error; end
9
9
  class MissingAdapterError < Error; end
10
- class UnsupportedVersionError < Error; end
11
- class MissingOfficialSDKError < Error; end
12
10
 
13
11
  # Raised when API key is missing or invalid
14
12
  class MissingAPIKeyError < Error
@@ -29,5 +27,12 @@ module DSPy
29
27
  super(message)
30
28
  end
31
29
  end
30
+
31
# Raised when document features are incompatible with the target provider.
# The message is a required argument and is passed to the parent unchanged.
class IncompatibleDocumentFeatureError < AdapterError
  def initialize(message)
    super
  end
end
32
37
  end
33
38
  end
@@ -38,17 +38,8 @@ module DSPy
38
38
  # OpenAI/Ollama: try to extract JSON from various formats
39
39
  extract_json_from_content(response.content)
40
40
  elsif adapter_class_name.include?('AnthropicAdapter')
41
- # Anthropic: try tool use first if structured_outputs enabled, else use content extraction
42
- structured_outputs_enabled = adapter.instance_variable_get(:@structured_outputs_enabled)
43
- structured_outputs_enabled = true if structured_outputs_enabled.nil? # Default to true
44
-
45
- if structured_outputs_enabled
46
- extracted = extract_anthropic_tool_json(response)
47
- extracted || extract_json_from_content(response.content)
48
- else
49
- # Skip tool extraction, use enhanced prompting extraction
50
- extract_json_from_content(response.content)
51
- end
41
+ # Anthropic: Beta API returns JSON in content, same as OpenAI/Gemini
42
+ extract_json_from_content(response.content)
52
43
  elsif adapter_class_name.include?('GeminiAdapter')
53
44
  # Gemini: try to extract JSON from various formats
54
45
  extract_json_from_content(response.content)
@@ -90,25 +81,30 @@ module DSPy
90
81
  # Anthropic preparation
91
82
  sig { params(messages: T::Array[T::Hash[Symbol, T.untyped]], request_params: T::Hash[Symbol, T.untyped]).void }
92
83
  def prepare_anthropic_request(messages, request_params)
93
- # Only use tool-based extraction if structured_outputs is enabled (default: true)
94
- structured_outputs_enabled = adapter.instance_variable_get(:@structured_outputs_enabled)
84
+ begin
85
+ require "dspy/anthropic/lm/schema_converter"
86
+ rescue LoadError
87
+ msg = <<~MSG
88
+ Anthropic adapter is optional; structured output helpers will be unavailable until the gem is installed.
89
+ Add `gem 'dspy-anthropic'` to your Gemfile and run `bundle install`.
90
+ MSG
91
+ raise DSPy::LM::MissingAdapterError, msg
92
+ end
95
93
 
96
- # Default to true if not set (backward compatibility)
94
+ # Only use Beta API structured outputs if enabled (default: true)
95
+ structured_outputs_enabled = adapter.instance_variable_get(:@structured_outputs_enabled)
97
96
  structured_outputs_enabled = true if structured_outputs_enabled.nil?
98
97
 
99
98
  return unless structured_outputs_enabled
100
99
 
101
- # Convert signature to tool schema
102
- tool_schema = convert_to_anthropic_tool_schema
103
-
104
- # Add tool definition
105
- request_params[:tools] = [tool_schema]
100
+ # Use Anthropic Beta API structured outputs
101
+ schema = DSPy::Anthropic::LM::SchemaConverter.to_beta_format(signature_class)
106
102
 
107
- # Force tool use
108
- request_params[:tool_choice] = {
109
- type: "tool",
110
- name: "json_output"
111
- }
103
+ request_params[:output_format] = ::Anthropic::Models::Beta::BetaJSONOutputFormat.new(
104
+ type: :json_schema,
105
+ schema: schema
106
+ )
107
+ request_params[:betas] = ["structured-outputs-2025-11-13"]
112
108
  end
113
109
 
114
110
  # Gemini preparation
@@ -135,84 +131,6 @@ module DSPy
135
131
  end
136
132
  end
137
133
 
138
- # Convert signature to Anthropic tool schema
139
- # Uses strict: true for constrained decoding (Anthropic structured outputs)
140
- # Anthropic strict mode requires ALL properties in required at every level.
141
- sig { returns(T::Hash[Symbol, T.untyped]) }
142
- def convert_to_anthropic_tool_schema
143
- output_fields = signature_class.output_field_descriptors
144
-
145
- schema = {
146
- name: "json_output",
147
- description: "Output the result in the required JSON format",
148
- strict: true,
149
- input_schema: {
150
- type: "object",
151
- properties: build_properties_from_fields(output_fields),
152
- required: build_required_from_fields(output_fields),
153
- additionalProperties: false
154
- }
155
- }
156
-
157
- # Anthropic strict mode: ALL properties must be in required at every level.
158
- # Non-required properties get auto-wrapped in null unions by the grammar compiler,
159
- # which counts against the 16-union-parameter limit.
160
- enforce_all_required(schema[:input_schema])
161
-
162
- schema
163
- end
164
-
165
- # Build required field list, excluding fields that have defaults
166
- sig { params(fields: T::Hash[Symbol, T.untyped]).returns(T::Array[String]) }
167
- def build_required_from_fields(fields)
168
- fields.reject { |_name, descriptor| descriptor.has_default }.keys.map(&:to_s)
169
- end
170
-
171
- # Recursively enforce that all properties are in required and
172
- # additionalProperties is false, as required by Anthropic strict mode.
173
- sig { params(schema: T::Hash[Symbol, T.untyped]).void }
174
- def enforce_all_required(schema)
175
- return unless schema.is_a?(Hash)
176
-
177
- if schema[:type] == "object" && schema[:properties]
178
- schema[:required] = schema[:properties].keys.map(&:to_s)
179
- schema[:additionalProperties] = false
180
- schema[:properties].each_value { |v| enforce_all_required(v) }
181
- elsif schema[:type] == "array" && schema[:items]
182
- enforce_all_required(schema[:items])
183
- elsif schema[:type].is_a?(Array)
184
- # type: ["array", "null"] — check items if present
185
- enforce_all_required(schema[:items]) if schema[:items]
186
- end
187
- end
188
-
189
- # Build JSON schema properties from output fields
190
- sig { params(fields: T::Hash[Symbol, T.untyped]).returns(T::Hash[String, T.untyped]) }
191
- def build_properties_from_fields(fields)
192
- properties = {}
193
- fields.each do |field_name, descriptor|
194
- properties[field_name.to_s] = DSPy::TypeSystem::SorbetJsonSchema.type_to_json_schema(descriptor.type)
195
- end
196
- properties
197
- end
198
-
199
- # Extract JSON from Anthropic tool use response
200
- sig { params(response: DSPy::LM::Response).returns(T.nilable(String)) }
201
- def extract_anthropic_tool_json(response)
202
- # Check for tool calls in metadata
203
- if response.metadata.respond_to?(:tool_calls) && response.metadata.tool_calls
204
- tool_calls = response.metadata.tool_calls
205
- if tool_calls.is_a?(Array) && !tool_calls.empty?
206
- first_call = tool_calls.first
207
- if first_call[:name] == "json_output" && first_call[:input]
208
- return JSON.generate(first_call[:input])
209
- end
210
- end
211
- end
212
-
213
- nil
214
- end
215
-
216
134
  # Extract JSON from content that may contain markdown or plain JSON
217
135
  sig { params(content: String).returns(String) }
218
136
  def extract_json_from_content(content)
@@ -221,48 +139,93 @@ module DSPy
221
139
  # Try 1: Check for ```json code block (with or without preceding text)
222
140
  if content.include?('```json')
223
141
  json_match = content.match(/```json\s*\n(.*?)\n```/m)
224
- return json_match[1].strip if json_match
142
+ if json_match
143
+ normalized = normalize_json_candidate(json_match[1].strip)
144
+ return normalized if valid_json?(normalized)
145
+ end
225
146
  end
226
147
 
227
148
  # Try 2: Check for generic ``` code block
228
149
  if content.include?('```')
229
150
  code_match = content.match(/```\s*\n(.*?)\n```/m)
230
151
  if code_match
231
- potential_json = code_match[1].strip
232
- # Verify it's JSON
233
- begin
234
- JSON.parse(potential_json)
235
- return potential_json
236
- rescue JSON::ParserError
237
- # Not valid JSON, continue
238
- end
152
+ potential_json = normalize_json_candidate(code_match[1].strip)
153
+ return potential_json if valid_json?(potential_json)
239
154
  end
240
155
  end
241
156
 
242
157
  # Try 3: Try parsing entire content as JSON
243
- begin
244
- JSON.parse(content)
245
- return content
246
- rescue JSON::ParserError
247
- # Not pure JSON, try extracting
248
- end
158
+ normalized_content = normalize_json_candidate(content)
159
+ return normalized_content if valid_json?(normalized_content)
249
160
 
250
161
  # Try 4: Look for JSON object pattern in text (greedy match for nested objects)
251
162
  json_pattern = /\{(?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*\}/m
252
163
  json_match = content.match(json_pattern)
253
164
  if json_match
254
- potential_json = json_match[0]
255
- begin
256
- JSON.parse(potential_json)
257
- return potential_json
258
- rescue JSON::ParserError
259
- # Not valid JSON
260
- end
165
+ potential_json = normalize_json_candidate(json_match[0])
166
+ return potential_json if valid_json?(potential_json)
261
167
  end
262
168
 
263
169
  # Return content as-is if no JSON found
264
170
  content
265
171
  end
172
+
173
+ sig { params(content: String).returns(String) }
174
# Repairs a JSON candidate in two passes: trailing object commas are removed
# first, then control characters inside string literals are escaped.
def normalize_json_candidate(content)
  without_trailing_commas = remove_trailing_object_commas(content)
  escape_control_characters_in_strings(without_trailing_commas)
end
177
+
178
+ sig { params(content: String).returns(String) }
179
# Removes every comma that is followed only by whitespace and a closing "}",
# e.g. '{"a": 1,}' becomes '{"a": 1}'. Whitespace after the comma is kept.
#
# The scan tracks JSON string literals (including backslash escapes), so
# commas inside strings are never touched, and trailing commas in nested
# objects are handled wherever they occur rather than only at a line end.
def remove_trailing_object_commas(content)
  cleaned = String.new(encoding: content.encoding)
  pending = nil # a comma plus the whitespace after it, held until the next token is known
  in_string = false
  escaping = false

  content.each_char do |char|
    if in_string
      cleaned << char
      if escaping
        escaping = false
      elsif char == '\\'
        escaping = true
      elsif char == '"'
        in_string = false
      end
      next
    end

    if pending
      if char.match?(/\s/)
        pending << char
        next
      end

      # Drop only the comma itself when the next token closes an object.
      cleaned << (char == '}' ? pending[1..-1] : pending)
      pending = nil
    end

    if char == ','
      pending = +","
    else
      cleaned << char
      in_string = true if char == '"'
    end
  end

  cleaned << pending if pending
  cleaned
end
182
+
183
+ sig { params(content: String).returns(T::Boolean) }
184
# Reports whether +content+ parses as JSON. Only JSON::ParserError is treated
# as "invalid"; any other exception from the parser propagates.
def valid_json?(content)
  begin
    JSON.parse(content)
  rescue JSON::ParserError
    return false
  end

  true
end
190
+
191
+ sig { params(content: String).returns(String) }
192
# Escapes raw control characters that appear inside JSON string literals so
# the result can be parsed. Newline, carriage return and tab become \n, \r
# and \t; any other character below 0x20 becomes a \u00XX escape so that no
# data is dropped. Characters outside string literals, and characters that
# follow a backslash inside a string, are copied through unchanged.
def escape_control_characters_in_strings(content)
  escaped = +""
  in_string = false
  escaping = false

  content.each_char do |char|
    unless in_string
      escaped << char
      in_string = true if char == '"'
      next
    end

    # The character after a backslash belongs to an existing escape sequence.
    if escaping
      escaped << char
      escaping = false
      next
    end

    case char
    when '\\'
      escaped << char
      escaping = true
    when '"'
      escaped << char
      in_string = false
    when "\n"
      escaped << '\n'
    when "\r"
      escaped << '\r'
    when "\t"
      escaped << '\t'
    else
      escaped << (char.ord < 0x20 ? format('\u%04x', char.ord) : char)
    end
  end

  escaped
end
266
229
  end
267
230
  end
268
231
  end