lex-llm 0.6.2 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 92e15ead2801ddcfdb692c3135b0a8774bf6ffbc137e70562afaf107dcd182da
4
- data.tar.gz: 79ac6bb87c7c57f22857982096fe7818f1d07de3c62b7dbd114588451ed494cb
3
+ metadata.gz: 6401d7f1b284c2cee7577ff84c1ee099f5a8ae872c51e8a8410c8627c8514a16
4
+ data.tar.gz: 1ae7e5ff99ee54f5aae6dcc1a8cad4c65c9537d6c2ede8992cdd93d4c2933b9c
5
5
  SHA512:
6
- metadata.gz: c46aaae88b383e6ffd8e24076d64287b7fd04bc0c3108b7f7b760807b5ce38d3b49b88294c5cf183d289255ccfecdf2c6513b63646ac3178cf79136f837f67d2
7
- data.tar.gz: 4ca741e7ea3dcd3bfee9ad59a30a0379a130a3a885f344ad9b0e60509b0d4c18e5ef1e2e2bdf8c0c818e313a493357f475c60d7c27219df1c9302a77b2cebfbf
6
+ metadata.gz: 7575751b784ebe46be8c03591d748fc81e62695ea3142972bc3257f72e44c1822b8d9438ed1f2958c27287f10bcb0ebe4af5081e85f08e57f6ecf10e47c6c253
7
+ data.tar.gz: 5a7a68f00cce655fe2912e64350c302044cbabc2bafd874d03683d40502e9edfdfc2a987bb7c591ee9d302df0e2a89cae1040c6b22f71e72f1c5700988dd57eb
data/CHANGELOG.md CHANGED
@@ -1,5 +1,24 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.6.4 - 2026-06-30
4
+
5
+ ### Fixed
6
+ - **Canonical Data structs now serialize correctly via MultiJson/Oj/::JSON** — all 10 `Data.define` canonical structs (`ToolCall`, `Message`, `ContentBlock`, `Chunk`, `Params`, `Request`, `Response`, `Thinking`, `ToolDefinition`, `Usage`) now implement `as_json` and `to_json`, delegating to their existing `#to_h` method. Without these callbacks, `MultiJson.dump` (and any JSON encoder) fell back to `obj.to_s` on canonical structs, producing the Ruby `#inspect` dump (e.g. `#<data Legion::Extensions::Llm::Canonical::ToolCall id="toolu_bdrk_...", ...>`) wherever a struct appeared inside a Hash/Array being serialized. This leaked into:
7
+ - Client responses (`/v1/chat/completions`, `/v1/messages`, `/v1/responses`) — `tool_calls` in assistant message history appeared as inspect strings instead of structured JSON objects, breaking LangGraph/Freelens supervisors that expect structured routing decisions.
8
+ - Ledger persistence (`llm_message_inference_requests.request_json`) — tool_calls stored as unparseable Ruby inspect strings, breaking history reconstruction on subsequent turns.
9
+ - AMQP wire payloads — any consumer receiving a message containing canonical structs saw inspect strings instead of structured data.
10
+ - Debug echo-request (`X-Legion-Debug: echo-request`) — canonical request snapshot contained inspect strings for any `tool_calls` in message history.
11
+
12
+ The fix is structural: canonical structs now self-enforce correct JSON serialization at the architectural boundary (per Amendment A of the N×N routing design), so every downstream consumer — ledger, AMQP publisher, client translator, debug formatter — serializes correctly without needing to call `.to_h` explicitly.
13
+
14
+ ## 0.6.3 - 2026-06-25
15
+
16
+ ### Fixed
17
+ - `ContentBlock.from_hash` rescues `NoMethodError` when content arrays contain corrupted String elements (serialized `#inspect` output from prior storage bugs) — returns a text block instead of crashing with `undefined method 'transform_keys' for an instance of String`.
18
+ - `ContentBlock.from_hash` normalizes `output_text`/`input_text` types to `:text` via `TEXT_TYPE_ALIASES` so Responses API content blocks are recognized by `text?` and extracted by `Message#text`.
19
+ - `ContentBlock#to_s` returns clean text for all text-type blocks and short placeholders for tool-use and image blocks (`[tool_use:<name>]`, `[image]`); `#inspect` returns a concise debug representation instead of the full 18-field Data.define dump.
20
+ - `Canonical::Message#to_s` delegates to `#text` to prevent Array#inspect leaking struct internals into string contexts.
21
+
3
22
  ## 0.6.2 - 2026-06-20
4
23
 
5
24
  ### Fixed
@@ -162,6 +162,15 @@ module Legion
162
162
  }.compact
163
163
  end
164
164
 
165
+ # MultiJson/Oj/::JSON callback — prevents Data.define #inspect leak into JSON.
166
+ def as_json(*)
167
+ to_h
168
+ end
169
+
170
+ def to_json(*)
171
+ to_h.to_json(*)
172
+ end
173
+
165
174
  # Type predicate helpers.
166
175
  def text_delta? = type == :text_delta
167
176
  def thinking_delta? = type == :thinking_delta
@@ -6,6 +6,7 @@ module Legion
6
6
  module Canonical
7
7
  # Typed content block with media_type support per G20a.
8
8
  # Ports field vocabulary from Legion::LLM::Types::ContentBlock.
9
+ # rubocop:disable Lint/ConstantDefinitionInBlock -- required for Data.define block scope
9
10
  ContentBlock = ::Data.define(
10
11
  :type, :text, :data, :source_type, :media_type,
11
12
  :detail, :name, :file_id,
@@ -13,6 +14,8 @@ module Legion
13
14
  :source, :start_index, :end_index,
14
15
  :code, :message, :cache_control
15
16
  ) do
17
+ TEXT_TYPE_ALIASES = %i[text output_text input_text].freeze
18
+
16
19
  # Build a text content block.
17
20
  def self.text(content, cache_control: nil)
18
21
  new(
@@ -64,12 +67,17 @@ module Legion
64
67
  end
65
68
 
66
69
  # Build from a Hash (raw provider response or deserialized wire payload).
70
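+ # Rescues NoMethodError from corrupted inputs (e.g. String elements from
71
+ # prior serialization bugs where ContentBlock#inspect leaked into storage): logs a warning and returns text(source.to_s) instead of raising. The rescue is method-wide, so any NoMethodError raised while building the block takes this fallback.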
67
72
  def self.from_hash(source)
68
73
  return nil if source.nil?
69
74
 
70
75
  h = source.transform_keys(&:to_sym)
71
76
  type_raw = h.delete(:type)
72
- h[:type] = type_raw&.to_sym if type_raw
77
+ if type_raw
78
+ type_sym = type_raw.to_sym
79
+ h[:type] = TEXT_TYPE_ALIASES.include?(type_sym) ? :text : type_sym
80
+ end
73
81
 
74
82
  new(
75
83
  type: h[:type],
@@ -91,6 +99,10 @@ module Legion
91
99
  message: h[:message],
92
100
  cache_control: h[:cache_control]
93
101
  )
102
+ rescue NoMethodError => e
103
+ Legion::Logging.log.warn('[canonical][content_block] from_hash received non-Hash input ' \
104
+ "(#{source.class}): #{e.message}")
105
+ text(source.to_s)
94
106
  end
95
107
 
96
108
  # Serialize to a Hash for AMQP/fleet/wire transport.
@@ -98,9 +110,31 @@ module Legion
98
110
  super.compact
99
111
  end
100
112
 
113
+ # MultiJson/Oj/::JSON callback — prevents Data.define #inspect leak into JSON.
114
+ def as_json(*)
115
+ to_h
116
+ end
117
+
118
+ def to_json(*)
119
+ to_h.to_json(*)
120
+ end
121
+
122
+ # Human-readable string — prevents #inspect leaking into user-facing output.
123
+ def to_s
124
+ return "[tool_use:#{name}]" if type == :tool_use
125
+ return '[image]' if type == :image
126
+
127
+ text.to_s
128
+ end
129
+
130
+ # Concise inspect — prevents raw Data.define dump in Array#inspect output.
131
+ def inspect
132
+ "#<ContentBlock:#{type} #{to_s.slice(0, 80).inspect}>"
133
+ end
134
+
101
135
  # Whether this block carries textual content.
102
136
  def text?
103
- type == :text
137
+ TEXT_TYPE_ALIASES.include?(type)
104
138
  end
105
139
 
106
140
  # Whether this block carries thinking/reasoning content.
@@ -120,6 +154,7 @@ module Legion
120
154
  end
121
155
 
122
156
  ContentBlock::CONTENT_BLOCK_TYPES = %i[text thinking tool_use tool_result image audio video].freeze
157
+ # rubocop:enable Lint/ConstantDefinitionInBlock
123
158
  end
124
159
  end
125
160
  end
@@ -123,6 +123,20 @@ module Legion
123
123
  super.compact
124
124
  end
125
125
 
126
+ # MultiJson/Oj/::JSON callback — prevents Data.define #inspect leak into JSON.
127
+ def as_json(*)
128
+ to_h
129
+ end
130
+
131
+ def to_json(*)
132
+ to_h.to_json(*)
133
+ end
134
+
135
+ # Human-readable string — prevents #inspect leaking into user-facing output.
136
+ def to_s
137
+ text
138
+ end
139
+
126
140
  # Minimal provider-facing hash (role + text content).
127
141
  def to_provider_hash
128
142
  { role: role.to_s, content: text }.compact
@@ -53,6 +53,15 @@ module Legion
53
53
  def to_h
54
54
  super.compact
55
55
  end
56
+
57
+ # MultiJson/Oj/::JSON callback — prevents Data.define #inspect leak into JSON.
58
+ def as_json(*)
59
+ to_h
60
+ end
61
+
62
+ def to_json(*)
63
+ to_h.to_json(*)
64
+ end
56
65
  end
57
66
  # rubocop:enable Lint/ConstantDefinitionInBlock
58
67
  end
@@ -92,6 +92,15 @@ module Legion
92
92
  }.compact
93
93
  end
94
94
 
95
+ # MultiJson/Oj/::JSON callback — prevents Data.define #inspect leak into JSON.
96
+ def as_json(*)
97
+ to_h
98
+ end
99
+
100
+ def to_json(*)
101
+ to_h.to_json(*)
102
+ end
103
+
95
104
  def self.normalize_tools(tools)
96
105
  return {} if tools.nil? || tools.empty?
97
106
 
@@ -104,6 +104,15 @@ module Legion
104
104
  end
105
105
  end
106
106
 
107
+ # MultiJson/Oj/::JSON callback — prevents Data.define #inspect leak into JSON.
108
+ def as_json(*)
109
+ to_h
110
+ end
111
+
112
+ def to_json(*)
113
+ to_h.to_json(*)
114
+ end
115
+
107
116
  # Whether the response includes tool calls.
108
117
  def tool_call?
109
118
  !tool_calls.nil? && !tool_calls.empty?
@@ -31,6 +31,15 @@ module Legion
31
31
  super.compact
32
32
  end
33
33
 
34
+ # MultiJson/Oj/::JSON callback — prevents Data.define #inspect leak into JSON.
35
+ def as_json(*)
36
+ to_h
37
+ end
38
+
39
+ def to_json(*)
40
+ to_h.to_json(*)
41
+ end
42
+
34
43
  # Whether this thinking block has any content.
35
44
  def empty?
36
45
  content.nil? && signature.nil?
@@ -106,6 +106,16 @@ module Legion
106
106
  super.compact
107
107
  end
108
108
 
109
+ # MultiJson/Oj/::JSON callback for unknown types — without this, fallback is
110
+ # obj.to_s which for Data.define returns the #inspect dump and leaks into JSON.
111
+ def as_json(*)
112
+ to_h
113
+ end
114
+
115
+ def to_json(*)
116
+ to_h.to_json(*)
117
+ end
118
+
109
119
  # Subset for audit/ledger emission.
110
120
  def to_audit_hash
111
121
  {
@@ -91,6 +91,15 @@ module Legion
91
91
  parameters: parameters
92
92
  }.compact.reject { |k, v| k == :description && v == '' }
93
93
  end
94
+
95
+ # MultiJson/Oj/::JSON callback — prevents Data.define #inspect leak into JSON.
96
+ def as_json(*)
97
+ to_h
98
+ end
99
+
100
+ def to_json(*)
101
+ to_h.to_json(*)
102
+ end
94
103
  end
95
104
  end
96
105
  end
@@ -61,6 +61,15 @@ module Legion
61
61
  super.compact
62
62
  end
63
63
 
64
+ # MultiJson/Oj/::JSON callback — prevents Data.define #inspect leak into JSON.
65
+ def as_json(*)
66
+ to_h
67
+ end
68
+
69
+ def to_json(*)
70
+ to_h.to_json(*)
71
+ end
72
+
64
73
  # Total tokens across all categories.
65
74
  def total_tokens
66
75
  [input_tokens, output_tokens, cache_read_tokens, cache_write_tokens,
@@ -3,7 +3,7 @@
3
3
  module Legion
4
4
  module Extensions
5
5
  module Llm
6
- VERSION = '0.6.2'
6
+ VERSION = '0.6.4'
7
7
  end
8
8
  end
9
9
  end
@@ -166,6 +166,52 @@ RSpec.describe Legion::Extensions::Llm::Canonical::ContentBlock do
166
166
  end
167
167
  end
168
168
 
169
+ describe 'Responses API type normalization (output_text/input_text)' do
170
+ it 'normalizes output_text to :text via from_hash' do
171
+ block = described_class.from_hash(type: 'output_text', text: 'The seat templates')
172
+
173
+ expect(block.type).to eq(:text)
174
+ expect(block.text).to eq('The seat templates')
175
+ expect(block.text?).to be true
176
+ end
177
+
178
+ it 'normalizes input_text to :text via from_hash' do
179
+ block = described_class.from_hash(type: 'input_text', text: 'user message')
180
+
181
+ expect(block.type).to eq(:text)
182
+ expect(block.text?).to be true
183
+ end
184
+
185
+ it 'returns text content from to_s for output_text blocks' do
186
+ block = described_class.from_hash(type: 'output_text', text: "The seat templates don't")
187
+
188
+ expect(block.to_s).to eq("The seat templates don't")
189
+ end
190
+
191
+ it 'does not leak #inspect struct into Array#to_s' do
192
+ blocks = [described_class.from_hash(type: 'output_text', text: 'hello')]
193
+
194
+ expect(blocks.inspect).not_to include('data Legion::Extensions')
195
+ expect(blocks.inspect).not_to include('source_type=nil')
196
+ end
197
+ end
198
+
199
+ describe '#to_s' do
200
+ it 'returns text for text blocks' do
201
+ expect(described_class.text('hello').to_s).to eq('hello')
202
+ end
203
+
204
+ it 'returns placeholder for tool_use blocks' do
205
+ block = described_class.tool_use(id: '1', name: 'bash', input: {})
206
+ expect(block.to_s).to eq('[tool_use:bash]')
207
+ end
208
+
209
+ it 'returns placeholder for image blocks' do
210
+ block = described_class.image(data: 'x', media_type: 'image/png')
211
+ expect(block.to_s).to eq('[image]')
212
+ end
213
+ end
214
+
169
215
  describe 'round-trip' do
170
216
  it 'preserves text block through from_hash/to_h' do
171
217
  original = { type: 'text', text: 'hello world' }
@@ -163,6 +163,29 @@ RSpec.describe Legion::Extensions::Llm::Canonical::Message do
163
163
 
164
164
  expect(msg.text).to eq('')
165
165
  end
166
+
167
+ it 'extracts text from output_text ContentBlock array (Responses API / Codex)' do
168
+ msg = described_class.from_hash(
169
+ role: :assistant,
170
+ content: [{ type: 'output_text', text: "The seat templates don't" }]
171
+ )
172
+
173
+ expect(msg.text).to eq("The seat templates don't")
174
+ expect(msg.text).not_to include('#<data')
175
+ expect(msg.text).not_to include('ContentBlock')
176
+ end
177
+
178
+ it 'extracts text from mixed output_text and text blocks' do
179
+ msg = described_class.from_hash(
180
+ role: :assistant,
181
+ content: [
182
+ { type: 'output_text', text: 'first ' },
183
+ { type: 'text', text: 'second' }
184
+ ]
185
+ )
186
+
187
+ expect(msg.text).to eq('first second')
188
+ end
166
189
  end
167
190
 
168
191
  describe '#to_h' do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-llm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.2
4
+ version: 0.6.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - LegionIO