llm_gateway 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.pi/skills/live-provider-testing/SKILL.md +183 -0
- data/.pi/skills/options-development/SKILL.md +131 -0
- data/CHANGELOG.md +43 -0
- data/README.md +110 -41
- data/Rakefile +1 -0
- data/docs/migration_guide_0.6.0.md +386 -0
- data/lib/llm_gateway/adapters/adapter.rb +8 -44
- data/lib/llm_gateway/adapters/anthropic/acts_like_messages.rb +0 -2
- data/lib/llm_gateway/adapters/anthropic/input_mapper.rb +106 -27
- data/lib/llm_gateway/adapters/anthropic/output_mapper.rb +0 -33
- data/lib/llm_gateway/adapters/anthropic/stream_mapper.rb +59 -47
- data/lib/llm_gateway/adapters/anthropic_option_mapper.rb +48 -6
- data/lib/llm_gateway/adapters/groq/chat_completions_adapter.rb +3 -2
- data/lib/llm_gateway/adapters/groq/input_mapper.rb +44 -0
- data/lib/llm_gateway/adapters/groq/option_mapper.rb +89 -4
- data/lib/llm_gateway/adapters/normalized_stream_accumulator.rb +336 -0
- data/lib/llm_gateway/adapters/openai/acts_like_chat_completions.rb +0 -2
- data/lib/llm_gateway/adapters/openai/acts_like_responses.rb +0 -6
- data/lib/llm_gateway/adapters/openai/chat_completions/input_mapper.rb +135 -72
- data/lib/llm_gateway/adapters/openai/chat_completions/option_mapper.rb +100 -10
- data/lib/llm_gateway/adapters/openai/chat_completions/stream_mapper.rb +193 -170
- data/lib/llm_gateway/adapters/openai/chat_completions_adapter.rb +0 -1
- data/lib/llm_gateway/adapters/openai/responses/input_mapper.rb +128 -68
- data/lib/llm_gateway/adapters/openai/responses/option_mapper.rb +99 -10
- data/lib/llm_gateway/adapters/openai/responses/stream_mapper.rb +106 -275
- data/lib/llm_gateway/adapters/openai/responses_adapter.rb +0 -1
- data/lib/llm_gateway/adapters/openai_codex/input_mapper.rb +3 -3
- data/lib/llm_gateway/adapters/openai_codex/responses_adapter.rb +0 -5
- data/lib/llm_gateway/adapters/stream_mapper.rb +57 -0
- data/lib/llm_gateway/adapters/structs.rb +102 -52
- data/lib/llm_gateway/base_client.rb +2 -4
- data/lib/llm_gateway/client.rb +10 -66
- data/lib/llm_gateway/clients/anthropic.rb +5 -4
- data/lib/llm_gateway/clients/groq.rb +18 -4
- data/lib/llm_gateway/clients/openai.rb +20 -18
- data/lib/llm_gateway/prompt.rb +35 -17
- data/lib/llm_gateway/version.rb +1 -1
- data/lib/llm_gateway.rb +5 -29
- metadata +8 -10
- data/lib/llm_gateway/adapters/anthropic/bidirectional_message_mapper.rb +0 -111
- data/lib/llm_gateway/adapters/openai/chat_completions/bidirectional_message_mapper.rb +0 -110
- data/lib/llm_gateway/adapters/openai/chat_completions/output_mapper.rb +0 -40
- data/lib/llm_gateway/adapters/openai/responses/bidirectional_message_mapper.rb +0 -120
- data/lib/llm_gateway/adapters/openai/responses/output_mapper.rb +0 -47
- data/lib/llm_gateway/adapters/stream_accumulator.rb +0 -91
- data/scripts/generate_handoff_live_fixture.rb +0 -169
- data/scripts/generate_handoff_media_fixture.rb +0 -167
|
@@ -0,0 +1,386 @@
|
|
|
1
|
+
# Migration Guide: 0.5.0 to 0.6.0
|
|
2
|
+
|
|
3
|
+
This guide covers user-facing changes between `v0.5.0` and the latest commit on the 0.6.0 branch.
|
|
4
|
+
|
|
5
|
+
## Summary
|
|
6
|
+
|
|
7
|
+
0.6.0 separates provider authentication/configuration from model selection.
|
|
8
|
+
|
|
9
|
+
- Provider config now contains only provider/auth settings such as `provider`, `api_key`, `access_token`, and `account_id`.
|
|
10
|
+
- `model_key` is no longer accepted in provider/client configuration.
|
|
11
|
+
- Pass the model per request with `model:` when calling `chat`, `stream`, Responses/Codex methods, or embeddings.
|
|
12
|
+
- Legacy provider keys such as `openai_apikey_responses` were removed. Use the shorter provider keys.
|
|
13
|
+
- `LlmGateway::Prompt` now accepts/configures a provider and model separately, and uses `stream` internally.
|
|
14
|
+
- The `client.model_key` reader was removed; track the selected model at the call site or read it from returned messages.
|
|
15
|
+
- Streaming events now expose accumulated partial messages during the stream, while `:message_end` exposes the final message through `event.message`.
|
|
16
|
+
- Non-final stream event hashes now include `partial`; normal stream consumers are unaffected, but strict `event.to_h` snapshots/comparisons may need updates.
|
|
17
|
+
- Normalized usage counters were renamed to concise keys: `:input`, `:cache_write`, `:cache_read`, `:output`, and `:total`; `:reasoning_tokens` was removed.
|
|
18
|
+
- Streamed assistant messages now include `timestamp` as Unix milliseconds.
|
|
19
|
+
- Custom stream mappers must initialize with provider/API metadata and emit a final `:message_end` patch.
|
|
20
|
+
|
|
21
|
+
## 1. Replace legacy provider keys
|
|
22
|
+
|
|
23
|
+
0.6.0 removes the backward-compatible legacy provider registry entries.
|
|
24
|
+
|
|
25
|
+
| 0.5.0 provider key | 0.6.0 provider key |
|
|
26
|
+
|---|---|
|
|
27
|
+
| `anthropic_apikey_messages` | `anthropic_messages` |
|
|
28
|
+
| `anthropic_oauth_messages` | `anthropic_messages` |
|
|
29
|
+
| `openai_apikey_completions` | `openai_completions` |
|
|
30
|
+
| `openai_apikey_responses` | `openai_responses` |
|
|
31
|
+
| `openai_oauth_codex` | `openai_codex` |
|
|
32
|
+
| `groq_apikey_completions` | `groq_completions` |
|
|
33
|
+
|
|
34
|
+
### Before
|
|
35
|
+
|
|
36
|
+
```ruby
|
|
37
|
+
adapter = LlmGateway.build_provider(
|
|
38
|
+
provider: "openai_apikey_responses",
|
|
39
|
+
api_key: ENV.fetch("OPENAI_API_KEY"),
|
|
40
|
+
model_key: "gpt-5.4"
|
|
41
|
+
)
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### After
|
|
45
|
+
|
|
46
|
+
```ruby
|
|
47
|
+
adapter = LlmGateway.build_provider(
|
|
48
|
+
provider: "openai_responses",
|
|
49
|
+
api_key: ENV.fetch("OPENAI_API_KEY")
|
|
50
|
+
)
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## 2. Move `model_key` from provider config to request calls
|
|
54
|
+
|
|
55
|
+
`model_key` is no longer a provider option. Passing it to `LlmGateway.build_provider` raises:
|
|
56
|
+
|
|
57
|
+
```text
|
|
58
|
+
ArgumentError: model_key is no longer a provider option; pass model: to chat/stream instead
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Pass `model:` on each request instead.
|
|
62
|
+
|
|
63
|
+
### Streaming
|
|
64
|
+
|
|
65
|
+
```ruby
|
|
66
|
+
# Before
|
|
67
|
+
adapter = LlmGateway.build_provider(
|
|
68
|
+
provider: "openai_apikey_responses",
|
|
69
|
+
api_key: ENV.fetch("OPENAI_API_KEY"),
|
|
70
|
+
model_key: "gpt-5.4"
|
|
71
|
+
)
|
|
72
|
+
result = adapter.stream("Write one short sentence about Ruby.")
|
|
73
|
+
|
|
74
|
+
# After
|
|
75
|
+
adapter = LlmGateway.build_provider(
|
|
76
|
+
provider: "openai_responses",
|
|
77
|
+
api_key: ENV.fetch("OPENAI_API_KEY")
|
|
78
|
+
)
|
|
79
|
+
result = adapter.stream("Write one short sentence about Ruby.", model: "gpt-5.4")
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### Configure arrays
|
|
83
|
+
|
|
84
|
+
```ruby
|
|
85
|
+
# Before
|
|
86
|
+
LlmGateway.configure([
|
|
87
|
+
{
|
|
88
|
+
name: "primary",
|
|
89
|
+
config: {
|
|
90
|
+
provider: "groq_apikey_completions",
|
|
91
|
+
api_key: ENV.fetch("GROQ_API_KEY"),
|
|
92
|
+
model_key: "openai/gpt-oss-120b"
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
])
|
|
96
|
+
|
|
97
|
+
# After
|
|
98
|
+
LlmGateway.configure([
|
|
99
|
+
{
|
|
100
|
+
name: "primary",
|
|
101
|
+
config: {
|
|
102
|
+
provider: "groq_completions",
|
|
103
|
+
api_key: ENV.fetch("GROQ_API_KEY")
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
])
|
|
107
|
+
|
|
108
|
+
LlmGateway.configured_clients.fetch("primary").stream(
|
|
109
|
+
"Hello",
|
|
110
|
+
model: "openai/gpt-oss-120b"
|
|
111
|
+
)
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## 3. Update direct client usage
|
|
115
|
+
|
|
116
|
+
Direct clients no longer take `model_key:` in their constructors.
|
|
117
|
+
|
|
118
|
+
```ruby
|
|
119
|
+
# Before
|
|
120
|
+
client = LlmGateway::Clients::OpenAI.new(
|
|
121
|
+
api_key: ENV.fetch("OPENAI_API_KEY"),
|
|
122
|
+
model_key: "gpt-5.4"
|
|
123
|
+
)
|
|
124
|
+
client.stream(messages)
|
|
125
|
+
|
|
126
|
+
# After
|
|
127
|
+
client = LlmGateway::Clients::OpenAI.new(
|
|
128
|
+
api_key: ENV.fetch("OPENAI_API_KEY")
|
|
129
|
+
)
|
|
130
|
+
client.stream(messages, model: "gpt-5.4")
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
The same pattern applies to:
|
|
134
|
+
|
|
135
|
+
- `LlmGateway::Clients::Anthropic#chat` / `#stream`
|
|
136
|
+
- `LlmGateway::Clients::OpenAI#chat` / `#stream` / `#responses` / `#stream_responses`
|
|
137
|
+
- `LlmGateway::Clients::OpenAI#chat_codex` / `#stream_codex`
|
|
138
|
+
- `LlmGateway::Clients::Groq#chat` / `#stream`
|
|
139
|
+
|
|
140
|
+
Embeddings also take a per-call model:
|
|
141
|
+
|
|
142
|
+
```ruby
|
|
143
|
+
client.generate_embeddings(input, model: "text-embedding-3-large")
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
If `model:` is omitted, clients still fall back to their default models.
|
|
147
|
+
|
|
148
|
+
## 4. Update `LlmGateway::Prompt` classes
|
|
149
|
+
|
|
150
|
+
`Prompt` no longer looks up a configured client by comparing a string to `client.model_key`. It now keeps the provider and model as separate values.
|
|
151
|
+
|
|
152
|
+
If you previously called `Prompt.new("gpt-5.4")`, update that code. The first initializer argument is now a provider adapter, not a model lookup key. Configure a provider on the class or pass one to the initializer.
|
|
153
|
+
|
|
154
|
+
### Class-level configuration
|
|
155
|
+
|
|
156
|
+
```ruby
|
|
157
|
+
class SummaryPrompt < LlmGateway::Prompt
|
|
158
|
+
self.provider = LlmGateway.build_provider(
|
|
159
|
+
provider: "openai_responses",
|
|
160
|
+
api_key: ENV.fetch("OPENAI_API_KEY")
|
|
161
|
+
)
|
|
162
|
+
self.model = "gpt-5.4"
|
|
163
|
+
|
|
164
|
+
def prompt
|
|
165
|
+
"Summarize this text."
|
|
166
|
+
end
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
SummaryPrompt.new.run
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
### Instance-level configuration
|
|
174
|
+
|
|
175
|
+
```ruby
|
|
176
|
+
provider = LlmGateway.build_provider(
|
|
177
|
+
provider: "anthropic_messages",
|
|
178
|
+
api_key: ENV.fetch("ANTHROPIC_API_KEY")
|
|
179
|
+
)
|
|
180
|
+
|
|
181
|
+
SummaryPrompt.new(provider, "claude-sonnet-4-20250514").run
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
### Per-call overrides
|
|
185
|
+
|
|
186
|
+
```ruby
|
|
187
|
+
prompt = SummaryPrompt.new(default_provider, "gpt-5.1")
|
|
188
|
+
|
|
189
|
+
prompt.stream(
|
|
190
|
+
provider: other_provider,
|
|
191
|
+
model: "gpt-5.4",
|
|
192
|
+
reasoning: "high"
|
|
193
|
+
)
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
If you subclassed `Prompt` and called or overrode `post`, migrate that code to `stream`. `run` now calls `stream` internally.
|
|
197
|
+
|
|
198
|
+
## 5. Stop using `client.model_key`
|
|
199
|
+
|
|
200
|
+
Direct clients no longer expose a `model_key` reader because model selection is no longer client/provider state.
|
|
201
|
+
|
|
202
|
+
```ruby
|
|
203
|
+
# Before
|
|
204
|
+
client = LlmGateway::Clients::OpenAI.new(
|
|
205
|
+
api_key: ENV.fetch("OPENAI_API_KEY"),
|
|
206
|
+
model_key: "gpt-5.4"
|
|
207
|
+
)
|
|
208
|
+
puts client.model_key
|
|
209
|
+
|
|
210
|
+
# After
|
|
211
|
+
client = LlmGateway::Clients::OpenAI.new(
|
|
212
|
+
api_key: ENV.fetch("OPENAI_API_KEY")
|
|
213
|
+
)
|
|
214
|
+
model = "gpt-5.4"
|
|
215
|
+
result = client.stream(messages, model: model)
|
|
216
|
+
# Track `model` at the call site when you need it later.
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
## 6. OAuth provider names
|
|
220
|
+
|
|
221
|
+
OAuth is now represented by credentials, not by separate legacy provider keys.
|
|
222
|
+
|
|
223
|
+
```ruby
|
|
224
|
+
# Before
|
|
225
|
+
adapter = LlmGateway.build_provider(
|
|
226
|
+
provider: "openai_oauth_codex",
|
|
227
|
+
access_token: current_access_token,
|
|
228
|
+
model_key: "gpt-5.4"
|
|
229
|
+
)
|
|
230
|
+
|
|
231
|
+
# After
|
|
232
|
+
adapter = LlmGateway.build_provider(
|
|
233
|
+
provider: "openai_codex",
|
|
234
|
+
access_token: current_access_token
|
|
235
|
+
)
|
|
236
|
+
|
|
237
|
+
adapter.stream("Hello from OAuth auth", model: "gpt-5.4")
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
For Anthropic OAuth, use `provider: "anthropic_messages"` with an `access_token`.
|
|
241
|
+
|
|
242
|
+
## 7. Update stream callback handling
|
|
243
|
+
|
|
244
|
+
The final `:message_end` stream callback event changed shape.
|
|
245
|
+
|
|
246
|
+
In 0.5.x, `:message_end` was an `AssistantStreamMessageEvent` and exposed the accumulated message through `event.partial`.
|
|
247
|
+
|
|
248
|
+
In 0.6.0, `:message_end` is an `AssistantStreamMessageEndEvent` and exposes the final complete `AssistantMessage` through `event.message`. It does not expose `partial`.
|
|
249
|
+
|
|
250
|
+
```ruby
|
|
251
|
+
response = adapter.stream("Hello", model: "gpt-5.4") do |event|
|
|
252
|
+
case event.type
|
|
253
|
+
when :text_delta
|
|
254
|
+
print event.delta
|
|
255
|
+
when :message_end
|
|
256
|
+
final_message = event.message
|
|
257
|
+
puts final_message.provider
|
|
258
|
+
puts final_message.api
|
|
259
|
+
end
|
|
260
|
+
end
|
|
261
|
+
|
|
262
|
+
# The stream return value is the same final AssistantMessage.
|
|
263
|
+
response # => AssistantMessage
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
If you previously handled every event as if it had `partial`, branch on `event.type == :message_end` first or check `respond_to?(:partial)`.
|
|
267
|
+
|
|
268
|
+
```ruby
|
|
269
|
+
adapter.stream("Hello", model: "gpt-5.4") do |event|
|
|
270
|
+
if event.type == :message_end
|
|
271
|
+
persist(event.message.to_h)
|
|
272
|
+
elsif event.respond_to?(:partial)
|
|
273
|
+
update_ui(event.partial)
|
|
274
|
+
end
|
|
275
|
+
end
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
## 8. Update usage accounting keys
|
|
279
|
+
|
|
280
|
+
Normalized `AssistantMessage#usage` and final stream `event.usage` patches now use provider-independent concise keys plus `:raw` for the original provider usage/token payload:
|
|
281
|
+
|
|
282
|
+
| 0.5.x key | 0.6.0 key |
|
|
283
|
+
|---|---|
|
|
284
|
+
| `:input_tokens` | `:input` |
|
|
285
|
+
| `:cache_creation_input_tokens` | `:cache_write` |
|
|
286
|
+
| `:cache_read_input_tokens` | `:cache_read` |
|
|
287
|
+
| `:output_tokens` | `:output` |
|
|
288
|
+
| computed normalized total | `:total` |
|
|
289
|
+
| original provider usage payload | `:raw` |
|
|
290
|
+
| `:reasoning_tokens` | removed |
|
|
291
|
+
|
|
292
|
+
`:reasoning_tokens` was removed because providers expose and calculate reasoning token counts inconsistently. Use the streamed/final `ReasoningContent` blocks for reasoning text, and treat usage as the normalized token buckets above.
|
|
293
|
+
|
|
294
|
+
```ruby
|
|
295
|
+
# Before
|
|
296
|
+
result.usage[:input_tokens]
|
|
297
|
+
result.usage[:cache_read_input_tokens]
|
|
298
|
+
result.usage[:output_tokens]
|
|
299
|
+
|
|
300
|
+
# After
|
|
301
|
+
result.usage[:input]
|
|
302
|
+
result.usage[:cache_read]
|
|
303
|
+
result.usage[:output]
|
|
304
|
+
```
|
|
305
|
+
|
|
306
|
+
When checking cache behavior, use `usage[:cache_read]` and `usage[:cache_write]`. `usage[:total]` is computed as `input + cache_write + cache_read + output`. Use `usage[:raw]` when you need provider-specific token fields that are not part of the normalized counters.
|
|
307
|
+
|
|
308
|
+
## 9. Account for timestamps on streamed messages
|
|
309
|
+
|
|
310
|
+
`PartialAssistantMessage` and `AssistantMessage` now include a `timestamp` field in Unix milliseconds. Provider-supplied timestamps are preserved when available; otherwise the accumulator assigns one.
|
|
311
|
+
|
|
312
|
+
```ruby
|
|
313
|
+
response = adapter.stream("Hello", model: "gpt-5.4") do |event|
|
|
314
|
+
puts event.partial.timestamp if event.respond_to?(:partial)
|
|
315
|
+
end
|
|
316
|
+
|
|
317
|
+
puts response.timestamp
|
|
318
|
+
puts response.to_h[:timestamp]
|
|
319
|
+
```
|
|
320
|
+
|
|
321
|
+
If you instantiate `PartialAssistantMessage` or `AssistantMessage` directly in tests or custom integrations, include `timestamp:`.
|
|
322
|
+
|
|
323
|
+
## 10. Update custom stream mappers
|
|
324
|
+
|
|
325
|
+
If you implemented a custom adapter or stream mapper, update it for the new final-message flow.
|
|
326
|
+
|
|
327
|
+
`LlmGateway::Adapters::StreamMapper` now requires provider/API metadata:
|
|
328
|
+
|
|
329
|
+
```ruby
|
|
330
|
+
mapper = MyStreamMapper.new(provider: "openai", api: "responses")
|
|
331
|
+
```
|
|
332
|
+
|
|
333
|
+
`Adapter#stream` passes these values automatically when it instantiates the configured mapper, but direct mapper construction and custom initializers must accept/pass these keywords.
|
|
334
|
+
|
|
335
|
+
Custom mappers must also push a final normalized `:message_end` patch. Use the normalized usage keys from section 8 for the final `usage`.
|
|
336
|
+
|
|
337
|
+
```ruby
|
|
338
|
+
push_patches([
|
|
339
|
+
{ type: :message_delta, delta: { stop_reason: "stop" }, usage: { output: 12 } },
|
|
340
|
+
{ type: :message_end }
|
|
341
|
+
], &block)
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
`StreamMapper#result` now returns the final `AssistantMessage` created by the `:message_end` patch. If a custom mapper never emits `:message_end`, `adapter.stream` will not have a final message to return.
|
|
345
|
+
|
|
346
|
+
## 11. Cross-provider handoff note
|
|
347
|
+
|
|
348
|
+
Message sanitization for cross-provider/model handoffs now receives the target model from the request options. When replaying or handing off transcripts across providers/models, pass `model:` explicitly on the destination call so model-specific sanitizer behavior can run. If `model:` is omitted, the adapter has no target model and passes the transcript through without sanitizing it.
|
|
349
|
+
|
|
350
|
+
```ruby
|
|
351
|
+
next_response = target_adapter.stream(
|
|
352
|
+
transcript_from_another_provider,
|
|
353
|
+
model: "gpt-5.4"
|
|
354
|
+
)
|
|
355
|
+
```
|
|
356
|
+
|
|
357
|
+
## 12. Stream event hash snapshots
|
|
358
|
+
|
|
359
|
+
Non-final stream events now expose a `partial` assistant message, so `event.to_h` includes an additional `partial` field.
|
|
360
|
+
|
|
361
|
+
This is additive for normal stream callback consumers:
|
|
362
|
+
|
|
363
|
+
```ruby
|
|
364
|
+
adapter.stream("Hello", model: "gpt-5.4") do |event|
|
|
365
|
+
puts event.type
|
|
366
|
+
puts event.delta if event.respond_to?(:delta)
|
|
367
|
+
end
|
|
368
|
+
```
|
|
369
|
+
|
|
370
|
+
If your tests or application code compare full `event.to_h` hashes or snapshot serialized events, update those expectations to include or ignore `partial`.
|
|
371
|
+
|
|
372
|
+
## Checklist
|
|
373
|
+
|
|
374
|
+
- [ ] Replace all legacy provider keys with the new provider keys.
|
|
375
|
+
- [ ] Remove `model_key:` from `build_provider`, `configure`, and direct client constructors.
|
|
376
|
+
- [ ] Remove any direct reads of `client.model_key` / `adapter.client.model_key`.
|
|
377
|
+
- [ ] Add `model:` to `chat`, `stream`, Responses/Codex, and embeddings calls where you need a specific model.
|
|
378
|
+
- [ ] Update `Prompt` subclasses to configure `provider` and `model` separately.
|
|
379
|
+
- [ ] Replace `Prompt.new("model-key")` model lookup usage with explicit provider/model configuration.
|
|
380
|
+
- [ ] Replace custom `Prompt#post` usage with `Prompt#stream`.
|
|
381
|
+
- [ ] Update stream callbacks to read `event.message` for `:message_end` and `event.partial` only for non-final events.
|
|
382
|
+
- [ ] Rename normalized usage lookups to `:input`, `:cache_write`, `:cache_read`, `:output`, and `:total`; use `:raw` for provider-specific token fields; remove `:reasoning_tokens` handling.
|
|
383
|
+
- [ ] Include/read `timestamp` on streamed partial and final assistant messages where you construct or persist those objects.
|
|
384
|
+
- [ ] Update custom stream mappers to accept `provider:` / `api:`, emit normalized usage keys, and emit `{ type: :message_end }`.
|
|
385
|
+
- [ ] For cross-provider handoffs, pass the target `model:` explicitly.
|
|
386
|
+
- [ ] Update strict `event.to_h` stream event snapshots/comparisons for the new `partial` field.
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative "stream_accumulator"
|
|
4
3
|
require_relative "structs"
|
|
5
4
|
|
|
6
5
|
module LlmGateway
|
|
@@ -12,34 +11,19 @@ module LlmGateway
|
|
|
12
11
|
@client = client
|
|
13
12
|
end
|
|
14
13
|
|
|
15
|
-
def chat(message, tools: nil, system: nil, **options)
|
|
16
|
-
normalized_input = map_input({
|
|
17
|
-
messages: sanitize_messages(normalize_messages(message)),
|
|
18
|
-
tools: tools,
|
|
19
|
-
system: normalize_system(system)
|
|
20
|
-
})
|
|
21
|
-
|
|
22
|
-
result = perform_chat(
|
|
23
|
-
normalized_input[:messages],
|
|
24
|
-
tools: normalized_input[:tools],
|
|
25
|
-
system: normalized_input[:system],
|
|
26
|
-
**map_options(options)
|
|
27
|
-
)
|
|
28
|
-
|
|
29
|
-
map_output(result)
|
|
30
|
-
end
|
|
31
|
-
|
|
32
14
|
def stream(message, tools: nil, system: nil, **options, &block)
|
|
33
15
|
raise LlmGateway::Errors::MissingMapperForProvider, "No stream_mapper configured" unless stream_mapper
|
|
34
16
|
|
|
35
17
|
normalized_input = map_input({
|
|
36
|
-
messages: sanitize_messages(normalize_messages(message)),
|
|
18
|
+
messages: sanitize_messages(normalize_messages(message), target_model: options[:model]),
|
|
37
19
|
tools: tools,
|
|
38
20
|
system: normalize_system(system)
|
|
39
21
|
})
|
|
40
22
|
|
|
41
|
-
|
|
42
|
-
|
|
23
|
+
mapper = stream_mapper.new(
|
|
24
|
+
provider: LlmGateway::Client.provider_id_from_client(client),
|
|
25
|
+
api: api_name
|
|
26
|
+
)
|
|
43
27
|
|
|
44
28
|
perform_stream(
|
|
45
29
|
normalized_input[:messages],
|
|
@@ -47,17 +31,10 @@ module LlmGateway
|
|
|
47
31
|
system: normalized_input[:system],
|
|
48
32
|
**map_options(options)
|
|
49
33
|
) do |chunk|
|
|
50
|
-
|
|
51
|
-
accumulator.push(event)
|
|
52
|
-
block.call(event) if block && event
|
|
34
|
+
mapper.map(chunk, &block)
|
|
53
35
|
end
|
|
54
36
|
|
|
55
|
-
|
|
56
|
-
accumulator.result.merge(
|
|
57
|
-
provider: LlmGateway::Client.provider_id_from_client(client),
|
|
58
|
-
api: api_name
|
|
59
|
-
)
|
|
60
|
-
)
|
|
37
|
+
mapper.result
|
|
61
38
|
end
|
|
62
39
|
|
|
63
40
|
def upload_file(filename:, content:, mime_type: "application/octet-stream", purpose: "assistants")
|
|
@@ -92,10 +69,6 @@ module LlmGateway
|
|
|
92
69
|
nil
|
|
93
70
|
end
|
|
94
71
|
|
|
95
|
-
def output_mapper
|
|
96
|
-
raise NotImplementedError, "#{self.class} must implement #output_mapper"
|
|
97
|
-
end
|
|
98
|
-
|
|
99
72
|
def file_output_mapper
|
|
100
73
|
nil
|
|
101
74
|
end
|
|
@@ -108,18 +81,10 @@ module LlmGateway
|
|
|
108
81
|
input_mapper.map(input)
|
|
109
82
|
end
|
|
110
83
|
|
|
111
|
-
def map_output(output)
|
|
112
|
-
output_mapper.map(output)
|
|
113
|
-
end
|
|
114
|
-
|
|
115
84
|
def map_options(options)
|
|
116
85
|
option_mapper.map(options)
|
|
117
86
|
end
|
|
118
87
|
|
|
119
|
-
def perform_chat(messages, tools:, system:, **options)
|
|
120
|
-
client.chat(messages, tools: tools, system: system, **options)
|
|
121
|
-
end
|
|
122
|
-
|
|
123
88
|
def perform_stream(messages, tools:, system:, **options, &block)
|
|
124
89
|
client.stream(messages, tools: tools, system: system, **options, &block)
|
|
125
90
|
end
|
|
@@ -132,12 +97,11 @@ module LlmGateway
|
|
|
132
97
|
nil
|
|
133
98
|
end
|
|
134
99
|
|
|
135
|
-
def sanitize_messages(messages)
|
|
100
|
+
def sanitize_messages(messages, target_model: nil)
|
|
136
101
|
return messages unless input_sanitizer
|
|
137
102
|
|
|
138
103
|
target_provider = LlmGateway::Client.provider_id_from_client(client)
|
|
139
104
|
target_api = api_name
|
|
140
|
-
target_model = client.model_key
|
|
141
105
|
|
|
142
106
|
return messages if target_provider.nil? || target_api.nil? || target_model.nil?
|
|
143
107
|
|
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative "bidirectional_message_mapper"
|
|
4
|
-
|
|
5
3
|
module LlmGateway
|
|
6
4
|
module Adapters
|
|
7
5
|
module Anthropic
|
|
@@ -14,42 +12,123 @@ module LlmGateway
|
|
|
14
12
|
}
|
|
15
13
|
end
|
|
16
14
|
|
|
17
|
-
|
|
15
|
+
def self.map_content(content)
|
|
16
|
+
content = { type: "text", text: content } unless content.is_a?(Hash)
|
|
17
|
+
|
|
18
|
+
case content[:type]
|
|
19
|
+
when "text"
|
|
20
|
+
map_text_content(content)
|
|
21
|
+
when "file"
|
|
22
|
+
map_file_content(content)
|
|
23
|
+
when "image"
|
|
24
|
+
map_image_content(content)
|
|
25
|
+
when "tool_use"
|
|
26
|
+
map_tool_use_content(content)
|
|
27
|
+
when "tool_result"
|
|
28
|
+
map_tool_result_content(content)
|
|
29
|
+
when "thinking", "reasoning"
|
|
30
|
+
map_reasoning_content(content)
|
|
31
|
+
else
|
|
32
|
+
content
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
class << self
|
|
37
|
+
private
|
|
18
38
|
|
|
19
|
-
|
|
20
|
-
|
|
39
|
+
def map_messages(messages)
|
|
40
|
+
return messages unless messages
|
|
21
41
|
|
|
22
|
-
|
|
42
|
+
messages.map do |msg|
|
|
43
|
+
msg = msg.merge(role: "user") if msg[:role] == "developer"
|
|
23
44
|
|
|
24
|
-
|
|
25
|
-
|
|
45
|
+
content = if msg[:content].is_a?(Array)
|
|
46
|
+
msg[:content].map { |content| map_content(content) }
|
|
47
|
+
else
|
|
48
|
+
[ map_content(msg[:content]) ]
|
|
49
|
+
end
|
|
26
50
|
|
|
27
|
-
|
|
28
|
-
msg[:
|
|
29
|
-
|
|
30
|
-
|
|
51
|
+
{
|
|
52
|
+
role: msg[:role],
|
|
53
|
+
content: content
|
|
54
|
+
}
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
def map_system(system)
|
|
59
|
+
if !system || system.empty?
|
|
60
|
+
nil
|
|
61
|
+
elsif system.length == 1 && system.first[:role] == "system"
|
|
62
|
+
mapped = { type: "text", text: system.first[:content] }
|
|
63
|
+
mapped[:cache_control] = system.first[:cache_control] if system.first[:cache_control]
|
|
64
|
+
[ mapped ]
|
|
31
65
|
else
|
|
32
|
-
|
|
66
|
+
system
|
|
33
67
|
end
|
|
68
|
+
end
|
|
34
69
|
|
|
70
|
+
def map_text_content(content)
|
|
71
|
+
result = {
|
|
72
|
+
type: "text",
|
|
73
|
+
text: content[:text]
|
|
74
|
+
}
|
|
75
|
+
result[:cache_control] = content[:cache_control] if content[:cache_control]
|
|
76
|
+
result
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
def map_file_content(content)
|
|
35
80
|
{
|
|
36
|
-
|
|
37
|
-
|
|
81
|
+
type: "document",
|
|
82
|
+
source: {
|
|
83
|
+
data: content[:data],
|
|
84
|
+
type: "text",
|
|
85
|
+
media_type: content[:media_type]
|
|
86
|
+
}
|
|
38
87
|
}
|
|
39
88
|
end
|
|
40
|
-
end
|
|
41
89
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
90
|
+
def map_image_content(content)
|
|
91
|
+
{
|
|
92
|
+
type: "image",
|
|
93
|
+
source: {
|
|
94
|
+
data: content[:data],
|
|
95
|
+
type: "base64",
|
|
96
|
+
media_type: content[:media_type]
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
def map_tool_use_content(content)
|
|
102
|
+
{
|
|
103
|
+
type: "tool_use",
|
|
104
|
+
id: content[:id],
|
|
105
|
+
name: content[:name],
|
|
106
|
+
input: content[:input]
|
|
107
|
+
}
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
def map_tool_result_content(content)
|
|
111
|
+
mapped_content = content[:content]
|
|
112
|
+
if mapped_content.is_a?(Array)
|
|
113
|
+
mapped_content = mapped_content.map do |item|
|
|
114
|
+
item.is_a?(Hash) ? map_content(item.transform_keys(&:to_sym)) : item
|
|
115
|
+
end
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
{
|
|
119
|
+
type: "tool_result",
|
|
120
|
+
tool_use_id: content[:tool_use_id],
|
|
121
|
+
content: mapped_content
|
|
122
|
+
}
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
def map_reasoning_content(content)
|
|
126
|
+
result = {
|
|
127
|
+
type: "thinking",
|
|
128
|
+
thinking: content[:reasoning]
|
|
129
|
+
}
|
|
130
|
+
result[:signature] = content[:signature] unless content[:signature].nil?
|
|
131
|
+
result
|
|
53
132
|
end
|
|
54
133
|
end
|
|
55
134
|
end
|
|
@@ -12,39 +12,6 @@ module LlmGateway
|
|
|
12
12
|
)
|
|
13
13
|
end
|
|
14
14
|
end
|
|
15
|
-
|
|
16
|
-
class OutputMapper
|
|
17
|
-
def self.map(data)
|
|
18
|
-
{
|
|
19
|
-
id: data[:id],
|
|
20
|
-
model: data[:model],
|
|
21
|
-
usage: data[:usage],
|
|
22
|
-
choices: map_choices(data)
|
|
23
|
-
}
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
private
|
|
27
|
-
|
|
28
|
-
def self.map_choices(data)
|
|
29
|
-
message_mapper = BidirectionalMessageMapper.new(LlmGateway::DIRECTION_OUT)
|
|
30
|
-
|
|
31
|
-
content = if data[:content].is_a?(Array)
|
|
32
|
-
data[:content].map do |content|
|
|
33
|
-
message_mapper.map_content(content)
|
|
34
|
-
end
|
|
35
|
-
else
|
|
36
|
-
data[:content] ? [ message_mapper.map_content(data[:content]) ] : []
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
# Claude returns content directly at root level, not in a choices array
|
|
40
|
-
# We need to construct the choices array from the full response data
|
|
41
|
-
[ {
|
|
42
|
-
content: content, # Use content directly from Claude response
|
|
43
|
-
finish_reason: data[:stop_reason],
|
|
44
|
-
role: "assistant"
|
|
45
|
-
} ]
|
|
46
|
-
end
|
|
47
|
-
end
|
|
48
15
|
end
|
|
49
16
|
end
|
|
50
17
|
end
|