llm_gateway 0.5.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +38 -0
- data/README.md +350 -43
- data/docs/migration_guide_0.6.0.md +386 -0
- data/docs/migration_guide_0.7.0.md +193 -0
- data/lib/llm_gateway/adapters/adapter.rb +8 -11
- data/lib/llm_gateway/adapters/anthropic/input_mapper.rb +24 -0
- data/lib/llm_gateway/adapters/anthropic/stream_mapper.rb +61 -11
- data/lib/llm_gateway/adapters/anthropic_option_mapper.rb +1 -1
- data/lib/llm_gateway/adapters/groq/option_mapper.rb +1 -1
- data/lib/llm_gateway/adapters/input_message_sanitizer.rb +98 -7
- data/lib/llm_gateway/adapters/normalized_stream_accumulator.rb +132 -39
- data/lib/llm_gateway/adapters/openai/chat_completions/option_mapper.rb +1 -1
- data/lib/llm_gateway/adapters/openai/chat_completions/stream_mapper.rb +40 -16
- data/lib/llm_gateway/adapters/openai/responses/input_mapper.rb +47 -31
- data/lib/llm_gateway/adapters/openai/responses/option_mapper.rb +1 -1
- data/lib/llm_gateway/adapters/openai/responses/stream_mapper.rb +173 -24
- data/lib/llm_gateway/adapters/stream_mapper.rb +9 -2
- data/lib/llm_gateway/adapters/structs.rb +140 -55
- data/lib/llm_gateway/agents/event.rb +105 -0
- data/lib/llm_gateway/agents/file_session_manager.rb +100 -0
- data/lib/llm_gateway/agents/harness.rb +176 -0
- data/lib/llm_gateway/agents/in_memory_session_manager.rb +222 -0
- data/lib/llm_gateway/agents/tools/bash_tool.rb +132 -0
- data/lib/llm_gateway/agents/tools/edit_tool.rb +215 -0
- data/lib/llm_gateway/agents/tools/read_tool.rb +143 -0
- data/lib/llm_gateway/agents/tools/tool_utils.rb +164 -0
- data/lib/llm_gateway/agents/tools/write_tool.rb +34 -0
- data/lib/llm_gateway/base_client.rb +5 -7
- data/lib/llm_gateway/clients/anthropic.rb +10 -9
- data/lib/llm_gateway/clients/claude_code/oauth_flow.rb +2 -2
- data/lib/llm_gateway/clients/groq.rb +8 -6
- data/lib/llm_gateway/clients/openai.rb +22 -20
- data/lib/llm_gateway/clients/openai_codex/oauth_flow.rb +4 -4
- data/lib/llm_gateway/prompt.rb +107 -52
- data/lib/llm_gateway/utils.rb +116 -13
- data/lib/llm_gateway/version.rb +1 -1
- data/lib/llm_gateway.rb +7 -21
- metadata +13 -2
|
@@ -0,0 +1,386 @@
|
|
|
1
|
+
# Migration Guide: 0.5.0 to 0.6.0
|
|
2
|
+
|
|
3
|
+
This guide covers user-facing changes between `v0.5.0` and the latest commit on the 0.6.0 branch.
|
|
4
|
+
|
|
5
|
+
## Summary
|
|
6
|
+
|
|
7
|
+
0.6.0 separates provider authentication/configuration from model selection.
|
|
8
|
+
|
|
9
|
+
- Provider config now contains only provider/auth settings such as `provider`, `api_key`, `access_token`, and `account_id`.
|
|
10
|
+
- `model_key` is no longer accepted in provider/client configuration.
|
|
11
|
+
- Pass the model per request with `model:` when calling `chat`, `stream`, Responses/Codex methods, or embeddings.
|
|
12
|
+
- Legacy provider keys such as `openai_apikey_responses` were removed. Use the shorter provider keys.
|
|
13
|
+
- `LlmGateway::Prompt` now accepts/configures a provider and model separately, and uses `stream` internally.
|
|
14
|
+
- The `client.model_key` reader was removed; track the selected model at the call site or read it from returned messages.
|
|
15
|
+
- Streaming events now expose accumulated partial messages during the stream, while `:message_end` exposes the final message through `event.message`.
|
|
16
|
+
- Non-final stream event hashes now include `partial`; normal stream consumers are unaffected, but strict `event.to_h` snapshots/comparisons may need updates.
|
|
17
|
+
- Normalized usage counters were renamed to concise keys: `:input`, `:cache_write`, `:cache_read`, `:output`, and `:total`; `:reasoning_tokens` was removed.
|
|
18
|
+
- Streamed assistant messages now include `timestamp` as Unix milliseconds.
|
|
19
|
+
- Custom stream mappers must initialize with provider/API metadata and emit a final `:message_end` patch.
|
|
20
|
+
|
|
21
|
+
## 1. Replace legacy provider keys
|
|
22
|
+
|
|
23
|
+
0.6.0 removes the backward-compatible legacy provider registry entries.
|
|
24
|
+
|
|
25
|
+
| 0.5.0 provider key | 0.6.0 provider key |
|
|
26
|
+
|---|---|
|
|
27
|
+
| `anthropic_apikey_messages` | `anthropic_messages` |
|
|
28
|
+
| `anthropic_oauth_messages` | `anthropic_messages` |
|
|
29
|
+
| `openai_apikey_completions` | `openai_completions` |
|
|
30
|
+
| `openai_apikey_responses` | `openai_responses` |
|
|
31
|
+
| `openai_oauth_codex` | `openai_codex` |
|
|
32
|
+
| `groq_apikey_completions` | `groq_completions` |
|
|
33
|
+
|
|
34
|
+
### Before
|
|
35
|
+
|
|
36
|
+
```ruby
|
|
37
|
+
adapter = LlmGateway.build_provider(
|
|
38
|
+
provider: "openai_apikey_responses",
|
|
39
|
+
api_key: ENV.fetch("OPENAI_API_KEY"),
|
|
40
|
+
model_key: "gpt-5.4"
|
|
41
|
+
)
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### After
|
|
45
|
+
|
|
46
|
+
```ruby
|
|
47
|
+
adapter = LlmGateway.build_provider(
|
|
48
|
+
provider: "openai_responses",
|
|
49
|
+
api_key: ENV.fetch("OPENAI_API_KEY")
|
|
50
|
+
)
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## 2. Move `model_key` from provider config to request calls
|
|
54
|
+
|
|
55
|
+
`model_key` is no longer a provider option. Passing it to `LlmGateway.build_provider` raises:
|
|
56
|
+
|
|
57
|
+
```text
|
|
58
|
+
ArgumentError: model_key is no longer a provider option; pass model: to chat/stream instead
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Pass `model:` on each request instead.
|
|
62
|
+
|
|
63
|
+
### Streaming
|
|
64
|
+
|
|
65
|
+
```ruby
|
|
66
|
+
# Before
|
|
67
|
+
adapter = LlmGateway.build_provider(
|
|
68
|
+
provider: "openai_apikey_responses",
|
|
69
|
+
api_key: ENV.fetch("OPENAI_API_KEY"),
|
|
70
|
+
model_key: "gpt-5.4"
|
|
71
|
+
)
|
|
72
|
+
result = adapter.stream("Write one short sentence about Ruby.")
|
|
73
|
+
|
|
74
|
+
# After
|
|
75
|
+
adapter = LlmGateway.build_provider(
|
|
76
|
+
provider: "openai_responses",
|
|
77
|
+
api_key: ENV.fetch("OPENAI_API_KEY")
|
|
78
|
+
)
|
|
79
|
+
result = adapter.stream("Write one short sentence about Ruby.", model: "gpt-5.4")
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### Configure arrays
|
|
83
|
+
|
|
84
|
+
```ruby
|
|
85
|
+
# Before
|
|
86
|
+
LlmGateway.configure([
|
|
87
|
+
{
|
|
88
|
+
name: "primary",
|
|
89
|
+
config: {
|
|
90
|
+
provider: "groq_apikey_completions",
|
|
91
|
+
api_key: ENV.fetch("GROQ_API_KEY"),
|
|
92
|
+
model_key: "openai/gpt-oss-120b"
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
])
|
|
96
|
+
|
|
97
|
+
# After
|
|
98
|
+
LlmGateway.configure([
|
|
99
|
+
{
|
|
100
|
+
name: "primary",
|
|
101
|
+
config: {
|
|
102
|
+
provider: "groq_completions",
|
|
103
|
+
api_key: ENV.fetch("GROQ_API_KEY")
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
])
|
|
107
|
+
|
|
108
|
+
LlmGateway.configured_clients.fetch("primary").stream(
|
|
109
|
+
"Hello",
|
|
110
|
+
model: "openai/gpt-oss-120b"
|
|
111
|
+
)
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## 3. Update direct client usage
|
|
115
|
+
|
|
116
|
+
Direct clients no longer take `model_key:` in their constructors.
|
|
117
|
+
|
|
118
|
+
```ruby
|
|
119
|
+
# Before
|
|
120
|
+
client = LlmGateway::Clients::OpenAI.new(
|
|
121
|
+
api_key: ENV.fetch("OPENAI_API_KEY"),
|
|
122
|
+
model_key: "gpt-5.4"
|
|
123
|
+
)
|
|
124
|
+
client.stream(messages)
|
|
125
|
+
|
|
126
|
+
# After
|
|
127
|
+
client = LlmGateway::Clients::OpenAI.new(
|
|
128
|
+
api_key: ENV.fetch("OPENAI_API_KEY")
|
|
129
|
+
)
|
|
130
|
+
client.stream(messages, model: "gpt-5.4")
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
The same pattern applies to:
|
|
134
|
+
|
|
135
|
+
- `LlmGateway::Clients::Anthropic#chat` / `#stream`
|
|
136
|
+
- `LlmGateway::Clients::OpenAI#chat` / `#stream` / `#responses` / `#stream_responses`
|
|
137
|
+
- `LlmGateway::Clients::OpenAI#chat_codex` / `#stream_codex`
|
|
138
|
+
- `LlmGateway::Clients::Groq#chat` / `#stream`
|
|
139
|
+
|
|
140
|
+
Embeddings also take a per-call model:
|
|
141
|
+
|
|
142
|
+
```ruby
|
|
143
|
+
client.generate_embeddings(input, model: "text-embedding-3-large")
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
If `model:` is omitted, each client falls back to its own default model.
|
|
147
|
+
|
|
148
|
+
## 4. Update `LlmGateway::Prompt` classes
|
|
149
|
+
|
|
150
|
+
`Prompt` no longer looks up a configured client by comparing a string to `client.model_key`. It now keeps the provider and model as separate values.
|
|
151
|
+
|
|
152
|
+
If you previously called `Prompt.new("gpt-5.4")`, update that code. The first initializer argument is now a provider adapter, not a model lookup key. Configure a provider on the class or pass one to the initializer.
|
|
153
|
+
|
|
154
|
+
### Class-level configuration
|
|
155
|
+
|
|
156
|
+
```ruby
|
|
157
|
+
class SummaryPrompt < LlmGateway::Prompt
|
|
158
|
+
self.provider = LlmGateway.build_provider(
|
|
159
|
+
provider: "openai_responses",
|
|
160
|
+
api_key: ENV.fetch("OPENAI_API_KEY")
|
|
161
|
+
)
|
|
162
|
+
self.model = "gpt-5.4"
|
|
163
|
+
|
|
164
|
+
def prompt
|
|
165
|
+
"Summarize this text."
|
|
166
|
+
end
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
SummaryPrompt.new.run
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
### Instance-level configuration
|
|
174
|
+
|
|
175
|
+
```ruby
|
|
176
|
+
provider = LlmGateway.build_provider(
|
|
177
|
+
provider: "anthropic_messages",
|
|
178
|
+
api_key: ENV.fetch("ANTHROPIC_API_KEY")
|
|
179
|
+
)
|
|
180
|
+
|
|
181
|
+
SummaryPrompt.new(provider, "claude-sonnet-4-20250514").run
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
### Per-call overrides
|
|
185
|
+
|
|
186
|
+
```ruby
|
|
187
|
+
prompt = SummaryPrompt.new(default_provider, "gpt-5.1")
|
|
188
|
+
|
|
189
|
+
prompt.stream(
|
|
190
|
+
provider: other_provider,
|
|
191
|
+
model: "gpt-5.4",
|
|
192
|
+
reasoning: "high"
|
|
193
|
+
)
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
If you subclassed `Prompt` and called or overrode `post`, migrate that code to `stream`. `run` now calls `stream` internally.
|
|
197
|
+
|
|
198
|
+
## 5. Stop using `client.model_key`
|
|
199
|
+
|
|
200
|
+
Direct clients no longer expose a `model_key` reader because model selection is no longer client/provider state.
|
|
201
|
+
|
|
202
|
+
```ruby
|
|
203
|
+
# Before
|
|
204
|
+
client = LlmGateway::Clients::OpenAI.new(
|
|
205
|
+
api_key: ENV.fetch("OPENAI_API_KEY"),
|
|
206
|
+
model_key: "gpt-5.4"
|
|
207
|
+
)
|
|
208
|
+
puts client.model_key
|
|
209
|
+
|
|
210
|
+
# After
|
|
211
|
+
client = LlmGateway::Clients::OpenAI.new(
|
|
212
|
+
api_key: ENV.fetch("OPENAI_API_KEY")
|
|
213
|
+
)
|
|
214
|
+
model = "gpt-5.4"
|
|
215
|
+
result = client.stream(messages, model: model)
|
|
216
|
+
# Track `model` at the call site when you need it later.
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
## 6. OAuth provider names
|
|
220
|
+
|
|
221
|
+
OAuth is now represented by credentials, not by separate legacy provider keys.
|
|
222
|
+
|
|
223
|
+
```ruby
|
|
224
|
+
# Before
|
|
225
|
+
adapter = LlmGateway.build_provider(
|
|
226
|
+
provider: "openai_oauth_codex",
|
|
227
|
+
access_token: current_access_token,
|
|
228
|
+
model_key: "gpt-5.4"
|
|
229
|
+
)
|
|
230
|
+
|
|
231
|
+
# After
|
|
232
|
+
adapter = LlmGateway.build_provider(
|
|
233
|
+
provider: "openai_codex",
|
|
234
|
+
access_token: current_access_token
|
|
235
|
+
)
|
|
236
|
+
|
|
237
|
+
adapter.stream("Hello from OAuth auth", model: "gpt-5.4")
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
For Anthropic OAuth, use `provider: "anthropic_messages"` with an `access_token`.
|
|
241
|
+
|
|
242
|
+
## 7. Update stream callback handling
|
|
243
|
+
|
|
244
|
+
The final `:message_end` stream callback event changed shape.
|
|
245
|
+
|
|
246
|
+
In 0.5.x, `:message_end` was an `AssistantStreamMessageEvent` and exposed the accumulated message through `event.partial`.
|
|
247
|
+
|
|
248
|
+
In 0.6.0, `:message_end` is an `AssistantStreamMessageEndEvent` and exposes the final complete `AssistantMessage` through `event.message`. It does not expose `partial`.
|
|
249
|
+
|
|
250
|
+
```ruby
|
|
251
|
+
response = adapter.stream("Hello", model: "gpt-5.4") do |event|
|
|
252
|
+
case event.type
|
|
253
|
+
when :text_delta
|
|
254
|
+
print event.delta
|
|
255
|
+
when :message_end
|
|
256
|
+
final_message = event.message
|
|
257
|
+
puts final_message.provider
|
|
258
|
+
puts final_message.api
|
|
259
|
+
end
|
|
260
|
+
end
|
|
261
|
+
|
|
262
|
+
# The stream return value is the same final AssistantMessage.
|
|
263
|
+
response # => AssistantMessage
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
If you previously handled every event as if it had `partial`, branch on `event.type == :message_end` first or check `respond_to?(:partial)`.
|
|
267
|
+
|
|
268
|
+
```ruby
|
|
269
|
+
adapter.stream("Hello", model: "gpt-5.4") do |event|
|
|
270
|
+
if event.type == :message_end
|
|
271
|
+
persist(event.message.to_h)
|
|
272
|
+
elsif event.respond_to?(:partial)
|
|
273
|
+
update_ui(event.partial)
|
|
274
|
+
end
|
|
275
|
+
end
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
## 8. Update usage accounting keys
|
|
279
|
+
|
|
280
|
+
Normalized `AssistantMessage#usage` and final stream `event.usage` patches now use provider-independent concise keys plus `:raw` for the original provider usage/token payload:
|
|
281
|
+
|
|
282
|
+
| 0.5.x key | 0.6.0 key |
|
|
283
|
+
|---|---|
|
|
284
|
+
| `:input_tokens` | `:input` |
|
|
285
|
+
| `:cache_creation_input_tokens` | `:cache_write` |
|
|
286
|
+
| `:cache_read_input_tokens` | `:cache_read` |
|
|
287
|
+
| `:output_tokens` | `:output` |
|
|
288
|
+
| computed normalized total | `:total` |
|
|
289
|
+
| original provider usage payload | `:raw` |
|
|
290
|
+
| `:reasoning_tokens` | removed |
|
|
291
|
+
|
|
292
|
+
`:reasoning_tokens` was removed because providers expose and calculate reasoning token counts inconsistently. Use the streamed/final `ReasoningContent` blocks for reasoning text, and treat usage as the normalized token buckets above.
|
|
293
|
+
|
|
294
|
+
```ruby
|
|
295
|
+
# Before
|
|
296
|
+
result.usage[:input_tokens]
|
|
297
|
+
result.usage[:cache_read_input_tokens]
|
|
298
|
+
result.usage[:output_tokens]
|
|
299
|
+
|
|
300
|
+
# After
|
|
301
|
+
result.usage[:input]
|
|
302
|
+
result.usage[:cache_read]
|
|
303
|
+
result.usage[:output]
|
|
304
|
+
```
|
|
305
|
+
|
|
306
|
+
When checking cache behavior, use `usage[:cache_read]` and `usage[:cache_write]`. `usage[:total]` is computed as `input + cache_write + cache_read + output`. Use `usage[:raw]` when you need provider-specific token fields that are not part of the normalized counters.
|
|
307
|
+
|
|
308
|
+
## 9. Account for timestamps on streamed messages
|
|
309
|
+
|
|
310
|
+
`PartialAssistantMessage` and `AssistantMessage` now include a `timestamp` field in Unix milliseconds. Provider-supplied timestamps are preserved when available; otherwise the accumulator assigns one.
|
|
311
|
+
|
|
312
|
+
```ruby
|
|
313
|
+
response = adapter.stream("Hello", model: "gpt-5.4") do |event|
|
|
314
|
+
puts event.partial.timestamp if event.respond_to?(:partial)
|
|
315
|
+
end
|
|
316
|
+
|
|
317
|
+
puts response.timestamp
|
|
318
|
+
puts response.to_h[:timestamp]
|
|
319
|
+
```
|
|
320
|
+
|
|
321
|
+
If you instantiate `PartialAssistantMessage` or `AssistantMessage` directly in tests or custom integrations, include `timestamp:`.
|
|
322
|
+
|
|
323
|
+
## 10. Update custom stream mappers
|
|
324
|
+
|
|
325
|
+
If you implemented a custom adapter or stream mapper, update it for the new final-message flow.
|
|
326
|
+
|
|
327
|
+
`LlmGateway::Adapters::StreamMapper` now requires provider/API metadata:
|
|
328
|
+
|
|
329
|
+
```ruby
|
|
330
|
+
mapper = MyStreamMapper.new(provider: "openai", api: "responses")
|
|
331
|
+
```
|
|
332
|
+
|
|
333
|
+
`Adapter#stream` passes these values automatically when it instantiates the configured mapper, but direct mapper construction and custom initializers must accept/pass these keywords.
|
|
334
|
+
|
|
335
|
+
Custom mappers must also push a final normalized end patch. Use the normalized usage keys shown above for final `usage`.
|
|
336
|
+
|
|
337
|
+
```ruby
|
|
338
|
+
push_patches([
|
|
339
|
+
{ type: :message_delta, delta: { stop_reason: "stop" }, usage: { output: 12 } },
|
|
340
|
+
{ type: :message_end }
|
|
341
|
+
], &block)
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
`StreamMapper#result` now returns the final `AssistantMessage` created by the `:message_end` patch. If a custom mapper never emits `:message_end`, `adapter.stream` will not have a final message to return.
|
|
345
|
+
|
|
346
|
+
## 11. Cross-provider handoff note
|
|
347
|
+
|
|
348
|
+
Message sanitization for cross-provider/model handoffs now receives the target model from the request options. When replaying or handing off transcripts across providers/models, pass `model:` explicitly on the destination call so model-specific sanitizer behavior can run.
|
|
349
|
+
|
|
350
|
+
```ruby
|
|
351
|
+
next_response = target_adapter.stream(
|
|
352
|
+
transcript_from_another_provider,
|
|
353
|
+
model: "gpt-5.4"
|
|
354
|
+
)
|
|
355
|
+
```
|
|
356
|
+
|
|
357
|
+
## 12. Stream event hash snapshots
|
|
358
|
+
|
|
359
|
+
Non-final stream events now expose a `partial` assistant message, so `event.to_h` includes an additional `partial` field.
|
|
360
|
+
|
|
361
|
+
This is additive for normal stream callback consumers:
|
|
362
|
+
|
|
363
|
+
```ruby
|
|
364
|
+
adapter.stream("Hello", model: "gpt-5.4") do |event|
|
|
365
|
+
puts event.type
|
|
366
|
+
puts event.delta if event.respond_to?(:delta)
|
|
367
|
+
end
|
|
368
|
+
```
|
|
369
|
+
|
|
370
|
+
If your tests or application code compare full `event.to_h` hashes or snapshot serialized events, update those expectations to include or ignore `partial`.
|
|
371
|
+
|
|
372
|
+
## Checklist
|
|
373
|
+
|
|
374
|
+
- [ ] Replace all legacy provider keys with the new provider keys.
|
|
375
|
+
- [ ] Remove `model_key:` from `build_provider`, `configure`, and direct client constructors.
|
|
376
|
+
- [ ] Remove any direct reads of `client.model_key` / `adapter.client.model_key`.
|
|
377
|
+
- [ ] Add `model:` to `chat`, `stream`, Responses/Codex, and embeddings calls where you need a specific model.
|
|
378
|
+
- [ ] Update `Prompt` subclasses to configure `provider` and `model` separately.
|
|
379
|
+
- [ ] Replace `Prompt.new("model-key")` model lookup usage with explicit provider/model configuration.
|
|
380
|
+
- [ ] Replace custom `Prompt#post` usage with `Prompt#stream`.
|
|
381
|
+
- [ ] Update stream callbacks to read `event.message` for `:message_end` and `event.partial` only for non-final events.
|
|
382
|
+
- [ ] Rename normalized usage lookups to `:input`, `:cache_write`, `:cache_read`, `:output`, and `:total`; use `:raw` for provider-specific token fields; remove `:reasoning_tokens` handling.
|
|
383
|
+
- [ ] Include/read `timestamp` on streamed partial and final assistant messages where you construct or persist those objects.
|
|
384
|
+
- [ ] Update custom stream mappers to accept `provider:` / `api:`, emit normalized usage keys, and emit `{ type: :message_end }`.
|
|
385
|
+
- [ ] For cross-provider handoffs, pass the target `model:` explicitly.
|
|
386
|
+
- [ ] Update strict `event.to_h` stream event snapshots/comparisons for the new `partial` field.
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
# Migration Guide: 0.6.0 to 0.7.0
|
|
2
|
+
|
|
3
|
+
This release refactors `LlmGateway::Prompt` around the normalized streaming response model and adds first-class prompt-owned tool loops.
|
|
4
|
+
|
|
5
|
+
## Breaking changes
|
|
6
|
+
|
|
7
|
+
### `Prompt.new` uses keyword arguments
|
|
8
|
+
|
|
9
|
+
Prompt instance configuration is now keyword-only:
|
|
10
|
+
|
|
11
|
+
```ruby
|
|
12
|
+
# Before
|
|
13
|
+
SummaryPrompt.new(provider, "claude-sonnet-4-20250514").run
|
|
14
|
+
|
|
15
|
+
# After
|
|
16
|
+
SummaryPrompt.new(
|
|
17
|
+
provider: provider,
|
|
18
|
+
model: "claude-sonnet-4-20250514"
|
|
19
|
+
).run
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
The same applies when overriding class defaults for `reasoning`, `cache_key`, or `cache_retention`.
|
|
23
|
+
|
|
24
|
+
Class-level prompt defaults should be assigned with writer methods:
|
|
25
|
+
|
|
26
|
+
```ruby
|
|
27
|
+
class SummaryPrompt < LlmGateway::Prompt
|
|
28
|
+
self.provider = provider
|
|
29
|
+
self.model = "gpt-5.4"
|
|
30
|
+
self.reasoning = "medium"
|
|
31
|
+
end
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
If you used the older setter-style calls (`provider value` or `model value`) in prompt subclasses, switch to `self.provider = value` / `self.model = value`.
|
|
35
|
+
|
|
36
|
+
### `Prompt#run` uses `stream` and normalized `AssistantMessage`
|
|
37
|
+
|
|
38
|
+
`run` now calls the configured provider's `stream` method and expects it to return a normalized `LlmGateway::AssistantMessage` with `content` blocks.
|
|
39
|
+
|
|
40
|
+
If you use test doubles or custom providers with `Prompt`, update them from hash-like chat responses:
|
|
41
|
+
|
|
42
|
+
```ruby
|
|
43
|
+
# Before
|
|
44
|
+
{ choices: [ { content: "hello" } ] }
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Have them return a normalized `AssistantMessage` instead:
|
|
48
|
+
|
|
49
|
+
```ruby
|
|
50
|
+
LlmGateway::AssistantMessage.new(
|
|
51
|
+
id: "msg_123",
|
|
52
|
+
model: "gpt-5.4",
|
|
53
|
+
role: "assistant",
|
|
54
|
+
stop_reason: "stop",
|
|
55
|
+
provider: "openai",
|
|
56
|
+
api: "responses",
|
|
57
|
+
timestamp: (Time.now.to_f * 1000).to_i, # Unix milliseconds
|
|
58
|
+
usage: {},
|
|
59
|
+
content: [ { type: "text", text: "hello" } ]
|
|
60
|
+
)
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
`run` returns the final normalized `AssistantMessage` after tool handling is complete. It no longer extracts or concatenates text content for you; inspect `response.content` when you need text or other blocks.
|
|
64
|
+
|
|
65
|
+
`after_execute` callbacks now receive only the final `AssistantMessage` instead of both the message and extracted text.
|
|
66
|
+
|
|
67
|
+
Prompt callback storage now uses Rails-style `class_attribute` inheritance. Register callbacks with `before_execute` / `after_execute` or assign a duplicated callback array on the subclass; avoid mutating inherited callback arrays directly with `before_execute_callbacks << ...` because that can affect related classes.
|
|
68
|
+
|
|
69
|
+
### `extract_response` and `parse_response` hooks were removed
|
|
70
|
+
|
|
71
|
+
`Prompt#run` no longer calls custom `extract_response` or `parse_response` methods.
|
|
72
|
+
|
|
73
|
+
Move response transformation outside the prompt call, or wrap `run` in your subclass:
|
|
74
|
+
|
|
75
|
+
```ruby
|
|
76
|
+
class JsonPrompt < LlmGateway::Prompt
|
|
77
|
+
def prompt
|
|
78
|
+
"Return JSON."
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
def run_json(**options)
|
|
82
|
+
response = run(**options)
|
|
83
|
+
text = response.content.select { |block| block.type == "text" }.map(&:text).join
|
|
84
|
+
JSON.parse(text)
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### Tools are declared with `TOOLS`
|
|
90
|
+
|
|
91
|
+
Prompt tools are now class-level tool classes declared in a `TOOLS` constant. `Prompt#tools` returns their provider definitions.
|
|
92
|
+
|
|
93
|
+
```ruby
|
|
94
|
+
class AddTool < LlmGateway::Tool
|
|
95
|
+
name "add"
|
|
96
|
+
description "Adds two numbers"
|
|
97
|
+
input_schema(type: "object")
|
|
98
|
+
cache true # optional cache_control marker where supported
|
|
99
|
+
|
|
100
|
+
def execute(input)
|
|
101
|
+
input[:left] + input[:right]
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
class MathPrompt < LlmGateway::Prompt
|
|
106
|
+
TOOLS = [AddTool].freeze
|
|
107
|
+
|
|
108
|
+
def prompt
|
|
109
|
+
"What is 2 + 3? Use the add tool."
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
If a prompt has no tools, `tools` now returns `[]` instead of `nil`.
|
|
115
|
+
|
|
116
|
+
### `run` automatically loops over tool calls
|
|
117
|
+
|
|
118
|
+
When the assistant returns `tool_use` content blocks, `Prompt#run` now:
|
|
119
|
+
|
|
120
|
+
1. Finds the matching class in `TOOLS` by tool name.
|
|
121
|
+
2. Executes `tool_class.new.execute(input)`.
|
|
122
|
+
3. Appends the assistant message and a user `tool_result` message.
|
|
123
|
+
4. Calls `stream` again.
|
|
124
|
+
5. Repeats until the response has no `tool_use` blocks.
|
|
125
|
+
|
|
126
|
+
Unknown tools and tool execution errors are returned to the model as `tool_result` content rather than raised.
|
|
127
|
+
|
|
128
|
+
### Prompt input is resolved once per run
|
|
129
|
+
|
|
130
|
+
`prompt` is evaluated once at the start of `run`. The same initial input is used when building follow-up messages for tool results, so dynamic or expensive prompt builders are not re-evaluated during a single run.
|
|
131
|
+
|
|
132
|
+
### `Prompt#stream` accepts explicit input and forwards reasoning/cache options
|
|
133
|
+
|
|
134
|
+
`stream` now has this signature:
|
|
135
|
+
|
|
136
|
+
```ruby
|
|
137
|
+
stream(input = prompt, provider: nil, model: nil, reasoning: nil, **options, &block)
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
You can still call `stream` with no input, but subclasses or callers can now provide a transcript directly:
|
|
141
|
+
|
|
142
|
+
```ruby
|
|
143
|
+
prompt.stream([{ role: "user", content: "Hello" }], model: "gpt-5.4")
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
`Prompt` also now forwards `reasoning:` when configured on the class, instance, `run`, or `stream` call.
|
|
147
|
+
|
|
148
|
+
### Prompt-level cache options
|
|
149
|
+
|
|
150
|
+
Prompt instances accept and forward cache options:
|
|
151
|
+
|
|
152
|
+
```ruby
|
|
153
|
+
SummaryPrompt.new(
|
|
154
|
+
provider: provider,
|
|
155
|
+
model: "gpt-5.4",
|
|
156
|
+
cache_key: "summary-v1",
|
|
157
|
+
cache_retention: "short"
|
|
158
|
+
).run
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
These are passed to providers as managed `cache_key` / `cache_retention` stream options. For providers that support cache control on system/tool blocks, `cache_retention` may also apply cache metadata to the prompt-owned `system_prompt` and tool definitions. Tool classes can also opt into cache metadata with `cache true`.
|
|
162
|
+
|
|
163
|
+
### Stream callbacks may see server-tool events and content blocks
|
|
164
|
+
|
|
165
|
+
Provider-hosted tools (for example OpenAI code interpreter or Anthropic code execution) are normalized as distinct server-tool blocks:
|
|
166
|
+
|
|
167
|
+
- `server_tool_use`
|
|
168
|
+
- `server_tool_result`
|
|
169
|
+
- provider-specific `*_tool_result` blocks during streaming/finalization
|
|
170
|
+
|
|
171
|
+
Stream callbacks may now receive additional event types when server tools are used:
|
|
172
|
+
|
|
173
|
+
- `:tool_result_start`
|
|
174
|
+
- `:tool_result_delta`
|
|
175
|
+
- `:tool_result_end`
|
|
176
|
+
|
|
177
|
+
`:tool_start` events also expose `event.tool_type`, which is either `"tool_use"` or `"server_tool_use"`.
|
|
178
|
+
|
|
179
|
+
If your stream handler exhaustively switches on event/content types, add fallbacks or handlers for these server-tool cases. Cross-provider handoff sanitization may convert server-tool blocks to regular `tool_use` / `tool_result` blocks when replaying transcripts on a different provider/API.
|
|
180
|
+
|
|
181
|
+
## Migration checklist
|
|
182
|
+
|
|
183
|
+
- [ ] Replace positional `Prompt.new(provider, model)` calls with `Prompt.new(provider: provider, model: model)`.
|
|
184
|
+
- [ ] Replace prompt class setter-style calls (`provider value`, `model value`) with `self.provider = value` / `self.model = value`.
|
|
185
|
+
- [ ] Update custom provider/test doubles used by `Prompt` to return `AssistantMessage`.
|
|
186
|
+
- [ ] Remove `extract_response` and `parse_response` hooks; inspect, parse, or transform the returned `AssistantMessage` after `run`.
|
|
187
|
+
- [ ] Update `after_execute` callbacks to accept the final `AssistantMessage` only.
|
|
188
|
+
- [ ] Replace direct mutations of `before_execute_callbacks` / `after_execute_callbacks` with the callback registration methods or explicit subclass assignments.
|
|
189
|
+
- [ ] Move prompt tool definitions to a `TOOLS = [ToolClass]` constant.
|
|
190
|
+
- [ ] Account for automatic tool-loop execution in `run`.
|
|
191
|
+
- [ ] Update any `tools.nil?` checks; no-tool prompts now expose `[]`.
|
|
192
|
+
- [ ] Use `cache_key:` / `cache_retention:` on prompt instances when prompt caching is needed.
|
|
193
|
+
- [ ] Add stream/content handling for server-tool event types if your callback code is exhaustive.
|
|
@@ -15,12 +15,15 @@ module LlmGateway
|
|
|
15
15
|
raise LlmGateway::Errors::MissingMapperForProvider, "No stream_mapper configured" unless stream_mapper
|
|
16
16
|
|
|
17
17
|
normalized_input = map_input({
|
|
18
|
-
messages: sanitize_messages(normalize_messages(message)),
|
|
18
|
+
messages: sanitize_messages(normalize_messages(message), target_model: options[:model]),
|
|
19
19
|
tools: tools,
|
|
20
20
|
system: normalize_system(system)
|
|
21
21
|
})
|
|
22
22
|
|
|
23
|
-
mapper = stream_mapper.new
|
|
23
|
+
mapper = stream_mapper.new(
|
|
24
|
+
provider: LlmGateway::Client.provider_id_from_client(client),
|
|
25
|
+
api: api_name
|
|
26
|
+
)
|
|
24
27
|
|
|
25
28
|
perform_stream(
|
|
26
29
|
normalized_input[:messages],
|
|
@@ -31,12 +34,7 @@ module LlmGateway
|
|
|
31
34
|
mapper.map(chunk, &block)
|
|
32
35
|
end
|
|
33
36
|
|
|
34
|
-
|
|
35
|
-
mapper.result.merge(
|
|
36
|
-
provider: LlmGateway::Client.provider_id_from_client(client),
|
|
37
|
-
api: api_name
|
|
38
|
-
)
|
|
39
|
-
)
|
|
37
|
+
mapper.result
|
|
40
38
|
end
|
|
41
39
|
|
|
42
40
|
def upload_file(filename:, content:, mime_type: "application/octet-stream", purpose: "assistants")
|
|
@@ -99,14 +97,13 @@ module LlmGateway
|
|
|
99
97
|
nil
|
|
100
98
|
end
|
|
101
99
|
|
|
102
|
-
def sanitize_messages(messages)
|
|
100
|
+
def sanitize_messages(messages, target_model: nil)
|
|
103
101
|
return messages unless input_sanitizer
|
|
104
102
|
|
|
105
103
|
target_provider = LlmGateway::Client.provider_id_from_client(client)
|
|
106
104
|
target_api = api_name
|
|
107
|
-
target_model = client.model_key
|
|
108
105
|
|
|
109
|
-
return messages
|
|
106
|
+
return messages unless target_provider.present? && target_api.present? && target_model.present?
|
|
110
107
|
|
|
111
108
|
input_sanitizer.sanitize(
|
|
112
109
|
messages,
|
|
@@ -26,6 +26,8 @@ module LlmGateway
|
|
|
26
26
|
map_tool_use_content(content)
|
|
27
27
|
when "tool_result"
|
|
28
28
|
map_tool_result_content(content)
|
|
29
|
+
when "server_tool_result"
|
|
30
|
+
map_server_tool_result_content(content)
|
|
29
31
|
when "thinking", "reasoning"
|
|
30
32
|
map_reasoning_content(content)
|
|
31
33
|
else
|
|
@@ -122,6 +124,28 @@ module LlmGateway
|
|
|
122
124
|
}
|
|
123
125
|
end
|
|
124
126
|
|
|
127
|
+
def map_server_tool_result_content(content)
|
|
128
|
+
{
|
|
129
|
+
type: native_server_tool_result_type(content),
|
|
130
|
+
tool_use_id: content[:tool_use_id],
|
|
131
|
+
content: content[:content]
|
|
132
|
+
}
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
def native_server_tool_result_type(content)
|
|
136
|
+
return content[:name] if content[:name] && content[:name] != "server_tool_result"
|
|
137
|
+
|
|
138
|
+
result_type = content.dig(:content, :type)
|
|
139
|
+
case result_type
|
|
140
|
+
when "bash_code_execution_result"
|
|
141
|
+
"bash_code_execution_tool_result"
|
|
142
|
+
when /^text_editor_code_execution_.*_result$/
|
|
143
|
+
"text_editor_code_execution_tool_result"
|
|
144
|
+
else
|
|
145
|
+
content[:name] || "server_tool_result"
|
|
146
|
+
end
|
|
147
|
+
end
|
|
148
|
+
|
|
125
149
|
def map_reasoning_content(content)
|
|
126
150
|
result = {
|
|
127
151
|
type: "thinking",
|