llm_gateway 0.5.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +38 -0
  3. data/README.md +350 -43
  4. data/docs/migration_guide_0.6.0.md +386 -0
  5. data/docs/migration_guide_0.7.0.md +193 -0
  6. data/lib/llm_gateway/adapters/adapter.rb +8 -11
  7. data/lib/llm_gateway/adapters/anthropic/input_mapper.rb +24 -0
  8. data/lib/llm_gateway/adapters/anthropic/stream_mapper.rb +61 -11
  9. data/lib/llm_gateway/adapters/anthropic_option_mapper.rb +1 -1
  10. data/lib/llm_gateway/adapters/groq/option_mapper.rb +1 -1
  11. data/lib/llm_gateway/adapters/input_message_sanitizer.rb +98 -7
  12. data/lib/llm_gateway/adapters/normalized_stream_accumulator.rb +132 -39
  13. data/lib/llm_gateway/adapters/openai/chat_completions/option_mapper.rb +1 -1
  14. data/lib/llm_gateway/adapters/openai/chat_completions/stream_mapper.rb +40 -16
  15. data/lib/llm_gateway/adapters/openai/responses/input_mapper.rb +47 -31
  16. data/lib/llm_gateway/adapters/openai/responses/option_mapper.rb +1 -1
  17. data/lib/llm_gateway/adapters/openai/responses/stream_mapper.rb +173 -24
  18. data/lib/llm_gateway/adapters/stream_mapper.rb +9 -2
  19. data/lib/llm_gateway/adapters/structs.rb +140 -55
  20. data/lib/llm_gateway/agents/event.rb +105 -0
  21. data/lib/llm_gateway/agents/file_session_manager.rb +100 -0
  22. data/lib/llm_gateway/agents/harness.rb +176 -0
  23. data/lib/llm_gateway/agents/in_memory_session_manager.rb +222 -0
  24. data/lib/llm_gateway/agents/tools/bash_tool.rb +132 -0
  25. data/lib/llm_gateway/agents/tools/edit_tool.rb +215 -0
  26. data/lib/llm_gateway/agents/tools/read_tool.rb +143 -0
  27. data/lib/llm_gateway/agents/tools/tool_utils.rb +164 -0
  28. data/lib/llm_gateway/agents/tools/write_tool.rb +34 -0
  29. data/lib/llm_gateway/base_client.rb +5 -7
  30. data/lib/llm_gateway/clients/anthropic.rb +10 -9
  31. data/lib/llm_gateway/clients/claude_code/oauth_flow.rb +2 -2
  32. data/lib/llm_gateway/clients/groq.rb +8 -6
  33. data/lib/llm_gateway/clients/openai.rb +22 -20
  34. data/lib/llm_gateway/clients/openai_codex/oauth_flow.rb +4 -4
  35. data/lib/llm_gateway/prompt.rb +107 -52
  36. data/lib/llm_gateway/utils.rb +116 -13
  37. data/lib/llm_gateway/version.rb +1 -1
  38. data/lib/llm_gateway.rb +7 -21
  39. metadata +13 -2
@@ -0,0 +1,386 @@
1
+ # Migration Guide: 0.5.0 to 0.6.0
2
+
3
+ This guide covers user-facing changes between `v0.5.0` and the latest commit on the 0.6.0 branch.
4
+
5
+ ## Summary
6
+
7
+ 0.6.0 separates provider authentication/configuration from model selection.
8
+
9
+ - Provider config now contains only provider/auth settings such as `provider`, `api_key`, `access_token`, and `account_id`.
10
+ - `model_key` is no longer accepted in provider/client configuration.
11
+ - Pass the model per request with `model:` when calling `chat`, `stream`, Responses/Codex methods, or embeddings.
12
+ - Legacy provider keys such as `openai_apikey_responses` were removed. Use the shorter provider keys.
13
+ - `LlmGateway::Prompt` now accepts/configures a provider and model separately, and uses `stream` internally.
14
+ - The `client.model_key` reader was removed; track the selected model at the call site or read it from returned messages.
15
+ - Streaming events now expose accumulated partial messages during the stream, while `:message_end` exposes the final message through `event.message`.
16
+ - Non-final stream event hashes now include `partial`; normal stream consumers are unaffected, but strict `event.to_h` snapshots/comparisons may need updates.
17
+ - Normalized usage counters were renamed to concise keys: `:input`, `:cache_write`, `:cache_read`, `:output`, and `:total`; `:reasoning_tokens` was removed.
18
+ - Streamed assistant messages now include `timestamp` as Unix milliseconds.
19
+ - Custom stream mappers must initialize with provider/API metadata and emit a final `:message_end` patch.
20
+
21
+ ## 1. Replace legacy provider keys
22
+
23
+ 0.6.0 removes the backward-compatible legacy provider registry entries.
24
+
25
+ | 0.5.0 provider key | 0.6.0 provider key |
26
+ |---|---|
27
+ | `anthropic_apikey_messages` | `anthropic_messages` |
28
+ | `anthropic_oauth_messages` | `anthropic_messages` |
29
+ | `openai_apikey_completions` | `openai_completions` |
30
+ | `openai_apikey_responses` | `openai_responses` |
31
+ | `openai_oauth_codex` | `openai_codex` |
32
+ | `groq_apikey_completions` | `groq_completions` |
33
+
34
+ ### Before
35
+
36
+ ```ruby
37
+ adapter = LlmGateway.build_provider(
38
+ provider: "openai_apikey_responses",
39
+ api_key: ENV.fetch("OPENAI_API_KEY"),
40
+ model_key: "gpt-5.4"
41
+ )
42
+ ```
43
+
44
+ ### After
45
+
46
+ ```ruby
47
+ adapter = LlmGateway.build_provider(
48
+ provider: "openai_responses",
49
+ api_key: ENV.fetch("OPENAI_API_KEY")
50
+ )
51
+ ```
52
+
53
+ ## 2. Move `model_key` from provider config to request calls
54
+
55
+ `model_key` is no longer a provider option. Passing it to `LlmGateway.build_provider` raises:
56
+
57
+ ```text
58
+ ArgumentError: model_key is no longer a provider option; pass model: to chat/stream instead
59
+ ```
60
+
61
+ Pass `model:` on each request instead.
62
+
63
+ ### Streaming
64
+
65
+ ```ruby
66
+ # Before
67
+ adapter = LlmGateway.build_provider(
68
+ provider: "openai_apikey_responses",
69
+ api_key: ENV.fetch("OPENAI_API_KEY"),
70
+ model_key: "gpt-5.4"
71
+ )
72
+ result = adapter.stream("Write one short sentence about Ruby.")
73
+
74
+ # After
75
+ adapter = LlmGateway.build_provider(
76
+ provider: "openai_responses",
77
+ api_key: ENV.fetch("OPENAI_API_KEY")
78
+ )
79
+ result = adapter.stream("Write one short sentence about Ruby.", model: "gpt-5.4")
80
+ ```
81
+
82
+ ### Configure arrays
83
+
84
+ ```ruby
85
+ # Before
86
+ LlmGateway.configure([
87
+ {
88
+ name: "primary",
89
+ config: {
90
+ provider: "groq_apikey_completions",
91
+ api_key: ENV.fetch("GROQ_API_KEY"),
92
+ model_key: "openai/gpt-oss-120b"
93
+ }
94
+ }
95
+ ])
96
+
97
+ # After
98
+ LlmGateway.configure([
99
+ {
100
+ name: "primary",
101
+ config: {
102
+ provider: "groq_completions",
103
+ api_key: ENV.fetch("GROQ_API_KEY")
104
+ }
105
+ }
106
+ ])
107
+
108
+ LlmGateway.configured_clients.fetch("primary").stream(
109
+ "Hello",
110
+ model: "openai/gpt-oss-120b"
111
+ )
112
+ ```
113
+
114
+ ## 3. Update direct client usage
115
+
116
+ Direct clients no longer take `model_key:` in their constructors.
117
+
118
+ ```ruby
119
+ # Before
120
+ client = LlmGateway::Clients::OpenAI.new(
121
+ api_key: ENV.fetch("OPENAI_API_KEY"),
122
+ model_key: "gpt-5.4"
123
+ )
124
+ client.stream(messages)
125
+
126
+ # After
127
+ client = LlmGateway::Clients::OpenAI.new(
128
+ api_key: ENV.fetch("OPENAI_API_KEY")
129
+ )
130
+ client.stream(messages, model: "gpt-5.4")
131
+ ```
132
+
133
+ The same pattern applies to:
134
+
135
+ - `LlmGateway::Clients::Anthropic#chat` / `#stream`
136
+ - `LlmGateway::Clients::OpenAI#chat` / `#stream` / `#responses` / `#stream_responses`
137
+ - `LlmGateway::Clients::OpenAI#chat_codex` / `#stream_codex`
138
+ - `LlmGateway::Clients::Groq#chat` / `#stream`
139
+
140
+ Embeddings also take a per-call model:
141
+
142
+ ```ruby
143
+ client.generate_embeddings(input, model: "text-embedding-3-large")
144
+ ```
145
+
146
+ If `model:` is omitted, each client still falls back to its own default model.
147
+
148
+ ## 4. Update `LlmGateway::Prompt` classes
149
+
150
+ `Prompt` no longer looks up a configured client by comparing a string to `client.model_key`. It now keeps the provider and model as separate values.
151
+
152
+ If you previously called `Prompt.new("gpt-5.4")`, update that code. The first initializer argument is now a provider adapter, not a model lookup key. Configure a provider on the class or pass one to the initializer.
153
+
154
+ ### Class-level configuration
155
+
156
+ ```ruby
157
+ class SummaryPrompt < LlmGateway::Prompt
158
+ self.provider = LlmGateway.build_provider(
159
+ provider: "openai_responses",
160
+ api_key: ENV.fetch("OPENAI_API_KEY")
161
+ )
162
+ self.model = "gpt-5.4"
163
+
164
+ def prompt
165
+ "Summarize this text."
166
+ end
167
+ end
168
+
169
+ SummaryPrompt.new.run
170
+ ```
171
+
172
+
173
+ ### Instance-level configuration
174
+
175
+ ```ruby
176
+ provider = LlmGateway.build_provider(
177
+ provider: "anthropic_messages",
178
+ api_key: ENV.fetch("ANTHROPIC_API_KEY")
179
+ )
180
+
181
+ SummaryPrompt.new(provider, "claude-sonnet-4-20250514").run
182
+ ```
183
+
184
+ ### Per-call overrides
185
+
186
+ ```ruby
187
+ prompt = SummaryPrompt.new(default_provider, "gpt-5.1")
188
+
189
+ prompt.stream(
190
+ provider: other_provider,
191
+ model: "gpt-5.4",
192
+ reasoning: "high"
193
+ )
194
+ ```
195
+
196
+ If you subclassed `Prompt` and called or overrode `post`, migrate that code to `stream`. `run` now calls `stream` internally.
197
+
198
+ ## 5. Stop using `client.model_key`
199
+
200
+ Direct clients no longer expose a `model_key` reader because model selection is no longer client/provider state.
201
+
202
+ ```ruby
203
+ # Before
204
+ client = LlmGateway::Clients::OpenAI.new(
205
+ api_key: ENV.fetch("OPENAI_API_KEY"),
206
+ model_key: "gpt-5.4"
207
+ )
208
+ puts client.model_key
209
+
210
+ # After
211
+ client = LlmGateway::Clients::OpenAI.new(
212
+ api_key: ENV.fetch("OPENAI_API_KEY")
213
+ )
214
+ model = "gpt-5.4"
215
+ result = client.stream(messages, model: model)
216
+ # Track `model` at the call site when you need it later.
217
+ ```
218
+
219
+ ## 6. OAuth provider names
220
+
221
+ OAuth is now represented by credentials, not by separate legacy provider keys.
222
+
223
+ ```ruby
224
+ # Before
225
+ adapter = LlmGateway.build_provider(
226
+ provider: "openai_oauth_codex",
227
+ access_token: current_access_token,
228
+ model_key: "gpt-5.4"
229
+ )
230
+
231
+ # After
232
+ adapter = LlmGateway.build_provider(
233
+ provider: "openai_codex",
234
+ access_token: current_access_token
235
+ )
236
+
237
+ adapter.stream("Hello from OAuth auth", model: "gpt-5.4")
238
+ ```
239
+
240
+ For Anthropic OAuth, use `provider: "anthropic_messages"` with an `access_token`.
241
+
242
+ ## 7. Update stream callback handling
243
+
244
+ The final `:message_end` stream callback event changed shape.
245
+
246
+ In 0.5.x, `:message_end` was an `AssistantStreamMessageEvent` and exposed the accumulated message through `event.partial`.
247
+
248
+ In 0.6.0, `:message_end` is an `AssistantStreamMessageEndEvent` and exposes the final complete `AssistantMessage` through `event.message`. It does not expose `partial`.
249
+
250
+ ```ruby
251
+ response = adapter.stream("Hello", model: "gpt-5.4") do |event|
252
+ case event.type
253
+ when :text_delta
254
+ print event.delta
255
+ when :message_end
256
+ final_message = event.message
257
+ puts final_message.provider
258
+ puts final_message.api
259
+ end
260
+ end
261
+
262
+ # The stream return value is the same final AssistantMessage.
263
+ response # => AssistantMessage
264
+ ```
265
+
266
+ If you previously handled every event as if it had `partial`, branch on `event.type == :message_end` first or check `respond_to?(:partial)`.
267
+
268
+ ```ruby
269
+ adapter.stream("Hello", model: "gpt-5.4") do |event|
270
+ if event.type == :message_end
271
+ persist(event.message.to_h)
272
+ elsif event.respond_to?(:partial)
273
+ update_ui(event.partial)
274
+ end
275
+ end
276
+ ```
277
+
278
+ ## 8. Update usage accounting keys
279
+
280
+ Normalized `AssistantMessage#usage` and final stream `event.usage` patches now use provider-independent concise keys plus `:raw` for the original provider usage/token payload:
281
+
282
+ | 0.5.x key | 0.6.0 key |
283
+ |---|---|
284
+ | `:input_tokens` | `:input` |
285
+ | `:cache_creation_input_tokens` | `:cache_write` |
286
+ | `:cache_read_input_tokens` | `:cache_read` |
287
+ | `:output_tokens` | `:output` |
288
+ | computed normalized total | `:total` |
289
+ | original provider usage payload | `:raw` |
290
+ | `:reasoning_tokens` | removed |
291
+
292
+ `reasoning_tokens` was removed because providers expose and calculate reasoning token counts inconsistently. Use the streamed/final `ReasoningContent` blocks for reasoning text, and treat usage as the normalized token buckets above.
293
+
294
+ ```ruby
295
+ # Before
296
+ result.usage[:input_tokens]
297
+ result.usage[:cache_read_input_tokens]
298
+ result.usage[:output_tokens]
299
+
300
+ # After
301
+ result.usage[:input]
302
+ result.usage[:cache_read]
303
+ result.usage[:output]
304
+ ```
305
+
306
+ When checking cache behavior, use `usage[:cache_read]` and `usage[:cache_write]`. `usage[:total]` is computed as `input + cache_write + cache_read + output`. Use `usage[:raw]` when you need provider-specific token fields that are not part of the normalized counters.
307
+
308
+ ## 9. Account for timestamps on streamed messages
309
+
310
+ `PartialAssistantMessage` and `AssistantMessage` now include a `timestamp` field in Unix milliseconds. Provider-supplied timestamps are preserved when available; otherwise the accumulator assigns one.
311
+
312
+ ```ruby
313
+ response = adapter.stream("Hello", model: "gpt-5.4") do |event|
314
+ puts event.partial.timestamp if event.respond_to?(:partial)
315
+ end
316
+
317
+ puts response.timestamp
318
+ puts response.to_h[:timestamp]
319
+ ```
320
+
321
+ If you instantiate `PartialAssistantMessage` or `AssistantMessage` directly in tests or custom integrations, include `timestamp:`.
322
+
323
+ ## 10. Update custom stream mappers
324
+
325
+ If you implemented a custom adapter or stream mapper, update it for the new final-message flow.
326
+
327
+ `LlmGateway::Adapters::StreamMapper` now requires provider/API metadata:
328
+
329
+ ```ruby
330
+ mapper = MyStreamMapper.new(provider: "openai", api: "responses")
331
+ ```
332
+
333
+ `Adapter#stream` passes these values automatically when it instantiates the configured mapper, but direct mapper construction and custom initializers must accept/pass these keywords.
334
+
335
+ Custom mappers must also push a final normalized end patch. Use the normalized usage keys shown above for final `usage`.
336
+
337
+ ```ruby
338
+ push_patches([
339
+ { type: :message_delta, delta: { stop_reason: "stop" }, usage: { output: 12 } },
340
+ { type: :message_end }
341
+ ], &block)
342
+ ```
343
+
344
+ `StreamMapper#result` now returns the final `AssistantMessage` created by the `:message_end` patch. If a custom mapper never emits `:message_end`, `adapter.stream` will not have a final message to return.
345
+
346
+ ## 11. Cross-provider handoff note
347
+
348
+ Message sanitization for cross-provider/model handoffs now receives the target model from the request options. When replaying or handing off transcripts across providers/models, pass `model:` explicitly on the destination call so model-specific sanitizer behavior can run.
349
+
350
+ ```ruby
351
+ next_response = target_adapter.stream(
352
+ transcript_from_another_provider,
353
+ model: "gpt-5.4"
354
+ )
355
+ ```
356
+
357
+ ## 12. Stream event hash snapshots
358
+
359
+ Non-final stream events now expose a `partial` assistant message, so `event.to_h` includes an additional `partial` field.
360
+
361
+ This is additive for normal stream callback consumers:
362
+
363
+ ```ruby
364
+ adapter.stream("Hello", model: "gpt-5.4") do |event|
365
+ puts event.type
366
+ puts event.delta if event.respond_to?(:delta)
367
+ end
368
+ ```
369
+
370
+ If your tests or application code compare full `event.to_h` hashes or snapshot serialized events, update those expectations to include or ignore `partial`.
371
+
372
+ ## Checklist
373
+
374
+ - [ ] Replace all legacy provider keys with the new provider keys.
375
+ - [ ] Remove `model_key:` from `build_provider`, `configure`, and direct client constructors.
376
+ - [ ] Remove any direct reads of `client.model_key` / `adapter.client.model_key`.
377
+ - [ ] Add `model:` to `chat`, `stream`, Responses/Codex, and embeddings calls where you need a specific model.
378
+ - [ ] Update `Prompt` subclasses to configure `provider` and `model` separately.
379
+ - [ ] Replace `Prompt.new("model-key")` model lookup usage with explicit provider/model configuration.
380
+ - [ ] Replace custom `Prompt#post` usage with `Prompt#stream`.
381
+ - [ ] Update stream callbacks to read `event.message` for `:message_end` and `event.partial` only for non-final events.
382
+ - [ ] Rename normalized usage lookups to `:input`, `:cache_write`, `:cache_read`, `:output`, and `:total`; use `:raw` for provider-specific token fields; remove `:reasoning_tokens` handling.
383
+ - [ ] Include/read `timestamp` on streamed partial and final assistant messages where you construct or persist those objects.
384
+ - [ ] Update custom stream mappers to accept `provider:` / `api:`, emit normalized usage keys, and emit `{ type: :message_end }`.
385
+ - [ ] For cross-provider handoffs, pass the target `model:` explicitly.
386
+ - [ ] Update strict `event.to_h` stream event snapshots/comparisons for the new `partial` field.
@@ -0,0 +1,193 @@
1
+ # Migration Guide: 0.6.0 to 0.7.0
2
+
3
+ This release refactors `LlmGateway::Prompt` around the normalized streaming response model and adds first-class prompt-owned tool loops.
4
+
5
+ ## Breaking changes
6
+
7
+ ### `Prompt.new` uses keyword arguments
8
+
9
+ Prompt instance configuration is now keyword-only:
10
+
11
+ ```ruby
12
+ # Before
13
+ SummaryPrompt.new(provider, "claude-sonnet-4-20250514").run
14
+
15
+ # After
16
+ SummaryPrompt.new(
17
+ provider: provider,
18
+ model: "claude-sonnet-4-20250514"
19
+ ).run
20
+ ```
21
+
22
+ The same applies when overriding class defaults for `reasoning`, `cache_key`, or `cache_retention`.
23
+
24
+ Class-level prompt defaults should be assigned with writer methods:
25
+
26
+ ```ruby
27
+ class SummaryPrompt < LlmGateway::Prompt
28
+ self.provider = provider
29
+ self.model = "gpt-5.4"
30
+ self.reasoning = "medium"
31
+ end
32
+ ```
33
+
34
+ If you used the older setter-style calls (`provider value` or `model value`) in prompt subclasses, switch to `self.provider = value` / `self.model = value`.
35
+
36
+ ### `Prompt#run` uses `stream` and normalized `AssistantMessage`
37
+
38
+ `run` now calls the configured provider's `stream` method and expects it to return a normalized `LlmGateway::AssistantMessage` with `content` blocks.
39
+
40
+ If you use test doubles or custom providers with `Prompt`, update them from hash-like chat responses:
41
+
42
+ ```ruby
43
+ # Before
44
+ { choices: [ { content: "hello" } ] }
45
+ ```
46
+
47
+ Return normalized `AssistantMessage` objects instead:
48
+
49
+ ```ruby
50
+ LlmGateway::AssistantMessage.new(
51
+ id: "msg_123",
52
+ model: "gpt-5.4",
53
+ role: "assistant",
54
+ stop_reason: "stop",
55
+ provider: "openai",
56
+ api: "responses",
57
+ timestamp: (Time.now.to_f * 1000).to_i, # Unix milliseconds
58
+ usage: {},
59
+ content: [ { type: "text", text: "hello" } ]
60
+ )
61
+ ```
62
+
63
+ `run` returns the final normalized `AssistantMessage` after tool handling is complete. It no longer extracts or concatenates text content for you; inspect `response.content` when you need text or other blocks.
64
+
65
+ `after_execute` callbacks now receive only the final `AssistantMessage` instead of both the message and extracted text.
66
+
67
+ Prompt callback storage now uses Rails-style `class_attribute` inheritance. Register callbacks with `before_execute` / `after_execute` or assign a duplicated callback array on the subclass; avoid mutating inherited callback arrays directly with `before_execute_callbacks << ...` because that can affect related classes.
68
+
69
+ ### `extract_response` and `parse_response` hooks were removed
70
+
71
+ `Prompt#run` no longer calls custom `extract_response` or `parse_response` methods.
72
+
73
+ Move response transformation outside the prompt call, or wrap `run` in your subclass:
74
+
75
+ ```ruby
76
+ class JsonPrompt < LlmGateway::Prompt
77
+ def prompt
78
+ "Return JSON."
79
+ end
80
+
81
+ def run_json(**options)
82
+ response = run(**options)
83
+ text = response.content.select { |block| block.type == "text" }.map(&:text).join
84
+ JSON.parse(text)
85
+ end
86
+ end
87
+ ```
88
+
89
+ ### Tools are declared with `TOOLS`
90
+
91
+ Prompt tools are now class-level tool classes declared in a `TOOLS` constant. `Prompt#tools` returns their provider definitions.
92
+
93
+ ```ruby
94
+ class AddTool < LlmGateway::Tool
95
+ name "add"
96
+ description "Adds two numbers"
97
+ input_schema(type: "object")
98
+ cache true # optional cache_control marker where supported
99
+
100
+ def execute(input)
101
+ input[:left] + input[:right]
102
+ end
103
+ end
104
+
105
+ class MathPrompt < LlmGateway::Prompt
106
+ TOOLS = [AddTool].freeze
107
+
108
+ def prompt
109
+ "What is 2 + 3? Use the add tool."
110
+ end
111
+ end
112
+ ```
113
+
114
+ If a prompt has no tools, `tools` now returns `[]` instead of `nil`.
115
+
116
+ ### `run` automatically loops over tool calls
117
+
118
+ When the assistant returns `tool_use` content blocks, `Prompt#run` now:
119
+
120
+ 1. Finds the matching class in `TOOLS` by tool name.
121
+ 2. Executes `tool_class.new.execute(input)`.
122
+ 3. Appends the assistant message and a user `tool_result` message.
123
+ 4. Calls `stream` again.
124
+ 5. Repeats until the response has no `tool_use` blocks.
125
+
126
+ Unknown tools and tool execution errors are returned to the model as `tool_result` content rather than raised.
127
+
128
+ ### Prompt input is resolved once per run
129
+
130
+ `prompt` is evaluated once at the start of `run`. The same initial input is used when building follow-up messages for tool results, so dynamic or expensive prompt builders are not re-evaluated during a single run.
131
+
132
+ ### `Prompt#stream` accepts explicit input and forwards reasoning/cache options
133
+
134
+ `stream` now has this signature:
135
+
136
+ ```ruby
137
+ stream(input = prompt, provider: nil, model: nil, reasoning: nil, **options, &block)
138
+ ```
139
+
140
+ You can still call `stream` with no input, but subclasses or callers can now provide a transcript directly:
141
+
142
+ ```ruby
143
+ prompt.stream([{ role: "user", content: "Hello" }], model: "gpt-5.4")
144
+ ```
145
+
146
+ `Prompt` also now forwards `reasoning:` when configured on the class, instance, `run`, or `stream` call.
147
+
148
+ ### Prompt-level cache options
149
+
150
+ Prompt instances accept and forward cache options:
151
+
152
+ ```ruby
153
+ SummaryPrompt.new(
154
+ provider: provider,
155
+ model: "gpt-5.4",
156
+ cache_key: "summary-v1",
157
+ cache_retention: "short"
158
+ ).run
159
+ ```
160
+
161
+ These are passed to providers as managed `cache_key` / `cache_retention` stream options. For providers that support cache control on system/tool blocks, `cache_retention` may also apply cache metadata to the prompt-owned `system_prompt` and tool definitions. Tool classes can also opt into cache metadata with `cache true`.
162
+
163
+ ### Stream callbacks may see server-tool events and content blocks
164
+
165
+ Provider-hosted tools (for example OpenAI code interpreter or Anthropic code execution) are normalized as distinct server-tool blocks:
166
+
167
+ - `server_tool_use`
168
+ - `server_tool_result`
169
+ - provider-specific `*_tool_result` blocks during streaming/finalization
170
+
171
+ Stream callbacks may now receive additional event types when server tools are used:
172
+
173
+ - `:tool_result_start`
174
+ - `:tool_result_delta`
175
+ - `:tool_result_end`
176
+
177
+ `:tool_start` events also expose `event.tool_type`, which is either `"tool_use"` or `"server_tool_use"`.
178
+
179
+ If your stream handler exhaustively switches on event/content types, add fallbacks or handlers for these server-tool cases. Cross-provider handoff sanitization may convert server-tool blocks to regular `tool_use` / `tool_result` blocks when replaying transcripts on a different provider/API.
180
+
181
+ ## Migration checklist
182
+
183
+ - [ ] Replace positional `Prompt.new(provider, model)` calls with `Prompt.new(provider: provider, model: model)`.
184
+ - [ ] Replace prompt class setter-style calls (`provider value`, `model value`) with `self.provider = value` / `self.model = value`.
185
+ - [ ] Update custom provider/test doubles used by `Prompt` to return `AssistantMessage`.
186
+ - [ ] Remove `extract_response` and `parse_response` hooks; inspect, parse, or transform the returned `AssistantMessage` after `run`.
187
+ - [ ] Update `after_execute` callbacks to accept the final `AssistantMessage` only.
188
+ - [ ] Replace direct mutations of `before_execute_callbacks` / `after_execute_callbacks` with the callback registration methods or explicit subclass assignments.
189
+ - [ ] Move prompt tool definitions to a `TOOLS = [ToolClass]` constant.
190
+ - [ ] Account for automatic tool-loop execution in `run`.
191
+ - [ ] Update any `tools.nil?` checks; no-tool prompts now expose `[]`.
192
+ - [ ] Use `cache_key:` / `cache_retention:` on prompt instances when prompt caching is needed.
193
+ - [ ] Add stream/content handling for server-tool event types if your callback code is exhaustive.
@@ -15,12 +15,15 @@ module LlmGateway
15
15
  raise LlmGateway::Errors::MissingMapperForProvider, "No stream_mapper configured" unless stream_mapper
16
16
 
17
17
  normalized_input = map_input({
18
- messages: sanitize_messages(normalize_messages(message)),
18
+ messages: sanitize_messages(normalize_messages(message), target_model: options[:model]),
19
19
  tools: tools,
20
20
  system: normalize_system(system)
21
21
  })
22
22
 
23
- mapper = stream_mapper.new
23
+ mapper = stream_mapper.new(
24
+ provider: LlmGateway::Client.provider_id_from_client(client),
25
+ api: api_name
26
+ )
24
27
 
25
28
  perform_stream(
26
29
  normalized_input[:messages],
@@ -31,12 +34,7 @@ module LlmGateway
31
34
  mapper.map(chunk, &block)
32
35
  end
33
36
 
34
- AssistantMessage.new(
35
- mapper.result.merge(
36
- provider: LlmGateway::Client.provider_id_from_client(client),
37
- api: api_name
38
- )
39
- )
37
+ mapper.result
40
38
  end
41
39
 
42
40
  def upload_file(filename:, content:, mime_type: "application/octet-stream", purpose: "assistants")
@@ -99,14 +97,13 @@ module LlmGateway
99
97
  nil
100
98
  end
101
99
 
102
- def sanitize_messages(messages)
100
+ def sanitize_messages(messages, target_model: nil)
103
101
  return messages unless input_sanitizer
104
102
 
105
103
  target_provider = LlmGateway::Client.provider_id_from_client(client)
106
104
  target_api = api_name
107
- target_model = client.model_key
108
105
 
109
- return messages if target_provider.nil? || target_api.nil? || target_model.nil?
106
+ return messages unless target_provider.present? && target_api.present? && target_model.present?
110
107
 
111
108
  input_sanitizer.sanitize(
112
109
  messages,
@@ -26,6 +26,8 @@ module LlmGateway
26
26
  map_tool_use_content(content)
27
27
  when "tool_result"
28
28
  map_tool_result_content(content)
29
+ when "server_tool_result"
30
+ map_server_tool_result_content(content)
29
31
  when "thinking", "reasoning"
30
32
  map_reasoning_content(content)
31
33
  else
@@ -122,6 +124,28 @@ module LlmGateway
122
124
  }
123
125
  end
124
126
 
127
+ def map_server_tool_result_content(content)
128
+ {
129
+ type: native_server_tool_result_type(content),
130
+ tool_use_id: content[:tool_use_id],
131
+ content: content[:content]
132
+ }
133
+ end
134
+
135
+ def native_server_tool_result_type(content)
136
+ return content[:name] if content[:name] && content[:name] != "server_tool_result"
137
+
138
+ result_type = content.dig(:content, :type)
139
+ case result_type
140
+ when "bash_code_execution_result"
141
+ "bash_code_execution_tool_result"
142
+ when /^text_editor_code_execution_.*_result$/
143
+ "text_editor_code_execution_tool_result"
144
+ else
145
+ content[:name] || "server_tool_result"
146
+ end
147
+ end
148
+
125
149
  def map_reasoning_content(content)
126
150
  result = {
127
151
  type: "thinking",