llm_gateway 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/README.md +255 -1
  4. data/docs/migration_guide_0.7.0.md +193 -0
  5. data/lib/llm_gateway/adapters/adapter.rb +1 -1
  6. data/lib/llm_gateway/adapters/anthropic/input_mapper.rb +24 -0
  7. data/lib/llm_gateway/adapters/anthropic/stream_mapper.rb +31 -8
  8. data/lib/llm_gateway/adapters/anthropic_option_mapper.rb +1 -1
  9. data/lib/llm_gateway/adapters/groq/option_mapper.rb +1 -1
  10. data/lib/llm_gateway/adapters/input_message_sanitizer.rb +98 -7
  11. data/lib/llm_gateway/adapters/normalized_stream_accumulator.rb +48 -16
  12. data/lib/llm_gateway/adapters/openai/chat_completions/option_mapper.rb +1 -1
  13. data/lib/llm_gateway/adapters/openai/responses/input_mapper.rb +47 -31
  14. data/lib/llm_gateway/adapters/openai/responses/option_mapper.rb +1 -1
  15. data/lib/llm_gateway/adapters/openai/responses/stream_mapper.rb +131 -3
  16. data/lib/llm_gateway/adapters/structs.rb +45 -10
  17. data/lib/llm_gateway/agents/event.rb +105 -0
  18. data/lib/llm_gateway/agents/file_session_manager.rb +100 -0
  19. data/lib/llm_gateway/agents/harness.rb +176 -0
  20. data/lib/llm_gateway/agents/in_memory_session_manager.rb +222 -0
  21. data/lib/llm_gateway/agents/tools/bash_tool.rb +132 -0
  22. data/lib/llm_gateway/agents/tools/edit_tool.rb +215 -0
  23. data/lib/llm_gateway/agents/tools/read_tool.rb +143 -0
  24. data/lib/llm_gateway/agents/tools/tool_utils.rb +164 -0
  25. data/lib/llm_gateway/agents/tools/write_tool.rb +34 -0
  26. data/lib/llm_gateway/base_client.rb +3 -3
  27. data/lib/llm_gateway/clients/anthropic.rb +5 -5
  28. data/lib/llm_gateway/clients/claude_code/oauth_flow.rb +2 -2
  29. data/lib/llm_gateway/clients/openai.rb +2 -2
  30. data/lib/llm_gateway/clients/openai_codex/oauth_flow.rb +4 -4
  31. data/lib/llm_gateway/prompt.rb +105 -68
  32. data/lib/llm_gateway/utils.rb +116 -13
  33. data/lib/llm_gateway/version.rb +1 -1
  34. data/lib/llm_gateway.rb +4 -0
  35. metadata +12 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '086d7bdff1cb0b6b3febb78d025d7ccfe4b53c6fd40fcb5cddebd335d786e437'
4
- data.tar.gz: 1b2ea3af95f44d27c0c1636da321d24dc036fad8a242263f608948c79ac11f88
3
+ metadata.gz: 173ab613e57543956e39d70f4a38fc865bc6b6bac4e8dfe319be9c2928810f77
4
+ data.tar.gz: 46c761a838aee6c3cebad151467555cba8ab70480e952ab741874c2d8acc13e8
5
5
  SHA512:
6
- metadata.gz: '0147478704832819ee6d8fbe4e0e6203f4e598d72fd3b23138b550de9da64fb90cd8354713a5553244acf17b9c6fe0a89a0b5cab624f03ec7382e12f11aebb21'
7
- data.tar.gz: 22e1ff9571717ebe8f39a31cd36d37815c6053def32ac1e125a103ccb516a98b37aeb225edae899c6a9ecf121df5344bbbe6f6166a2e1d89ffa05db881c70e14
6
+ metadata.gz: 0f21f7288e4d8d374ea77d96ee3110b08a260a2e06ef6fd6372357b88abb5e936d2cbeae934720a0a5e17ad431bdce7027a3cd68e4fc60460c6cb5d0f02acc0a
7
+ data.tar.gz: ecf15206364c5ef7d632c0c421294deb1929e508b4287828acbee91e4a4182fb0efd5fb12cca58d6909499b2b33197070bd4c19e232bed8f25fd12f86e2dd604
data/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # Changelog
2
2
 
3
+ ## [v0.7.0](https://github.com/Hyper-Unearthing/llm_gateway/tree/v0.7.0) (2026-06-03)
4
+
5
+ [Full Changelog](https://github.com/Hyper-Unearthing/llm_gateway/compare/v0.6.0...v0.7.0)
6
+
7
+ **Merged pull requests:**
8
+
9
+ - feat: add agent harness [\#88](https://github.com/Hyper-Unearthing/llm_gateway/pull/88) ([billybonks](https://github.com/billybonks))
10
+ - refactor: prompt to use modern patterns [\#87](https://github.com/Hyper-Unearthing/llm_gateway/pull/87) ([billybonks](https://github.com/billybonks))
11
+ - feat: change our utils to follow active support style [\#85](https://github.com/Hyper-Unearthing/llm_gateway/pull/85) ([billybonks](https://github.com/billybonks))
12
+ - feat: add reasoning level as something configurable in prompt [\#83](https://github.com/Hyper-Unearthing/llm_gateway/pull/83) ([billybonks](https://github.com/billybonks))
13
+ - feat: add support for code execution tool [\#79](https://github.com/Hyper-Unearthing/llm_gateway/pull/79) ([billybonks](https://github.com/billybonks))
14
+
3
15
  ## [v0.6.0](https://github.com/Hyper-Unearthing/llm_gateway/tree/v0.6.0) (2026-05-27)
4
16
 
5
17
  [Full Changelog](https://github.com/Hyper-Unearthing/llm_gateway/compare/v0.5.0...v0.6.0)
data/README.md CHANGED
@@ -12,10 +12,18 @@ Provide a unified translation interface for LLM Provider API's, While allowing d
12
12
  - [Provider-specific options](#provider-specific-options)
13
13
  - [Quick Start: Streaming (all events)](#quick-start-streaming-all-events)
14
14
  - [Stream API without handling events (final result only)](#stream-api-without-handling-events-final-result-only)
15
+ - [Prompt classes](#prompt-classes)
15
16
  - [Migration guides](#migration-guides)
16
17
  - [Tools](#tools)
17
18
  - [Defining Tools](#defining-tools)
18
19
  - [Handling Tool Calls](#handling-tool-calls)
20
+ - [Server Tool Use](#server-tool-use)
21
+ - [Agents](#agents)
22
+ - [Agent events](#agent-events)
23
+ - [Session managers and persistence](#session-managers-and-persistence)
24
+ - [Queues, steering, and follow-ups](#queues-steering-and-follow-ups)
25
+ - [Compaction](#compaction)
26
+ - [Built-in agent tools](#built-in-agent-tools)
19
27
  - [Image Input](#image-input)
20
28
  - [Thinking / Reasoning](#thinking--reasoning)
21
29
  - [Streaming Thinking Content](#streaming-thinking-content)
@@ -158,7 +166,7 @@ response = adapter.stream(transcript, tools: tools, model: "gpt-5.4", reasoning:
158
166
 
159
167
  # Tool-call events
160
168
  when :tool_start
161
- puts "\n[tool_start] id=#{event.id} name=#{event.name} index=#{event.content_index}"
169
+ puts "\n[tool_start] id=#{event.id} name=#{event.name} type=#{event.tool_type} index=#{event.content_index}"
162
170
  when :tool_delta
163
171
  streamed_tool_args[event.content_index] << event.delta
164
172
  print event.delta
@@ -212,6 +220,7 @@ Stream callback event families:
212
220
  - `AssistantStreamEvent` (and subclasses):
213
221
  - Text: `:text_start`, `:text_delta`, `:text_end`
214
222
  - Tool call: `:tool_start`, `:tool_delta`, `:tool_end`
223
+ - Tool result: `:tool_result_start`, `:tool_result_delta`, `:tool_result_end` (emitted by some provider-hosted/server tools)
215
224
  - Reasoning: `:reasoning_start`, `:reasoning_delta`, `:reasoning_end`
216
225
 
217
226
  Non-final stream events expose `event.partial`, a `PartialAssistantMessage` snapshot accumulated so far. The final `:message_end` event exposes the complete `AssistantMessage` as `event.message` instead.
@@ -251,8 +260,63 @@ text = result.content
251
260
  puts text
252
261
  ```
253
262
 
263
+ ## Prompt classes
264
+
265
+ `LlmGateway::Prompt` wraps a reusable prompt, provider/model defaults, callbacks, optional tools, and prompt-cache options around the `stream` API.
266
+
267
+ ```ruby
268
+ class AddTool < LlmGateway::Tool
269
+ name "add"
270
+ description "Adds two numbers"
271
+ input_schema(type: "object")
272
+ cache true # optional: mark the tool definition as cacheable where supported
273
+
274
+ def execute(input)
275
+ input[:left] + input[:right]
276
+ end
277
+ end
278
+
279
+ class MathPrompt < LlmGateway::Prompt
280
+ self.provider = LlmGateway.build_provider(
281
+ provider: "openai_responses",
282
+ api_key: ENV.fetch("OPENAI_API_KEY")
283
+ )
284
+ self.model = "gpt-5.4"
285
+
286
+ TOOLS = [AddTool].freeze
287
+
288
+ def prompt
289
+ "What is 2 + 3? Use the add tool."
290
+ end
291
+
292
+ def system_prompt
293
+ "You are a careful math assistant."
294
+ end
295
+ end
296
+
297
+ response = MathPrompt.new(
298
+ cache_key: "math-prompt-v1",
299
+ cache_retention: "short"
300
+ ).run
301
+
302
+ puts response.role # "assistant"
303
+ puts response.content.select { |block| block.type == "text" }.map(&:text).join
304
+ ```
305
+
306
+ How `Prompt` works now:
307
+
308
+ - `prompt` is evaluated once per `run`.
309
+ - `run(provider:, model:, reasoning:, **options)` calls `stream` and returns the final normalized `AssistantMessage` after any tool calls complete.
310
+ - `stream(input = prompt, provider:, model:, reasoning:, **options, &block)` forwards to the provider and returns the normalized `AssistantMessage`.
311
+ - Tools are declared as tool classes in a `TOOLS` constant. `run` automatically executes returned `tool_use` blocks, appends `tool_result` messages, and loops until no tool calls remain.
312
+ - `system_prompt`, `tools`, `model`, `reasoning`, `cache_key`, and `cache_retention` are forwarded as stream options.
313
+ - `cache_retention` can also enable provider cache control for prompt-owned system/tool blocks where supported, and `Tool.cache true` marks a tool definition with `cache_control`.
314
+ - `before_execute` callbacks receive the resolved input. `after_execute` callbacks receive the final `AssistantMessage`.
315
+ - The old `extract_response` and `parse_response` hooks are no longer called; inspect, parse, or transform the returned `AssistantMessage` after `run`.
316
+
254
317
  ## Migration guides
255
318
 
319
+ - [0.7.0 migration guide](docs/migration_guide_0.7.0.md) — update `Prompt` subclasses for normalized `AssistantMessage` return values, automatic tool loops, `TOOLS`, and removed response hooks.
256
320
  - [0.6.0 migration guide](docs/migration_guide_0.6.0.md) — move `model_key` to per-request `model:`, update provider keys, update `Prompt` usage, and migrate stream event/usage changes.
257
321
  - [Migrating from `chat` to `stream`](docs/migration-guide.md) — use `stream` without a block when you only need the final response.
258
322
 
@@ -358,6 +422,196 @@ Notes:
358
422
  - Tool results are sent back in the transcript as `{ type: "tool_result", tool_use_id:, content: }` blocks.
359
423
  - For multimodal-capable models, `tool_result` content can include image blocks when supported by the provider/model.
360
424
 
425
+ ### Server Tool Use
426
+
427
+ Some providers offer provider-hosted tools, such as OpenAI Responses code interpreter or Anthropic code execution. Pass these tools in the provider's native shape; `llm_gateway` preserves them and normalizes server tool activity in streams and final messages.
428
+
429
+ ```ruby
430
+ openai_code_interpreter = {
431
+ type: "code_interpreter",
432
+ container: { type: "auto", memory_limit: "1g" }
433
+ }
434
+
435
+ anthropic_code_execution = {
436
+ type: "code_execution_20250825",
437
+ name: "code_execution"
438
+ }
439
+
440
+ tools = provider == "openai_responses" ? [openai_code_interpreter] : [anthropic_code_execution]
441
+ response = adapter.stream("Create a chart from this CSV and save it as PNG.", tools: tools) do |event|
442
+ case event.type
443
+ when :tool_start
444
+ puts "server tool: #{event.name}" if event.tool_type == "server_tool_use"
445
+ when :tool_delta
446
+ print event.delta # streamed code/input JSON when the provider exposes it
447
+ when :tool_result_start, :tool_result_delta
448
+ print event.delta # provider-hosted result metadata/content when available
449
+ end
450
+ end
451
+
452
+ response.content.each do |block|
453
+ case block.type
454
+ when "server_tool_use"
455
+ puts "server tool #{block.name} input=#{block.input.inspect} id=#{block.id}"
456
+ when "server_tool_result"
457
+ puts "server tool result for #{block.tool_use_id}: #{block.content.inspect}"
458
+ end
459
+ end
460
+ ```
461
+
462
+ Cross-provider server tool handoffs are best-effort:
463
+
464
+ - Same provider/API replay keeps `server_tool_use` / `server_tool_result` blocks when possible.
465
+ - Cross-provider replay converts server tool uses into normal `tool_use` blocks and server tool results into `tool_result` blocks.
466
+ - `llm_gateway` does not translate server tool names between providers. Supply the target provider's server tool definition on the follow-up request.
467
+ - Some providers require the same server tool to be selected in `tools:` when replaying prior server tool activity.
468
+
469
+ ## Agents
470
+
471
+ `LlmGateway::Agents::Harness` wraps the streaming API in a stateful conversation loop. It stores session history, executes `LlmGateway::Tool` classes automatically when the model emits tool calls, appends `tool_result` messages, repeats model turns until there are no more tool calls, supports queued user messages while a turn is running, and compacts older session context when needed.
472
+
473
+ ```ruby
474
+ require "llm_gateway"
475
+ require "json"
476
+
477
+ class WeatherTool < LlmGateway::Tool
478
+ name "get_weather"
479
+ description "Get current weather for a location"
480
+ input_schema(
481
+ type: "object",
482
+ properties: {
483
+ location: { type: "string" }
484
+ },
485
+ required: ["location"]
486
+ )
487
+
488
+ def execute(input)
489
+ location = input[:location] || input["location"]
490
+
491
+ JSON.generate(
492
+ location: location,
493
+ temperature: 14,
494
+ condition: "Cloudy"
495
+ )
496
+ end
497
+ end
498
+
499
+ class WeatherHarness < LlmGateway::Agents::Harness
500
+ TOOLS = [WeatherTool]
501
+
502
+ def system_prompt
503
+ "You are a concise weather assistant. Use tools when useful."
504
+ end
505
+ end
506
+
507
+ adapter = LlmGateway.build_provider(
508
+ provider: "openai_responses",
509
+ api_key: ENV.fetch("OPENAI_API_KEY")
510
+ )
511
+
512
+ session = LlmGateway::Agents::InMemorySessionManager.new("weather-session")
513
+ harness = WeatherHarness.new(
514
+ session,
515
+ provider: adapter,
516
+ model: "gpt-5.4",
517
+ reasoning: "high"
518
+ )
519
+
520
+ harness.prompt_message(
521
+ role: "user",
522
+ content: [ { type: "text", text: "What is the weather in London?" } ]
523
+ ) do |event|
524
+ case event.type
525
+ when :agent_start
526
+ puts "Agent started"
527
+ when :turn_start
528
+ puts "Turn started"
529
+ when :message_update
530
+ # Streaming provider events are wrapped on message update events.
531
+ stream_event = event.stream_event
532
+ print stream_event.delta if stream_event.respond_to?(:delta)
533
+ when :tool_execution_start
534
+ puts "\nExecuting #{event.parameters[:name]}"
535
+ when :tool_execution_end
536
+ puts "\nTool result: #{event.result.content}"
537
+ when :agent_end
538
+ puts "\nAgent finished"
539
+ end
540
+ end
541
+
542
+ puts harness.transcript.inspect
543
+ ```
544
+
545
+ Harness behavior:
546
+
547
+ - `prompt_message(message)` accepts an LLM-shaped message hash, records it in the session, streams the provider response, records the final assistant message, executes any returned tool calls from the harness class's `TOOLS` constant, records a user `tool_result` message, and continues until no tool calls remain.
548
+ - Harnesses pass `tools`, `system_prompt`, `model`, `reasoning`, `cache_key`, and `cache_retention` through the inherited `Prompt#stream` defaults.
549
+ - Pass `model:` and optional `reasoning:` to `new`, or set them later with `harness.model = "..."` / `harness.reasoning = "..."`. Model and reasoning changes are recorded as session events.
550
+ - `harness.transcript` (also aliased as `prompt`) returns the current model input: the latest compaction summary, if any, followed by active messages.
551
+ - `harness.run` / `harness.continue` continues from the current session state without adding a new user message.
552
+
553
+ ### Agent events
554
+
555
+ When a block is passed to `prompt_message`, `run`, or `continue`, the harness emits typed events:
556
+
557
+ - `:agent_start`
558
+ - `:turn_start`
559
+ - `:message_start`
560
+ - `:message_update` with `event.stream_event` containing the normalized streaming event from the provider
561
+ - `:message_end` with `event.message`
562
+ - `:tool_execution_start` with `event.parameters` (`id`, `type`, `name`, `input`)
563
+ - `:tool_execution_end` with `event.parameters` and `event.result`
564
+ - `:turn_end` with `event.message` and `event.tool_results`
565
+ - `:agent_end`
566
+
567
+ ### Session managers and persistence
568
+
569
+ - `LlmGateway::Agents::InMemorySessionManager.new(session_id = nil)` keeps session events in memory for the lifetime of the process.
570
+ - `LlmGateway::Agents::FileSessionManager.new(file_name = nil, session_id: nil, session_start: nil, session_dir: nil)` persists session events as JSONL. If `file_name` is omitted, files are created under `LLM_GATEWAY_SESSION_DIR` or `~/.llm_gateway/sessions`.
571
+ - File sessions load existing JSONL sessions and append new events to the same file.
572
+ - Session event types include `session`, `message`, `model_change`, `reasoning_change`, and `compaction`. Queued messages are kept in memory and are persisted only when drained into the active conversation.
573
+
574
+ ### Queues, steering, and follow-ups
575
+
576
+ Calls made while a harness is already processing are queued instead of recursively starting another run.
577
+
578
+ - `prompt_message(message)` queues to the harness's default queue while busy. The default is `:next_turn`.
579
+ - `steer_message(message)`, `follow_up_message(message)`, and `next_turn_message(message)` enqueue to their matching queue while busy. When idle, they behave like `prompt_message`.
580
+ - `:steer` messages are drained before the next model request in the current run.
581
+ - `:follow_up` messages run after the current turn finishes and before `:next_turn` messages.
582
+ - `:next_turn` messages run after the current agent run completes.
583
+ - Queued messages drain as `:all` by default. Set `harness.queue_drain_mode = :one_at_a_time` to drain one FIFO message at a time.
584
+ - Set `harness.default_queue_mode = :steer`, `:follow_up`, or `:next_turn` to change where busy `prompt_message` calls are queued.
585
+
586
+ ### Compaction
587
+
588
+ Before starting a new user message and before draining queued follow-up/next-turn work, the harness checks whether compaction is needed. It compacts when either:
589
+
590
+ - the latest recorded message usage exceeds `LlmGateway::Agents::Harness::COMPACTION_TOKEN_THRESHOLD`, or
591
+ - the latest assistant message is older than `LlmGateway::Agents::Harness::COMPACTION_IDLE_THRESHOLD_SECONDS`.
592
+
593
+ Compaction calls `adapter.stream(active_messages, system: "Summarize the conversation so far for future context.", tools: [])`, stores the returned assistant message as a `compaction` event, and builds future model input as the compaction summary plus messages recorded after that compaction.
594
+
595
+ ### Built-in agent tools
596
+
597
+ The agent harness can use any `LlmGateway::Tool` subclass in its `TOOLS` constant. The library also provides optional coding-oriented tools. Require the ones you want and include them in your harness:
598
+
599
+ ```ruby
600
+ require "llm_gateway/agents/tools/read_tool"
601
+ require "llm_gateway/agents/tools/bash_tool"
602
+ require "llm_gateway/agents/tools/edit_tool"
603
+ require "llm_gateway/agents/tools/write_tool"
604
+
605
+ class CodingHarness < LlmGateway::Agents::Harness
606
+ TOOLS = [ReadTool, BashTool, EditTool, WriteTool]
607
+ end
608
+ ```
609
+
610
+ - `ReadTool` (`read`) reads text files and supported images (`jpg`, `png`, `gif`, `webp`). Text output is truncated to 2,000 lines or 50KB from the start; use `offset`/`limit` to continue through large files.
611
+ - `BashTool` (`bash`) runs a command in the current working directory, combines stdout/stderr, supports an optional timeout, truncates long output to the last 2,000 lines or 50KB, and saves full truncated output to a temp file.
612
+ - `EditTool` (`edit`) edits one file with one or more exact `edits[].oldText` → `edits[].newText` replacements. Each `oldText` must be unique in the original file and edits must not overlap.
613
+ - `WriteTool` (`write`) creates parent directories as needed and writes or overwrites a file.
614
+
361
615
  ## Image Input
362
616
 
363
617
  Send images by including an `image` content block in a user message.
@@ -0,0 +1,193 @@
1
+ # Migration guide: 0.7.0
2
+
3
+ This release refactors `LlmGateway::Prompt` around the normalized streaming response model and adds first-class prompt-owned tool loops.
4
+
5
+ ## Breaking changes
6
+
7
+ ### `Prompt.new` uses keyword arguments
8
+
9
+ Prompt instance configuration is now keyword-only:
10
+
11
+ ```ruby
12
+ # Before
13
+ SummaryPrompt.new(provider, "claude-sonnet-4-20250514").run
14
+
15
+ # After
16
+ SummaryPrompt.new(
17
+ provider: provider,
18
+ model: "claude-sonnet-4-20250514"
19
+ ).run
20
+ ```
21
+
22
+ The same applies when overriding class defaults for `reasoning`, `cache_key`, or `cache_retention`.
23
+
24
+ Class-level prompt defaults should be assigned with writer methods:
25
+
26
+ ```ruby
27
+ class SummaryPrompt < LlmGateway::Prompt
28
+ self.provider = provider
29
+ self.model = "gpt-5.4"
30
+ self.reasoning = "medium"
31
+ end
32
+ ```
33
+
34
+ If you used the older setter-style calls (`provider value` or `model value`) in prompt subclasses, switch to `self.provider = value` / `self.model = value`.
35
+
36
+ ### `Prompt#run` uses `stream` and normalized `AssistantMessage`
37
+
38
+ `run` now calls the configured provider's `stream` method and expects it to return a normalized `LlmGateway::AssistantMessage` with `content` blocks.
39
+
40
+ If you use test doubles or custom providers with `Prompt`, update them from hash-like chat responses:
41
+
42
+ ```ruby
43
+ # Before
44
+ { choices: [ { content: "hello" } ] }
45
+ ```
46
+
47
+ To `AssistantMessage` responses:
48
+
49
+ ```ruby
50
+ LlmGateway::AssistantMessage.new(
51
+ id: "msg_123",
52
+ model: "gpt-5.4",
53
+ role: "assistant",
54
+ stop_reason: "stop",
55
+ provider: "openai",
56
+ api: "responses",
57
+ timestamp: Time.now.to_i,
58
+ usage: {},
59
+ content: [ { type: "text", text: "hello" } ]
60
+ )
61
+ ```
62
+
63
+ `run` returns the final normalized `AssistantMessage` after tool handling is complete. It no longer extracts or concatenates text content for you; inspect `response.content` when you need text or other blocks.
64
+
65
+ `after_execute` callbacks now receive only the final `AssistantMessage` instead of both the message and extracted text.
66
+
67
+ Prompt callback storage now uses Rails-style `class_attribute` inheritance. Register callbacks with `before_execute` / `after_execute` or assign a duplicated callback array on the subclass; avoid mutating inherited callback arrays directly with `before_execute_callbacks << ...` because that can affect related classes.
68
+
69
+ ### `extract_response` and `parse_response` hooks were removed
70
+
71
+ `Prompt#run` no longer calls custom `extract_response` or `parse_response` methods.
72
+
73
+ Move response transformation outside the prompt call, or wrap `run` in your subclass:
74
+
75
+ ```ruby
76
+ class JsonPrompt < LlmGateway::Prompt
77
+ def prompt
78
+ "Return JSON."
79
+ end
80
+
81
+ def run_json(**options)
82
+ response = run(**options)
83
+ text = response.content.select { |block| block.type == "text" }.map(&:text).join
84
+ JSON.parse(text)
85
+ end
86
+ end
87
+ ```
88
+
89
+ ### Tools are declared with `TOOLS`
90
+
91
+ Prompt tools are now class-level tool classes declared in a `TOOLS` constant. `Prompt#tools` returns their provider definitions.
92
+
93
+ ```ruby
94
+ class AddTool < LlmGateway::Tool
95
+ name "add"
96
+ description "Adds two numbers"
97
+ input_schema(type: "object")
98
+ cache true # optional cache_control marker where supported
99
+
100
+ def execute(input)
101
+ input[:left] + input[:right]
102
+ end
103
+ end
104
+
105
+ class MathPrompt < LlmGateway::Prompt
106
+ TOOLS = [AddTool].freeze
107
+
108
+ def prompt
109
+ "What is 2 + 3? Use the add tool."
110
+ end
111
+ end
112
+ ```
113
+
114
+ If a prompt has no tools, `tools` now returns `[]` instead of `nil`.
115
+
116
+ ### `run` automatically loops over tool calls
117
+
118
+ When the assistant returns `tool_use` content blocks, `Prompt#run` now:
119
+
120
+ 1. Finds the matching class in `TOOLS` by tool name.
121
+ 2. Executes `tool_class.new.execute(input)`.
122
+ 3. Appends the assistant message and a user `tool_result` message.
123
+ 4. Calls `stream` again.
124
+ 5. Repeats until the response has no `tool_use` blocks.
125
+
126
+ Unknown tools and tool execution errors are returned to the model as `tool_result` content rather than raised.
127
+
128
+ ### Prompt input is resolved once per run
129
+
130
+ `prompt` is evaluated once at the start of `run`. The same initial input is used when building follow-up messages for tool results, so dynamic or expensive prompt builders are not re-evaluated during a single run.
131
+
132
+ ### `Prompt#stream` accepts explicit input and forwards reasoning/cache options
133
+
134
+ `stream` now has this signature:
135
+
136
+ ```ruby
137
+ stream(input = prompt, provider: nil, model: nil, reasoning: nil, **options, &block)
138
+ ```
139
+
140
+ You can still call `stream` with no input, but subclasses or callers can now provide a transcript directly:
141
+
142
+ ```ruby
143
+ prompt.stream([{ role: "user", content: "Hello" }], model: "gpt-5.4")
144
+ ```
145
+
146
+ `Prompt` also now forwards `reasoning:` when configured on the class, instance, `run`, or `stream` call.
147
+
148
+ ### Prompt-level cache options
149
+
150
+ Prompt instances accept and forward cache options:
151
+
152
+ ```ruby
153
+ SummaryPrompt.new(
154
+ provider: provider,
155
+ model: "gpt-5.4",
156
+ cache_key: "summary-v1",
157
+ cache_retention: "short"
158
+ ).run
159
+ ```
160
+
161
+ These are passed to providers as managed `cache_key` / `cache_retention` stream options. For providers that support cache control on system/tool blocks, `cache_retention` may also apply cache metadata to the prompt-owned `system_prompt` and tool definitions. Tool classes can also opt into cache metadata with `cache true`.
162
+
163
+ ### Stream callbacks may see server-tool events and content blocks
164
+
165
+ Provider-hosted tools (for example OpenAI code interpreter or Anthropic code execution) are normalized as distinct server-tool blocks:
166
+
167
+ - `server_tool_use`
168
+ - `server_tool_result`
169
+ - provider-specific `*_tool_result` blocks during streaming/finalization
170
+
171
+ Stream callbacks may now receive additional event types when server tools are used:
172
+
173
+ - `:tool_result_start`
174
+ - `:tool_result_delta`
175
+ - `:tool_result_end`
176
+
177
+ `:tool_start` events also expose `event.tool_type`, which is either `"tool_use"` or `"server_tool_use"`.
178
+
179
+ If your stream handler exhaustively switches on event/content types, add fallbacks or handlers for these server-tool cases. Cross-provider handoff sanitization may convert server-tool blocks to regular `tool_use` / `tool_result` blocks when replaying transcripts on a different provider/API.
180
+
181
+ ## Migration checklist
182
+
183
+ - [ ] Replace positional `Prompt.new(provider, model)` calls with `Prompt.new(provider: provider, model: model)`.
184
+ - [ ] Replace prompt class setter-style calls (`provider value`, `model value`) with `self.provider = value` / `self.model = value`.
185
+ - [ ] Update custom provider/test doubles used by `Prompt` to return `AssistantMessage`.
186
+ - [ ] Remove `extract_response` and `parse_response` hooks; inspect, parse, or transform the returned `AssistantMessage` after `run`.
187
+ - [ ] Update `after_execute` callbacks to accept the final `AssistantMessage` only.
188
+ - [ ] Replace direct mutations of `before_execute_callbacks` / `after_execute_callbacks` with the callback registration methods or explicit subclass assignments.
189
+ - [ ] Move prompt tool definitions to a `TOOLS = [ToolClass]` constant.
190
+ - [ ] Account for automatic tool-loop execution in `run`.
191
+ - [ ] Update any `tools.nil?` checks; no-tool prompts now expose `[]`.
192
+ - [ ] Use `cache_key:` / `cache_retention:` on prompt instances when prompt caching is needed.
193
+ - [ ] Add stream/content handling for server-tool event types if your callback code is exhaustive.
@@ -103,7 +103,7 @@ module LlmGateway
103
103
  target_provider = LlmGateway::Client.provider_id_from_client(client)
104
104
  target_api = api_name
105
105
 
106
- return messages if target_provider.nil? || target_api.nil? || target_model.nil?
106
+ return messages unless target_provider.present? && target_api.present? && target_model.present?
107
107
 
108
108
  input_sanitizer.sanitize(
109
109
  messages,
@@ -26,6 +26,8 @@ module LlmGateway
26
26
  map_tool_use_content(content)
27
27
  when "tool_result"
28
28
  map_tool_result_content(content)
29
+ when "server_tool_result"
30
+ map_server_tool_result_content(content)
29
31
  when "thinking", "reasoning"
30
32
  map_reasoning_content(content)
31
33
  else
@@ -122,6 +124,28 @@ module LlmGateway
122
124
  }
123
125
  end
124
126
 
127
+ def map_server_tool_result_content(content)
128
+ {
129
+ type: native_server_tool_result_type(content),
130
+ tool_use_id: content[:tool_use_id],
131
+ content: content[:content]
132
+ }
133
+ end
134
+
135
+ def native_server_tool_result_type(content)
136
+ return content[:name] if content[:name] && content[:name] != "server_tool_result"
137
+
138
+ result_type = content.dig(:content, :type)
139
+ case result_type
140
+ when "bash_code_execution_result"
141
+ "bash_code_execution_tool_result"
142
+ when /^text_editor_code_execution_.*_result$/
143
+ "text_editor_code_execution_tool_result"
144
+ else
145
+ content[:name] || "server_tool_result"
146
+ end
147
+ end
148
+
125
149
  def map_reasoning_content(content)
126
150
  result = {
127
151
  type: "thinking",
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "json"
4
+
3
5
  require_relative "../stream_mapper"
4
6
 
5
7
  module LlmGateway
@@ -17,20 +19,33 @@ module LlmGateway
17
19
  accumulator.push({ type: :message_start, delta: }, &block)
18
20
  when "content_block_start"
19
21
  content_block = chunk.dig(:data, :content_block) || {}
20
- @current_content_block_type = content_block[:type]
22
+ @current_content_block_type = normalize_content_block_type(content_block[:type])
21
23
 
22
24
  case @current_content_block_type
23
25
  when "thinking"
24
26
  accumulator.push({ type: :reasoning_start, delta: content_block[:thinking], signature: "" }, &block)
25
27
  when "text"
26
28
  accumulator.push({ type: :text_start, delta: content_block[:text] }, &block)
27
- when "tool_use"
29
+ when "tool_use", "server_tool_use"
28
30
  accumulator.push(
29
31
  {
30
32
  type: :tool_start,
31
33
  delta: "",
32
34
  id: content_block[:id],
33
- name: content_block[:name]
35
+ name: content_block[:name],
36
+ tool_type: @current_content_block_type
37
+ },
38
+ &block
39
+ )
40
+ when "server_tool_result"
41
+ content = content_block[:content]
42
+ result_delta = content.nil? ? "" : JSON.generate(content)
43
+ accumulator.push(
44
+ {
45
+ type: :tool_result_start,
46
+ delta: result_delta,
47
+ tool_use_id: content_block[:tool_use_id],
48
+ name: content_block[:type]
34
49
  },
35
50
  &block
36
51
  )
@@ -44,9 +59,13 @@ module LlmGateway
44
59
  when "text"
45
60
  delta = chunk.dig(:data, :delta, :text)
46
61
  accumulator.push({ type: :text_delta, delta: }, &block)
47
- when "tool_use"
62
+ when "tool_use", "server_tool_use"
48
63
  delta = chunk.dig(:data, :delta, :partial_json)
49
64
  accumulator.push({ type: :tool_delta, delta: }, &block)
65
+ when "server_tool_result"
66
+ content = chunk.dig(:data, :delta, :content)
67
+ result_delta = content.nil? ? "" : JSON.generate(content)
68
+ accumulator.push({ type: :tool_result_delta, delta: result_delta }, &block)
50
69
  end
51
70
  when "content_block_stop"
52
71
  case @current_content_block_type
@@ -54,8 +73,10 @@ module LlmGateway
54
73
  accumulator.push({ type: :reasoning_end, delta: "", signature: "" }, &block)
55
74
  when "text"
56
75
  accumulator.push({ type: :text_end, delta: "" }, &block)
57
- when "tool_use"
76
+ when "tool_use", "server_tool_use"
58
77
  accumulator.push({ type: :tool_end, delta: "" }, &block)
78
+ when "server_tool_result"
79
+ accumulator.push({ type: :tool_result_end, delta: "" }, &block)
59
80
  end
60
81
  @current_content_block_type = nil
61
82
  when "message_delta"
@@ -78,7 +99,7 @@ module LlmGateway
78
99
  private
79
100
 
80
101
  def normalized_usage(usage)
81
- usage = symbolize_keys(usage)
102
+ usage = usage.to_h.symbolize_keys
82
103
 
83
104
  input = token_count(usage[:input_tokens])
84
105
  cache_write = token_count(usage[:cache_creation_input_tokens])
@@ -99,8 +120,10 @@ module LlmGateway
99
120
  value.to_i
100
121
  end
101
122
 
102
- def symbolize_keys(hash)
103
- hash.to_h.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
123
+ def normalize_content_block_type(type)
124
+ return type unless type&.end_with?("_tool_result")
125
+
126
+ "server_tool_result"
104
127
  end
105
128
 
106
129
  def normalize_message_delta(delta)
@@ -50,7 +50,7 @@ module LlmGateway
50
50
  module_function
51
51
 
52
52
  def map(options)
53
- mapped_options = options.reject { |key, _| MANAGED_OPTIONS.include?(key) }
53
+ mapped_options = options.except(*MANAGED_OPTIONS)
54
54
  mapped_options[:max_tokens] = options[:max_completion_tokens] || DEFAULT_MAX_TOKENS
55
55
 
56
56
  response_format = options[:response_format]