llm_gateway 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/README.md +255 -1
- data/docs/migration_guide_0.7.0.md +193 -0
- data/lib/llm_gateway/adapters/adapter.rb +1 -1
- data/lib/llm_gateway/adapters/anthropic/input_mapper.rb +24 -0
- data/lib/llm_gateway/adapters/anthropic/stream_mapper.rb +31 -8
- data/lib/llm_gateway/adapters/anthropic_option_mapper.rb +1 -1
- data/lib/llm_gateway/adapters/groq/option_mapper.rb +1 -1
- data/lib/llm_gateway/adapters/input_message_sanitizer.rb +98 -7
- data/lib/llm_gateway/adapters/normalized_stream_accumulator.rb +48 -16
- data/lib/llm_gateway/adapters/openai/chat_completions/option_mapper.rb +1 -1
- data/lib/llm_gateway/adapters/openai/responses/input_mapper.rb +47 -31
- data/lib/llm_gateway/adapters/openai/responses/option_mapper.rb +1 -1
- data/lib/llm_gateway/adapters/openai/responses/stream_mapper.rb +131 -3
- data/lib/llm_gateway/adapters/structs.rb +45 -10
- data/lib/llm_gateway/agents/event.rb +105 -0
- data/lib/llm_gateway/agents/file_session_manager.rb +100 -0
- data/lib/llm_gateway/agents/harness.rb +176 -0
- data/lib/llm_gateway/agents/in_memory_session_manager.rb +222 -0
- data/lib/llm_gateway/agents/tools/bash_tool.rb +132 -0
- data/lib/llm_gateway/agents/tools/edit_tool.rb +215 -0
- data/lib/llm_gateway/agents/tools/read_tool.rb +143 -0
- data/lib/llm_gateway/agents/tools/tool_utils.rb +164 -0
- data/lib/llm_gateway/agents/tools/write_tool.rb +34 -0
- data/lib/llm_gateway/base_client.rb +3 -3
- data/lib/llm_gateway/clients/anthropic.rb +5 -5
- data/lib/llm_gateway/clients/claude_code/oauth_flow.rb +2 -2
- data/lib/llm_gateway/clients/openai.rb +2 -2
- data/lib/llm_gateway/clients/openai_codex/oauth_flow.rb +4 -4
- data/lib/llm_gateway/prompt.rb +105 -68
- data/lib/llm_gateway/utils.rb +116 -13
- data/lib/llm_gateway/version.rb +1 -1
- data/lib/llm_gateway.rb +4 -0
- metadata +12 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 173ab613e57543956e39d70f4a38fc865bc6b6bac4e8dfe319be9c2928810f77
|
|
4
|
+
data.tar.gz: 46c761a838aee6c3cebad151467555cba8ab70480e952ab741874c2d8acc13e8
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0f21f7288e4d8d374ea77d96ee3110b08a260a2e06ef6fd6372357b88abb5e936d2cbeae934720a0a5e17ad431bdce7027a3cd68e4fc60460c6cb5d0f02acc0a
|
|
7
|
+
data.tar.gz: ecf15206364c5ef7d632c0c421294deb1929e508b4287828acbee91e4a4182fb0efd5fb12cca58d6909499b2b33197070bd4c19e232bed8f25fd12f86e2dd604
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [v0.7.0](https://github.com/Hyper-Unearthing/llm_gateway/tree/v0.7.0) (2026-06-03)
|
|
4
|
+
|
|
5
|
+
[Full Changelog](https://github.com/Hyper-Unearthing/llm_gateway/compare/v0.6.0...v0.7.0)
|
|
6
|
+
|
|
7
|
+
**Merged pull requests:**
|
|
8
|
+
|
|
9
|
+
- feat: add agent harness [\#88](https://github.com/Hyper-Unearthing/llm_gateway/pull/88) ([billybonks](https://github.com/billybonks))
|
|
10
|
+
- refactor: prompt to use modern patterns [\#87](https://github.com/Hyper-Unearthing/llm_gateway/pull/87) ([billybonks](https://github.com/billybonks))
|
|
11
|
+
- feat: change our utils to follow active support style [\#85](https://github.com/Hyper-Unearthing/llm_gateway/pull/85) ([billybonks](https://github.com/billybonks))
|
|
12
|
+
- feat: add reasoning level as something configurable in prompt [\#83](https://github.com/Hyper-Unearthing/llm_gateway/pull/83) ([billybonks](https://github.com/billybonks))
|
|
13
|
+
- feat: add support for code execution tool [\#79](https://github.com/Hyper-Unearthing/llm_gateway/pull/79) ([billybonks](https://github.com/billybonks))
|
|
14
|
+
|
|
3
15
|
## [v0.6.0](https://github.com/Hyper-Unearthing/llm_gateway/tree/v0.6.0) (2026-05-27)
|
|
4
16
|
|
|
5
17
|
[Full Changelog](https://github.com/Hyper-Unearthing/llm_gateway/compare/v0.5.0...v0.6.0)
|
data/README.md
CHANGED
|
@@ -12,10 +12,18 @@ Provide a unified translation interface for LLM Provider API's, While allowing d
|
|
|
12
12
|
- [Provider-specific options](#provider-specific-options)
|
|
13
13
|
- [Quick Start: Streaming (all events)](#quick-start-streaming-all-events)
|
|
14
14
|
- [Stream API without handling events (final result only)](#stream-api-without-handling-events-final-result-only)
|
|
15
|
+
- [Prompt classes](#prompt-classes)
|
|
15
16
|
- [Migration guides](#migration-guides)
|
|
16
17
|
- [Tools](#tools)
|
|
17
18
|
- [Defining Tools](#defining-tools)
|
|
18
19
|
- [Handling Tool Calls](#handling-tool-calls)
|
|
20
|
+
- [Server Tool Use](#server-tool-use)
|
|
21
|
+
- [Agents](#agents)
|
|
22
|
+
- [Agent events](#agent-events)
|
|
23
|
+
- [Session managers and persistence](#session-managers-and-persistence)
|
|
24
|
+
- [Queues, steering, and follow-ups](#queues-steering-and-follow-ups)
|
|
25
|
+
- [Compaction](#compaction)
|
|
26
|
+
- [Built-in agent tools](#built-in-agent-tools)
|
|
19
27
|
- [Image Input](#image-input)
|
|
20
28
|
- [Thinking / Reasoning](#thinking--reasoning)
|
|
21
29
|
- [Streaming Thinking Content](#streaming-thinking-content)
|
|
@@ -158,7 +166,7 @@ response = adapter.stream(transcript, tools: tools, model: "gpt-5.4", reasoning:
|
|
|
158
166
|
|
|
159
167
|
# Tool-call events
|
|
160
168
|
when :tool_start
|
|
161
|
-
puts "\n[tool_start] id=#{event.id} name=#{event.name} index=#{event.content_index}"
|
|
169
|
+
puts "\n[tool_start] id=#{event.id} name=#{event.name} type=#{event.tool_type} index=#{event.content_index}"
|
|
162
170
|
when :tool_delta
|
|
163
171
|
streamed_tool_args[event.content_index] << event.delta
|
|
164
172
|
print event.delta
|
|
@@ -212,6 +220,7 @@ Stream callback event families:
|
|
|
212
220
|
- `AssistantStreamEvent` (and subclasses):
|
|
213
221
|
- Text: `:text_start`, `:text_delta`, `:text_end`
|
|
214
222
|
- Tool call: `:tool_start`, `:tool_delta`, `:tool_end`
|
|
223
|
+
- Tool result: `:tool_result_start`, `:tool_result_delta`, `:tool_result_end` (emitted by some provider-hosted/server tools)
|
|
215
224
|
- Reasoning: `:reasoning_start`, `:reasoning_delta`, `:reasoning_end`
|
|
216
225
|
|
|
217
226
|
Non-final stream events expose `event.partial`, a `PartialAssistantMessage` snapshot accumulated so far. The final `:message_end` event exposes the complete `AssistantMessage` as `event.message` instead.
|
|
@@ -251,8 +260,63 @@ text = result.content
|
|
|
251
260
|
puts text
|
|
252
261
|
```
|
|
253
262
|
|
|
263
|
+
## Prompt classes
|
|
264
|
+
|
|
265
|
+
`LlmGateway::Prompt` wraps a reusable prompt, provider/model defaults, callbacks, optional tools, and prompt-cache options around the `stream` API.
|
|
266
|
+
|
|
267
|
+
```ruby
|
|
268
|
+
class AddTool < LlmGateway::Tool
|
|
269
|
+
name "add"
|
|
270
|
+
description "Adds two numbers"
|
|
271
|
+
input_schema(type: "object")
|
|
272
|
+
cache true # optional: mark the tool definition as cacheable where supported
|
|
273
|
+
|
|
274
|
+
def execute(input)
|
|
275
|
+
input[:left] + input[:right]
|
|
276
|
+
end
|
|
277
|
+
end
|
|
278
|
+
|
|
279
|
+
class MathPrompt < LlmGateway::Prompt
|
|
280
|
+
self.provider = LlmGateway.build_provider(
|
|
281
|
+
provider: "openai_responses",
|
|
282
|
+
api_key: ENV.fetch("OPENAI_API_KEY")
|
|
283
|
+
)
|
|
284
|
+
self.model = "gpt-5.4"
|
|
285
|
+
|
|
286
|
+
TOOLS = [AddTool].freeze
|
|
287
|
+
|
|
288
|
+
def prompt
|
|
289
|
+
"What is 2 + 3? Use the add tool."
|
|
290
|
+
end
|
|
291
|
+
|
|
292
|
+
def system_prompt
|
|
293
|
+
"You are a careful math assistant."
|
|
294
|
+
end
|
|
295
|
+
end
|
|
296
|
+
|
|
297
|
+
response = MathPrompt.new(
|
|
298
|
+
cache_key: "math-prompt-v1",
|
|
299
|
+
cache_retention: "short"
|
|
300
|
+
).run
|
|
301
|
+
|
|
302
|
+
puts response.role # "assistant"
|
|
303
|
+
puts response.content.select { |block| block.type == "text" }.map(&:text).join
|
|
304
|
+
```
|
|
305
|
+
|
|
306
|
+
How `Prompt` works now:
|
|
307
|
+
|
|
308
|
+
- `prompt` is evaluated once per `run`.
|
|
309
|
+
- `run(provider:, model:, reasoning:, **options)` calls `stream` and returns the final normalized `AssistantMessage` after any tool calls complete.
|
|
310
|
+
- `stream(input = prompt, provider:, model:, reasoning:, **options, &block)` forwards to the provider and returns the normalized `AssistantMessage`.
|
|
311
|
+
- Tools are declared as tool classes in a `TOOLS` constant. `run` automatically executes returned `tool_use` blocks, appends `tool_result` messages, and loops until no tool calls remain.
|
|
312
|
+
- `system_prompt`, `tools`, `model`, `reasoning`, `cache_key`, and `cache_retention` are forwarded as stream options.
|
|
313
|
+
- `cache_retention` can also enable provider cache control for prompt-owned system/tool blocks where supported, and `Tool.cache true` marks a tool definition with `cache_control`.
|
|
314
|
+
- `before_execute` callbacks receive the resolved input. `after_execute` callbacks receive the final `AssistantMessage`.
|
|
315
|
+
- The old `extract_response` and `parse_response` hooks are no longer called; inspect, parse, or transform the returned `AssistantMessage` after `run`.
|
|
316
|
+
|
|
254
317
|
## Migration guides
|
|
255
318
|
|
|
319
|
+
- [0.7.0 migration guide](docs/migration_guide_0.7.0.md) — update `Prompt` subclasses for normalized `AssistantMessage` return values, automatic tool loops, `TOOLS`, and removed response hooks.
|
|
256
320
|
- [0.6.0 migration guide](docs/migration_guide_0.6.0.md) — move `model_key` to per-request `model:`, update provider keys, update `Prompt` usage, and migrate stream event/usage changes.
|
|
257
321
|
- [Migrating from `chat` to `stream`](docs/migration-guide.md) — use `stream` without a block when you only need the final response.
|
|
258
322
|
|
|
@@ -358,6 +422,196 @@ Notes:
|
|
|
358
422
|
- Tool results are sent back in the transcript as `{ type: "tool_result", tool_use_id:, content: }` blocks.
|
|
359
423
|
- For multimodal-capable models, `tool_result` content can include image blocks when supported by the provider/model.
|
|
360
424
|
|
|
425
|
+
### Server Tool Use
|
|
426
|
+
|
|
427
|
+
Some providers offer provider-hosted tools, such as OpenAI Responses code interpreter or Anthropic code execution. Pass these tools in the provider's native shape; `llm_gateway` preserves them and normalizes server tool activity in streams and final messages.
|
|
428
|
+
|
|
429
|
+
```ruby
|
|
430
|
+
openai_code_interpreter = {
|
|
431
|
+
type: "code_interpreter",
|
|
432
|
+
container: { type: "auto", memory_limit: "1g" }
|
|
433
|
+
}
|
|
434
|
+
|
|
435
|
+
anthropic_code_execution = {
|
|
436
|
+
type: "code_execution_20250825",
|
|
437
|
+
name: "code_execution"
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
tools = provider == "openai_responses" ? [openai_code_interpreter] : [anthropic_code_execution]
|
|
441
|
+
response = adapter.stream("Create a chart from this CSV and save it as PNG.", tools: tools) do |event|
|
|
442
|
+
case event.type
|
|
443
|
+
when :tool_start
|
|
444
|
+
puts "server tool: #{event.name}" if event.tool_type == "server_tool_use"
|
|
445
|
+
when :tool_delta
|
|
446
|
+
print event.delta # streamed code/input JSON when the provider exposes it
|
|
447
|
+
when :tool_result_start, :tool_result_delta
|
|
448
|
+
print event.delta # provider-hosted result metadata/content when available
|
|
449
|
+
end
|
|
450
|
+
end
|
|
451
|
+
|
|
452
|
+
response.content.each do |block|
|
|
453
|
+
case block.type
|
|
454
|
+
when "server_tool_use"
|
|
455
|
+
puts "server tool #{block.name} input=#{block.input.inspect} id=#{block.id}"
|
|
456
|
+
when "server_tool_result"
|
|
457
|
+
puts "server tool result for #{block.tool_use_id}: #{block.content.inspect}"
|
|
458
|
+
end
|
|
459
|
+
end
|
|
460
|
+
```
|
|
461
|
+
|
|
462
|
+
Cross-provider server tool handoffs are best-effort:
|
|
463
|
+
|
|
464
|
+
- Same provider/API replay keeps `server_tool_use` / `server_tool_result` blocks when possible.
|
|
465
|
+
- Cross-provider replay converts server tool uses into normal `tool_use` blocks and server tool results into `tool_result` blocks.
|
|
466
|
+
- `llm_gateway` does not translate server tool names between providers. Supply the target provider's server tool definition on the follow-up request.
|
|
467
|
+
- Some providers require the same server tool to be selected in `tools:` when replaying prior server tool activity.
|
|
468
|
+
|
|
469
|
+
## Agents
|
|
470
|
+
|
|
471
|
+
`LlmGateway::Agents::Harness` wraps the streaming API in a stateful conversation loop. It stores session history, executes `LlmGateway::Tool` classes automatically when the model emits tool calls, appends `tool_result` messages, repeats model turns until there are no more tool calls, supports queued user messages while a turn is running, and compacts older session context when needed.
|
|
472
|
+
|
|
473
|
+
```ruby
|
|
474
|
+
require "llm_gateway"
|
|
475
|
+
require "json"
|
|
476
|
+
|
|
477
|
+
class WeatherTool < LlmGateway::Tool
|
|
478
|
+
name "get_weather"
|
|
479
|
+
description "Get current weather for a location"
|
|
480
|
+
input_schema(
|
|
481
|
+
type: "object",
|
|
482
|
+
properties: {
|
|
483
|
+
location: { type: "string" }
|
|
484
|
+
},
|
|
485
|
+
required: ["location"]
|
|
486
|
+
)
|
|
487
|
+
|
|
488
|
+
def execute(input)
|
|
489
|
+
location = input[:location] || input["location"]
|
|
490
|
+
|
|
491
|
+
JSON.generate(
|
|
492
|
+
location: location,
|
|
493
|
+
temperature: 14,
|
|
494
|
+
condition: "Cloudy"
|
|
495
|
+
)
|
|
496
|
+
end
|
|
497
|
+
end
|
|
498
|
+
|
|
499
|
+
class WeatherHarness < LlmGateway::Agents::Harness
|
|
500
|
+
TOOLS = [WeatherTool]
|
|
501
|
+
|
|
502
|
+
def system_prompt
|
|
503
|
+
"You are a concise weather assistant. Use tools when useful."
|
|
504
|
+
end
|
|
505
|
+
end
|
|
506
|
+
|
|
507
|
+
adapter = LlmGateway.build_provider(
|
|
508
|
+
provider: "openai_responses",
|
|
509
|
+
api_key: ENV.fetch("OPENAI_API_KEY")
|
|
510
|
+
)
|
|
511
|
+
|
|
512
|
+
session = LlmGateway::Agents::InMemorySessionManager.new("weather-session")
|
|
513
|
+
harness = WeatherHarness.new(
|
|
514
|
+
session,
|
|
515
|
+
provider: adapter,
|
|
516
|
+
model: "gpt-5.4",
|
|
517
|
+
reasoning: "high"
|
|
518
|
+
)
|
|
519
|
+
|
|
520
|
+
harness.prompt_message(
|
|
521
|
+
role: "user",
|
|
522
|
+
content: [ { type: "text", text: "What is the weather in London?" } ]
|
|
523
|
+
) do |event|
|
|
524
|
+
case event.type
|
|
525
|
+
when :agent_start
|
|
526
|
+
puts "Agent started"
|
|
527
|
+
when :turn_start
|
|
528
|
+
puts "Turn started"
|
|
529
|
+
when :message_update
|
|
530
|
+
# Streaming provider events are wrapped on message update events.
|
|
531
|
+
stream_event = event.stream_event
|
|
532
|
+
print stream_event.delta if stream_event.respond_to?(:delta)
|
|
533
|
+
when :tool_execution_start
|
|
534
|
+
puts "\nExecuting #{event.parameters[:name]}"
|
|
535
|
+
when :tool_execution_end
|
|
536
|
+
puts "\nTool result: #{event.result.content}"
|
|
537
|
+
when :agent_end
|
|
538
|
+
puts "\nAgent finished"
|
|
539
|
+
end
|
|
540
|
+
end
|
|
541
|
+
|
|
542
|
+
puts harness.transcript.inspect
|
|
543
|
+
```
|
|
544
|
+
|
|
545
|
+
Harness behavior:
|
|
546
|
+
|
|
547
|
+
- `prompt_message(message)` accepts an LLM-shaped message hash, records it in the session, streams the provider response, records the final assistant message, executes any returned tool calls from the harness class's `TOOLS` constant, records a user `tool_result` message, and continues until no tool calls remain.
|
|
548
|
+
- Harnesses pass `tools`, `system_prompt`, `model`, `reasoning`, `cache_key`, and `cache_retention` through the inherited `Prompt#stream` defaults.
|
|
549
|
+
- Pass `model:` and optional `reasoning:` to `new`, or set them later with `harness.model = "..."` / `harness.reasoning = "..."`. Model and reasoning changes are recorded as session events.
|
|
550
|
+
- `harness.transcript` (also aliased as `prompt`) returns the current model input: the latest compaction summary, if any, followed by active messages.
|
|
551
|
+
- `harness.run` / `harness.continue` continues from the current session state without adding a new user message.
|
|
552
|
+
|
|
553
|
+
### Agent events
|
|
554
|
+
|
|
555
|
+
When a block is passed to `prompt_message`, `run`, or `continue`, the harness emits typed events:
|
|
556
|
+
|
|
557
|
+
- `:agent_start`
|
|
558
|
+
- `:turn_start`
|
|
559
|
+
- `:message_start`
|
|
560
|
+
- `:message_update` with `event.stream_event` containing the normalized streaming event from the provider
|
|
561
|
+
- `:message_end` with `event.message`
|
|
562
|
+
- `:tool_execution_start` with `event.parameters` (`id`, `type`, `name`, `input`)
|
|
563
|
+
- `:tool_execution_end` with `event.parameters` and `event.result`
|
|
564
|
+
- `:turn_end` with `event.message` and `event.tool_results`
|
|
565
|
+
- `:agent_end`
|
|
566
|
+
|
|
567
|
+
### Session managers and persistence
|
|
568
|
+
|
|
569
|
+
- `LlmGateway::Agents::InMemorySessionManager.new(session_id = nil)` keeps session events in memory for the lifetime of the process.
|
|
570
|
+
- `LlmGateway::Agents::FileSessionManager.new(file_name = nil, session_id: nil, session_start: nil, session_dir: nil)` persists session events as JSONL. If `file_name` is omitted, files are created under `LLM_GATEWAY_SESSION_DIR` or `~/.llm_gateway/sessions`.
|
|
571
|
+
- File sessions load existing JSONL sessions and append new events to the same file.
|
|
572
|
+
- Session event types include `session`, `message`, `model_change`, `reasoning_change`, and `compaction`. Queued messages are kept in memory and are persisted only when drained into the active conversation.
|
|
573
|
+
|
|
574
|
+
### Queues, steering, and follow-ups
|
|
575
|
+
|
|
576
|
+
Calls made while a harness is already processing are queued instead of recursively starting another run.
|
|
577
|
+
|
|
578
|
+
- `prompt_message(message)` queues to the harness's default queue while busy. The default is `:next_turn`.
|
|
579
|
+
- `steer_message(message)`, `follow_up_message(message)`, and `next_turn_message(message)` enqueue to their matching queue while busy. When idle, they behave like `prompt_message`.
|
|
580
|
+
- `:steer` messages are drained before the next model request in the current run.
|
|
581
|
+
- `:follow_up` messages run after the current turn finishes and before `:next_turn` messages.
|
|
582
|
+
- `:next_turn` messages run after the current agent run completes.
|
|
583
|
+
- Queued messages drain as `:all` by default. Set `harness.queue_drain_mode = :one_at_a_time` to drain one FIFO message at a time.
|
|
584
|
+
- Set `harness.default_queue_mode = :steer`, `:follow_up`, or `:next_turn` to change where busy `prompt_message` calls are queued.
|
|
585
|
+
|
|
586
|
+
### Compaction
|
|
587
|
+
|
|
588
|
+
Before starting a new user message and before draining queued follow-up/next-turn work, the harness checks whether compaction is needed. It compacts when either:
|
|
589
|
+
|
|
590
|
+
- the latest recorded message usage exceeds `LlmGateway::Agents::Harness::COMPACTION_TOKEN_THRESHOLD`, or
|
|
591
|
+
- the latest assistant message is older than `LlmGateway::Agents::Harness::COMPACTION_IDLE_THRESHOLD_SECONDS`.
|
|
592
|
+
|
|
593
|
+
Compaction calls `adapter.stream(active_messages, system: "Summarize the conversation so far for future context.", tools: [])`, stores the returned assistant message as a `compaction` event, and builds future model input as the compaction summary plus messages recorded after that compaction.
|
|
594
|
+
|
|
595
|
+
### Built-in agent tools
|
|
596
|
+
|
|
597
|
+
The agent harness can use any `LlmGateway::Tool` subclass in its `TOOLS` constant. The library also provides optional coding-oriented tools. Require the ones you want and include them in your harness:
|
|
598
|
+
|
|
599
|
+
```ruby
|
|
600
|
+
require "llm_gateway/agents/tools/read_tool"
|
|
601
|
+
require "llm_gateway/agents/tools/bash_tool"
|
|
602
|
+
require "llm_gateway/agents/tools/edit_tool"
|
|
603
|
+
require "llm_gateway/agents/tools/write_tool"
|
|
604
|
+
|
|
605
|
+
class CodingHarness < LlmGateway::Agents::Harness
|
|
606
|
+
TOOLS = [ReadTool, BashTool, EditTool, WriteTool]
|
|
607
|
+
end
|
|
608
|
+
```
|
|
609
|
+
|
|
610
|
+
- `ReadTool` (`read`) reads text files and supported images (`jpg`, `png`, `gif`, `webp`). Text output is truncated to 2,000 lines or 50KB from the start; use `offset`/`limit` to continue through large files.
|
|
611
|
+
- `BashTool` (`bash`) runs a command in the current working directory, combines stdout/stderr, supports an optional timeout, truncates long output to the last 2,000 lines or 50KB, and saves full truncated output to a temp file.
|
|
612
|
+
- `EditTool` (`edit`) edits one file with one or more exact `edits[].oldText` → `edits[].newText` replacements. Each `oldText` must be unique in the original file and edits must not overlap.
|
|
613
|
+
- `WriteTool` (`write`) creates parent directories as needed and writes or overwrites a file.
|
|
614
|
+
|
|
361
615
|
## Image Input
|
|
362
616
|
|
|
363
617
|
Send images by including an `image` content block in a user message.
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
# Migration guide: 0.7.0
|
|
2
|
+
|
|
3
|
+
This release refactors `LlmGateway::Prompt` around the normalized streaming response model and adds first-class prompt-owned tool loops.
|
|
4
|
+
|
|
5
|
+
## Breaking changes
|
|
6
|
+
|
|
7
|
+
### `Prompt.new` uses keyword arguments
|
|
8
|
+
|
|
9
|
+
Prompt instance configuration is now keyword-only:
|
|
10
|
+
|
|
11
|
+
```ruby
|
|
12
|
+
# Before
|
|
13
|
+
SummaryPrompt.new(provider, "claude-sonnet-4-20250514").run
|
|
14
|
+
|
|
15
|
+
# After
|
|
16
|
+
SummaryPrompt.new(
|
|
17
|
+
provider: provider,
|
|
18
|
+
model: "claude-sonnet-4-20250514"
|
|
19
|
+
).run
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
The same applies when overriding class defaults for `reasoning`, `cache_key`, or `cache_retention`.
|
|
23
|
+
|
|
24
|
+
Class-level prompt defaults should be assigned with writer methods:
|
|
25
|
+
|
|
26
|
+
```ruby
|
|
27
|
+
class SummaryPrompt < LlmGateway::Prompt
|
|
28
|
+
self.provider = provider
|
|
29
|
+
self.model = "gpt-5.4"
|
|
30
|
+
self.reasoning = "medium"
|
|
31
|
+
end
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
If you used the older setter-style calls (`provider value` or `model value`) in prompt subclasses, switch to `self.provider = value` / `self.model = value`.
|
|
35
|
+
|
|
36
|
+
### `Prompt#run` uses `stream` and normalized `AssistantMessage`
|
|
37
|
+
|
|
38
|
+
`run` now calls the configured provider's `stream` method and expects it to return a normalized `LlmGateway::AssistantMessage` with `content` blocks.
|
|
39
|
+
|
|
40
|
+
If you use test doubles or custom providers with `Prompt`, update them from hash-like chat responses:
|
|
41
|
+
|
|
42
|
+
```ruby
|
|
43
|
+
# Before
|
|
44
|
+
{ choices: [ { content: "hello" } ] }
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
To `AssistantMessage` responses:
|
|
48
|
+
|
|
49
|
+
```ruby
|
|
50
|
+
LlmGateway::AssistantMessage.new(
|
|
51
|
+
id: "msg_123",
|
|
52
|
+
model: "gpt-5.4",
|
|
53
|
+
role: "assistant",
|
|
54
|
+
stop_reason: "stop",
|
|
55
|
+
provider: "openai",
|
|
56
|
+
api: "responses",
|
|
57
|
+
timestamp: Time.now.to_i,
|
|
58
|
+
usage: {},
|
|
59
|
+
content: [ { type: "text", text: "hello" } ]
|
|
60
|
+
)
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
`run` returns the final normalized `AssistantMessage` after tool handling is complete. It no longer extracts or concatenates text content for you; inspect `response.content` when you need text or other blocks.
|
|
64
|
+
|
|
65
|
+
`after_execute` callbacks now receive only the final `AssistantMessage` instead of both the message and extracted text.
|
|
66
|
+
|
|
67
|
+
Prompt callback storage now uses Rails-style `class_attribute` inheritance. Register callbacks with `before_execute` / `after_execute` or assign a duplicated callback array on the subclass; avoid mutating inherited callback arrays directly with `before_execute_callbacks << ...` because that can affect related classes.
|
|
68
|
+
|
|
69
|
+
### `extract_response` and `parse_response` hooks were removed
|
|
70
|
+
|
|
71
|
+
`Prompt#run` no longer calls custom `extract_response` or `parse_response` methods.
|
|
72
|
+
|
|
73
|
+
Move response transformation outside the prompt call, or wrap `run` in your subclass:
|
|
74
|
+
|
|
75
|
+
```ruby
|
|
76
|
+
class JsonPrompt < LlmGateway::Prompt
|
|
77
|
+
def prompt
|
|
78
|
+
"Return JSON."
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
def run_json(**options)
|
|
82
|
+
response = run(**options)
|
|
83
|
+
text = response.content.select { |block| block.type == "text" }.map(&:text).join
|
|
84
|
+
JSON.parse(text)
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### Tools are declared with `TOOLS`
|
|
90
|
+
|
|
91
|
+
Prompt tools are now class-level tool classes declared in a `TOOLS` constant. `Prompt#tools` returns their provider definitions.
|
|
92
|
+
|
|
93
|
+
```ruby
|
|
94
|
+
class AddTool < LlmGateway::Tool
|
|
95
|
+
name "add"
|
|
96
|
+
description "Adds two numbers"
|
|
97
|
+
input_schema(type: "object")
|
|
98
|
+
cache true # optional cache_control marker where supported
|
|
99
|
+
|
|
100
|
+
def execute(input)
|
|
101
|
+
input[:left] + input[:right]
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
class MathPrompt < LlmGateway::Prompt
|
|
106
|
+
TOOLS = [AddTool].freeze
|
|
107
|
+
|
|
108
|
+
def prompt
|
|
109
|
+
"What is 2 + 3? Use the add tool."
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
If a prompt has no tools, `tools` now returns `[]` instead of `nil`.
|
|
115
|
+
|
|
116
|
+
### `run` automatically loops over tool calls
|
|
117
|
+
|
|
118
|
+
When the assistant returns `tool_use` content blocks, `Prompt#run` now:
|
|
119
|
+
|
|
120
|
+
1. Finds the matching class in `TOOLS` by tool name.
|
|
121
|
+
2. Executes `tool_class.new.execute(input)`.
|
|
122
|
+
3. Appends the assistant message and a user `tool_result` message.
|
|
123
|
+
4. Calls `stream` again.
|
|
124
|
+
5. Repeats until the response has no `tool_use` blocks.
|
|
125
|
+
|
|
126
|
+
Unknown tools and tool execution errors are returned to the model as `tool_result` content rather than raised.
|
|
127
|
+
|
|
128
|
+
### Prompt input is resolved once per run
|
|
129
|
+
|
|
130
|
+
`prompt` is evaluated once at the start of `run`. The same initial input is used when building follow-up messages for tool results, so dynamic or expensive prompt builders are not re-evaluated during a single run.
|
|
131
|
+
|
|
132
|
+
### `Prompt#stream` accepts explicit input and forwards reasoning/cache options
|
|
133
|
+
|
|
134
|
+
`stream` now has this signature:
|
|
135
|
+
|
|
136
|
+
```ruby
|
|
137
|
+
stream(input = prompt, provider: nil, model: nil, reasoning: nil, **options, &block)
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
You can still call `stream` with no input, but subclasses or callers can now provide a transcript directly:
|
|
141
|
+
|
|
142
|
+
```ruby
|
|
143
|
+
prompt.stream([{ role: "user", content: "Hello" }], model: "gpt-5.4")
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
`Prompt` also now forwards `reasoning:` when configured on the class, instance, `run`, or `stream` call.
|
|
147
|
+
|
|
148
|
+
### Prompt-level cache options
|
|
149
|
+
|
|
150
|
+
Prompt instances accept and forward cache options:
|
|
151
|
+
|
|
152
|
+
```ruby
|
|
153
|
+
SummaryPrompt.new(
|
|
154
|
+
provider: provider,
|
|
155
|
+
model: "gpt-5.4",
|
|
156
|
+
cache_key: "summary-v1",
|
|
157
|
+
cache_retention: "short"
|
|
158
|
+
).run
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
These are passed to providers as managed `cache_key` / `cache_retention` stream options. For providers that support cache control on system/tool blocks, `cache_retention` may also apply cache metadata to the prompt-owned `system_prompt` and tool definitions. Tool classes can also opt into cache metadata with `cache true`.
|
|
162
|
+
|
|
163
|
+
### Stream callbacks may see server-tool events and content blocks
|
|
164
|
+
|
|
165
|
+
Provider-hosted tools (for example OpenAI code interpreter or Anthropic code execution) are normalized as distinct server-tool blocks:
|
|
166
|
+
|
|
167
|
+
- `server_tool_use`
|
|
168
|
+
- `server_tool_result`
|
|
169
|
+
- provider-specific `*_tool_result` blocks during streaming/finalization
|
|
170
|
+
|
|
171
|
+
Stream callbacks may now receive additional event types when server tools are used:
|
|
172
|
+
|
|
173
|
+
- `:tool_result_start`
|
|
174
|
+
- `:tool_result_delta`
|
|
175
|
+
- `:tool_result_end`
|
|
176
|
+
|
|
177
|
+
`tool_start` events also expose `event.tool_type`, which is either `"tool_use"` or `"server_tool_use"`.
|
|
178
|
+
|
|
179
|
+
If your stream handler exhaustively switches on event/content types, add fallbacks or handlers for these server-tool cases. Cross-provider handoff sanitization may convert server-tool blocks to regular `tool_use` / `tool_result` blocks when replaying transcripts on a different provider/API.
|
|
180
|
+
|
|
181
|
+
## Migration checklist
|
|
182
|
+
|
|
183
|
+
- [ ] Replace positional `Prompt.new(provider, model)` calls with `Prompt.new(provider: provider, model: model)`.
|
|
184
|
+
- [ ] Replace prompt class setter-style calls (`provider value`, `model value`) with `self.provider = value` / `self.model = value`.
|
|
185
|
+
- [ ] Update custom provider/test doubles used by `Prompt` to return `AssistantMessage`.
|
|
186
|
+
- [ ] Remove `extract_response` and `parse_response` hooks; inspect, parse, or transform the returned `AssistantMessage` after `run`.
|
|
187
|
+
- [ ] Update `after_execute` callbacks to accept the final `AssistantMessage` only.
|
|
188
|
+
- [ ] Replace direct mutations of `before_execute_callbacks` / `after_execute_callbacks` with the callback registration methods or explicit subclass assignments.
|
|
189
|
+
- [ ] Move prompt tool definitions to a `TOOLS = [ToolClass]` constant.
|
|
190
|
+
- [ ] Account for automatic tool-loop execution in `run`.
|
|
191
|
+
- [ ] Update any `tools.nil?` checks; no-tool prompts now expose `[]`.
|
|
192
|
+
- [ ] Use `cache_key:` / `cache_retention:` on prompt instances when prompt caching is needed.
|
|
193
|
+
- [ ] Add stream/content handling for server-tool event types if your callback code is exhaustive.
|
|
@@ -103,7 +103,7 @@ module LlmGateway
|
|
|
103
103
|
target_provider = LlmGateway::Client.provider_id_from_client(client)
|
|
104
104
|
target_api = api_name
|
|
105
105
|
|
|
106
|
-
return messages
|
|
106
|
+
return messages unless target_provider.present? && target_api.present? && target_model.present?
|
|
107
107
|
|
|
108
108
|
input_sanitizer.sanitize(
|
|
109
109
|
messages,
|
|
@@ -26,6 +26,8 @@ module LlmGateway
|
|
|
26
26
|
map_tool_use_content(content)
|
|
27
27
|
when "tool_result"
|
|
28
28
|
map_tool_result_content(content)
|
|
29
|
+
when "server_tool_result"
|
|
30
|
+
map_server_tool_result_content(content)
|
|
29
31
|
when "thinking", "reasoning"
|
|
30
32
|
map_reasoning_content(content)
|
|
31
33
|
else
|
|
@@ -122,6 +124,28 @@ module LlmGateway
|
|
|
122
124
|
}
|
|
123
125
|
end
|
|
124
126
|
|
|
127
|
+
def map_server_tool_result_content(content)
|
|
128
|
+
{
|
|
129
|
+
type: native_server_tool_result_type(content),
|
|
130
|
+
tool_use_id: content[:tool_use_id],
|
|
131
|
+
content: content[:content]
|
|
132
|
+
}
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
def native_server_tool_result_type(content)
|
|
136
|
+
return content[:name] if content[:name] && content[:name] != "server_tool_result"
|
|
137
|
+
|
|
138
|
+
result_type = content.dig(:content, :type)
|
|
139
|
+
case result_type
|
|
140
|
+
when "bash_code_execution_result"
|
|
141
|
+
"bash_code_execution_tool_result"
|
|
142
|
+
when /^text_editor_code_execution_.*_result$/
|
|
143
|
+
"text_editor_code_execution_tool_result"
|
|
144
|
+
else
|
|
145
|
+
content[:name] || "server_tool_result"
|
|
146
|
+
end
|
|
147
|
+
end
|
|
148
|
+
|
|
125
149
|
def map_reasoning_content(content)
|
|
126
150
|
result = {
|
|
127
151
|
type: "thinking",
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
3
5
|
require_relative "../stream_mapper"
|
|
4
6
|
|
|
5
7
|
module LlmGateway
|
|
@@ -17,20 +19,33 @@ module LlmGateway
|
|
|
17
19
|
accumulator.push({ type: :message_start, delta: }, &block)
|
|
18
20
|
when "content_block_start"
|
|
19
21
|
content_block = chunk.dig(:data, :content_block) || {}
|
|
20
|
-
@current_content_block_type = content_block[:type]
|
|
22
|
+
@current_content_block_type = normalize_content_block_type(content_block[:type])
|
|
21
23
|
|
|
22
24
|
case @current_content_block_type
|
|
23
25
|
when "thinking"
|
|
24
26
|
accumulator.push({ type: :reasoning_start, delta: content_block[:thinking], signature: "" }, &block)
|
|
25
27
|
when "text"
|
|
26
28
|
accumulator.push({ type: :text_start, delta: content_block[:text] }, &block)
|
|
27
|
-
when "tool_use"
|
|
29
|
+
when "tool_use", "server_tool_use"
|
|
28
30
|
accumulator.push(
|
|
29
31
|
{
|
|
30
32
|
type: :tool_start,
|
|
31
33
|
delta: "",
|
|
32
34
|
id: content_block[:id],
|
|
33
|
-
name: content_block[:name]
|
|
35
|
+
name: content_block[:name],
|
|
36
|
+
tool_type: @current_content_block_type
|
|
37
|
+
},
|
|
38
|
+
&block
|
|
39
|
+
)
|
|
40
|
+
when "server_tool_result"
|
|
41
|
+
content = content_block[:content]
|
|
42
|
+
result_delta = content.nil? ? "" : JSON.generate(content)
|
|
43
|
+
accumulator.push(
|
|
44
|
+
{
|
|
45
|
+
type: :tool_result_start,
|
|
46
|
+
delta: result_delta,
|
|
47
|
+
tool_use_id: content_block[:tool_use_id],
|
|
48
|
+
name: content_block[:type]
|
|
34
49
|
},
|
|
35
50
|
&block
|
|
36
51
|
)
|
|
@@ -44,9 +59,13 @@ module LlmGateway
|
|
|
44
59
|
when "text"
|
|
45
60
|
delta = chunk.dig(:data, :delta, :text)
|
|
46
61
|
accumulator.push({ type: :text_delta, delta: }, &block)
|
|
47
|
-
when "tool_use"
|
|
62
|
+
when "tool_use", "server_tool_use"
|
|
48
63
|
delta = chunk.dig(:data, :delta, :partial_json)
|
|
49
64
|
accumulator.push({ type: :tool_delta, delta: }, &block)
|
|
65
|
+
when "server_tool_result"
|
|
66
|
+
content = chunk.dig(:data, :delta, :content)
|
|
67
|
+
result_delta = content.nil? ? "" : JSON.generate(content)
|
|
68
|
+
accumulator.push({ type: :tool_result_delta, delta: result_delta }, &block)
|
|
50
69
|
end
|
|
51
70
|
when "content_block_stop"
|
|
52
71
|
case @current_content_block_type
|
|
@@ -54,8 +73,10 @@ module LlmGateway
|
|
|
54
73
|
accumulator.push({ type: :reasoning_end, delta: "", signature: "" }, &block)
|
|
55
74
|
when "text"
|
|
56
75
|
accumulator.push({ type: :text_end, delta: "" }, &block)
|
|
57
|
-
when "tool_use"
|
|
76
|
+
when "tool_use", "server_tool_use"
|
|
58
77
|
accumulator.push({ type: :tool_end, delta: "" }, &block)
|
|
78
|
+
when "server_tool_result"
|
|
79
|
+
accumulator.push({ type: :tool_result_end, delta: "" }, &block)
|
|
59
80
|
end
|
|
60
81
|
@current_content_block_type = nil
|
|
61
82
|
when "message_delta"
|
|
@@ -78,7 +99,7 @@ module LlmGateway
|
|
|
78
99
|
private
|
|
79
100
|
|
|
80
101
|
def normalized_usage(usage)
|
|
81
|
-
usage = symbolize_keys
|
|
102
|
+
usage = usage.to_h.symbolize_keys
|
|
82
103
|
|
|
83
104
|
input = token_count(usage[:input_tokens])
|
|
84
105
|
cache_write = token_count(usage[:cache_creation_input_tokens])
|
|
@@ -99,8 +120,10 @@ module LlmGateway
|
|
|
99
120
|
value.to_i
|
|
100
121
|
end
|
|
101
122
|
|
|
102
|
-
def
|
|
103
|
-
|
|
123
|
+
def normalize_content_block_type(type)
|
|
124
|
+
return type unless type&.end_with?("_tool_result")
|
|
125
|
+
|
|
126
|
+
"server_tool_result"
|
|
104
127
|
end
|
|
105
128
|
|
|
106
129
|
def normalize_message_delta(delta)
|
|
@@ -50,7 +50,7 @@ module LlmGateway
|
|
|
50
50
|
module_function
|
|
51
51
|
|
|
52
52
|
def map(options)
|
|
53
|
-
mapped_options = options.
|
|
53
|
+
mapped_options = options.except(*MANAGED_OPTIONS)
|
|
54
54
|
mapped_options[:max_tokens] = options[:max_completion_tokens] || DEFAULT_MAX_TOKENS
|
|
55
55
|
|
|
56
56
|
response_format = options[:response_format]
|