llm.rb 4.9.0 → 4.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +152 -0
  3. data/README.md +178 -31
  4. data/data/anthropic.json +209 -242
  5. data/data/deepseek.json +15 -15
  6. data/data/google.json +553 -403
  7. data/data/openai.json +740 -535
  8. data/data/xai.json +250 -253
  9. data/data/zai.json +157 -90
  10. data/lib/llm/context/deserializer.rb +2 -1
  11. data/lib/llm/context.rb +58 -2
  12. data/lib/llm/contract/completion.rb +7 -0
  13. data/lib/llm/error.rb +4 -0
  14. data/lib/llm/eventhandler.rb +7 -0
  15. data/lib/llm/function/registry.rb +106 -0
  16. data/lib/llm/function/task.rb +39 -0
  17. data/lib/llm/function.rb +12 -7
  18. data/lib/llm/mcp/transport/http/event_handler.rb +66 -0
  19. data/lib/llm/mcp/transport/http.rb +156 -0
  20. data/lib/llm/mcp/transport/stdio.rb +7 -0
  21. data/lib/llm/mcp.rb +74 -30
  22. data/lib/llm/message.rb +9 -2
  23. data/lib/llm/provider.rb +10 -0
  24. data/lib/llm/providers/anthropic/response_adapter/completion.rb +6 -0
  25. data/lib/llm/providers/anthropic/stream_parser.rb +37 -4
  26. data/lib/llm/providers/anthropic.rb +1 -1
  27. data/lib/llm/providers/google/response_adapter/completion.rb +12 -5
  28. data/lib/llm/providers/google/stream_parser.rb +54 -11
  29. data/lib/llm/providers/google/utils.rb +30 -0
  30. data/lib/llm/providers/google.rb +2 -0
  31. data/lib/llm/providers/ollama/response_adapter/completion.rb +6 -0
  32. data/lib/llm/providers/ollama/stream_parser.rb +10 -4
  33. data/lib/llm/providers/ollama.rb +1 -1
  34. data/lib/llm/providers/openai/response_adapter/completion.rb +7 -0
  35. data/lib/llm/providers/openai/response_adapter/responds.rb +84 -10
  36. data/lib/llm/providers/openai/responses/stream_parser.rb +63 -4
  37. data/lib/llm/providers/openai/responses.rb +1 -1
  38. data/lib/llm/providers/openai/stream_parser.rb +68 -4
  39. data/lib/llm/providers/openai.rb +1 -1
  40. data/lib/llm/schema/all_of.rb +31 -0
  41. data/lib/llm/schema/any_of.rb +31 -0
  42. data/lib/llm/schema/one_of.rb +31 -0
  43. data/lib/llm/schema/parser.rb +36 -0
  44. data/lib/llm/schema.rb +45 -8
  45. data/lib/llm/stream/queue.rb +51 -0
  46. data/lib/llm/stream.rb +102 -0
  47. data/lib/llm/tool.rb +53 -47
  48. data/lib/llm/version.rb +1 -1
  49. data/lib/llm.rb +3 -2
  50. data/llm.gemspec +2 -2
  51. metadata +12 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e876d90bb27d23cb36f97ee0edbc1fe8079a09f473b3a38a31fbd0247f3b9035
4
- data.tar.gz: 82fce429783bcc2b8cb3bf8efc4b705ecc4e793cd6b0301ec616b48ec2d68f97
3
+ metadata.gz: a2af34506e099996b451951da8fb892ecdacebe9f29217bbf7a9e3ee3382d942
4
+ data.tar.gz: f49edb6d166ae113618139f0b118f37acbbd001b9b256d76d5c66b2828915a88
5
5
  SHA512:
6
- metadata.gz: e2164d4b134ad12316e1ffa1bdf0d73bc52e14c2514e48bfd6a4963dcb9b3f1bcec932728627a49fec1e4a941e576784d48ae50c59ee174eaa8191dda54123c3
7
- data.tar.gz: 02bf971e89ee97485f83b87a44eef3cdfec0dc4eb5fc7aad3f1383dce34ef3f889a89ae77738933945248c8ea7d76062270cba2fad2d734eba77ea105abffbe4
6
+ metadata.gz: 8dbdbde04bf04fd714ce5ab3689f078f6a77243853bdb7ea287124295b2a5b5878493a36e4ec0c703a10466306f13ca503de9132b2a8a31c2c39b2f721b1bf78
7
+ data.tar.gz: 5bcb9be7c664bbee548cdc305878bc62fe1c8b5ab23d64630719084dab3581b8f4abf875a235a0e33ee05430cda8d69b0b6cc8fce538abafa4e8f85bbbbaead0
data/CHANGELOG.md ADDED
@@ -0,0 +1,152 @@
1
+ # Changelog
2
+
3
+ ## Unreleased
4
+
5
+ Changes since `v4.11.0`.
6
+
7
+ ## v4.11.0
8
+
9
+ Changes since `v4.10.0`.
10
+
11
+ ### Add
12
+
13
+ - Add `LLM::Stream` for richer streaming callbacks, including `on_content`,
14
+ `on_reasoning_content`, and `on_tool_call` for concurrent tool execution.
15
+ - Add `LLM::Stream#wait` as a shortcut for `queue.wait`.
16
+ - Add `LLM::Context#wait` as a shortcut for the configured stream's `wait`.
17
+ - Add `LLM::Context#call(:functions)` as a shortcut for `functions.call`.
18
+ - Add `LLM::Function.registry` and enhanced support for MCP tools in
19
+ `LLM::Tool.registry` for tool resolution during streaming.
20
+ - Add normalized `LLM::Response` for OpenAI Responses, providing `content`,
21
+ `content!`, `messages` / `choices`, `usage`, and `reasoning_content`.
22
+ - Add `mode: :responses` to `LLM::Context` for routing `talk` through the
23
+ Responses API.
24
+ - Add `LLM::Context#returns` for collecting pending tool returns from the context.
25
+ - Add persistent HTTP connection pooling for repeated MCP tool calls via
26
+ `LLM.mcp(http: ...).persist!`.
27
+ - Add explicit MCP transport constructors via `LLM::MCP.stdio(...)` and
28
+ `LLM::MCP.http(...)`.
29
+
30
+ ### Fix
31
+
32
+ - Fix Google tool-call handling by synthesizing stable ids when Gemini does
33
+ not provide a direct tool-call id.
34
+
35
+ ## v4.10.0
36
+
37
+ Changes since `v4.9.0`.
38
+
39
+ ### Add
40
+
41
+ - Add HTTP transport for MCP with `LLM::MCP::Transport::HTTP` for remote servers
42
+ - Add JSON Schema union types (`any_of`, `all_of`, `one_of`) with parser integration
43
+ - Add JSON Schema type array union support (e.g., `"type": ["object", "null"]`)
44
+ - Add JSON Schema type inference from `const`, `enum`, or `default` fields
45
+
46
+ ### Change
47
+
48
+ - Update `LLM::MCP` constructor for exclusive `http:` or `stdio:` transport
49
+ - Update `LLM::MCP` documentation for HTTP transport support
50
+
51
+ ## v4.9.0
52
+
53
+ Changes since `v4.8.0`.
54
+
55
+ ### Add
56
+
57
+ - Add fiber-based concurrency with `LLM::Function::FiberGroup` and
58
+ `LLM::Function::TaskGroup` classes for lightweight async execution.
59
+ - Add `:thread`, `:task`, and `:fiber` strategy parameter to
60
+ `LLM::Function#spawn` for explicit concurrency control.
61
+ - Add stdio MCP client support, including remote tool discovery and
62
+ invocation through `LLM.mcp`, `LLM::Context`, and existing function/tool
63
+ APIs.
64
+ - Add model registry support via `LLM::Registry`, including model
65
+ metadata lookup, pricing, modalities, limits, and cost estimation.
66
+ - Add access to a model's context window via
67
+ `LLM::Context#context_window`.
68
+ - Add tracking of defined tools in the tool registry.
69
+ - Add `LLM::Schema::Enum`, enabling `Enum[...]` as a schema/tool
70
+ parameter type.
71
+ - Add top-level Anthropic system instruction support using Anthropic's
72
+ provider-specific request format.
73
+ - Add richer tracing hooks and extra metadata support for
74
+ LangSmith/OpenTelemetry-style traces.
75
+ - Add rack/websocket and Relay-related example work, including MCP-focused
76
+ examples.
77
+ - Add concurrent tool execution with `LLM::Function#spawn`,
78
+ `LLM::Function::Array` (`call`, `wait`, `spawn`), and
79
+ `LLM::Function::ThreadGroup`.
80
+ - Add `LLM::Function::ThreadGroup#alive?` method for non-blocking
81
+ monitoring of concurrent tool execution.
82
+ - Add `LLM::Function::ThreadGroup#value` alias for `ThreadGroup#wait` for
83
+ consistency with Ruby's `Thread#value`.
84
+
85
+ ### Change
86
+
87
+ - Rename `LLM::Session` to `LLM::Context` throughout the codebase to better
88
+ reflect the concept of a stateful interaction environment.
89
+ - Rename `LLM::Gemini` to `LLM::Google` to better reflect provider naming.
90
+ - Standardize model objects across providers around a smaller common
91
+ interface.
92
+ - Switch registry cost internals from `LLM::Estimate` to `LLM::Cost`.
93
+ - Update image generation defaults so OpenAI and xAI consistently return
94
+ base64-encoded image data by default.
95
+ - Update `LLM::Bot` deprecation warning from v5.0 to v6.0, giving users
96
+ more time to migrate to `LLM::Context`.
97
+ - Rework the README and screencast documentation to better cover MCP,
98
+ registry, contexts, prompts, concurrency, providers, and example flow.
99
+ - Expand the README with architecture, production, and provider guidance
100
+ while improving readability and example ordering.
101
+
102
+ ### Fix
103
+
104
+ - Fix local schema `$ref` resolution in `LLM::Schema::Parser`.
105
+ - Fix multiple MCP issues around stdio env handling, request IDs, registry
106
+ interaction, tool registration, and filtering of MCP tools from the
107
+ standard tool registry.
108
+ - Fix stream parsing issues, including chunk-splitting bugs and safer
109
+ handling of streamed error responses.
110
+ - Fix prompt handling across contexts, agents, and provider adapters so
111
+ prompt turns remain consistent in history and completions.
112
+ - Fix several tool/context issues, including function return wrapping,
113
+ tool lookup after deserialization, unnamed subclass filtering, and
114
+ thread-safety around tool registry mutations.
115
+ - Fix Google tool-call handling to preserve `thoughtSignature`.
116
+ - Fix `LLM::Tracer::Logger` argument handling.
117
+ - Fix packaging/docs issues such as registry files in the gemspec and
118
+ stale provider docs.
119
+ - Fix Google provider handling of `nil` function IDs during context
120
+ deserialization.
121
+ - Fix MCP stdio transport by increasing poll timeout for better
122
+ reliability.
123
+ - Fix Google provider to properly cast non-Hash tool results into Hash
124
+ format for API compatibility.
125
+ - Fix schema parser to support recursive normalization of `Array`,
126
+ `LLM::Object`, and nested structures.
127
+ - Fix DeepSeek provider to tolerate malformed tool arguments.
128
+ - Fix `LLM::Function::TaskGroup#alive?` to properly delegate to
129
+ `Async::Task#alive?`.
130
+ - Fix various RuboCop errors across the codebase.
131
+ - Fix DeepSeek provider to handle JSON that might be valid but unexpected.
132
+
133
+ ### Notes
134
+
135
+ Notable merged work in this range includes:
136
+
137
+ - `feat(function): add fiber-based concurrency for async environments (#64)`
138
+ - `feat(mcp): add stdio MCP support (#134)`
139
+ - `Add LLM::Registry + cost support (#133)`
140
+ - `Consistent model objects across providers (#131)`
141
+ - `Add rack + websocket example (#130)`
142
+ - `feat(gemspec): add changelog URI (#136)`
143
+ - `feat(function): alias ThreadGroup#wait as ThreadGroup#value (#62)`
144
+ - README and screencast refresh across `#66`, `#67`, `#68`, `#71`, and
145
+ `#72`
146
+ - `chore(bot): update deprecation warning from v5.0 to v6.0`
147
+ - `fix(deepseek): tolerate malformed tool arguments`
148
+ - `refactor(context): Rename Session as Context (#70)`
149
+
150
+ Comparison base:
151
+ - Latest tag: `v4.8.0` (`6468f2426ee125823b7ae43b4af507b125f96ffc`)
152
+ - HEAD used for this changelog: `915c48da6fda9bef1554ff613947a6ce26d382e3`
data/README.md CHANGED
@@ -4,7 +4,7 @@
4
4
  <p align="center">
5
5
  <a href="https://0x1eef.github.io/x/llm.rb?rebuild=1"><img src="https://img.shields.io/badge/docs-0x1eef.github.io-blue.svg" alt="RubyDoc"></a>
6
6
  <a href="https://opensource.org/license/0bsd"><img src="https://img.shields.io/badge/License-0BSD-orange.svg?" alt="License"></a>
7
- <a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-4.9.0-green.svg?" alt="Version"></a>
7
+ <a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-4.11.0-green.svg?" alt="Version"></a>
8
8
  </p>
9
9
 
10
10
  ## About
@@ -32,6 +32,11 @@ llm.rb is built around the state and execution model around them:
32
32
  They hold history, tools, schema, usage, cost, persistence, and execution state.
33
33
  - **Tool execution is explicit** <br>
34
34
  Run local, provider-native, and MCP tools sequentially or concurrently with threads, fibers, or async tasks.
35
+ - **Run tools while streaming** <br>
36
+ Start tool work while a response is still streaming instead of waiting for the turn to finish. <br>
37
+ This lets tool latency overlap with model output and is one of llm.rb's strongest execution features.
38
+ - **HTTP MCP can reuse connections** <br>
39
+ Opt into persistent HTTP pooling for repeated remote MCP tool calls with `persist!`.
35
40
  - **One API across providers and capabilities** <br>
36
41
  The same model covers chat, files, images, audio, embeddings, vector stores, and more.
37
42
  - **Thread-safe where it matters** <br>
@@ -75,12 +80,14 @@ llm.rb is built in layers, each providing explicit control:
75
80
  llm.rb provides a complete set of primitives for building LLM-powered systems:
76
81
 
77
82
  - **Chat & Contexts** — stateless and stateful interactions with persistence
78
- - **Streaming** — real-time responses across providers
83
+ - **Streaming** — real-time responses across providers, including structured stream callbacks
84
+ - **Reasoning Support** — full stream, message, and response support when providers expose reasoning
79
85
  - **Tool Calling** — define and execute functions with automatic orchestration
86
+ - **Run Tools While Streaming** — begin tool work before the model finishes its turn
80
87
  - **Concurrent Execution** — threads, async tasks, and fibers
81
88
  - **Agents** — reusable, preconfigured assistants with tool auto-execution
82
89
  - **Structured Outputs** — JSON schema-based responses
83
- - **MCP Support** — integrate external tool servers dynamically
90
+ - **MCP Support** — integrate external tool servers dynamically over stdio or HTTP
84
91
  - **Multimodal Inputs** — text, images, audio, documents, URLs
85
92
  - **Audio** — text-to-speech, transcription, translation
86
93
  - **Images** — generation and editing
@@ -93,6 +100,50 @@ llm.rb provides a complete set of primitives for building LLM-powered systems:
93
100
 
94
101
  ## Quick Start
95
102
 
103
+ #### Run Tools While Streaming
104
+
105
+ llm.rb can start tool execution from streamed tool-call events before the
106
+ assistant turn is fully finished. That means tool latency can overlap with
107
+ streaming output instead of happening strictly after it. If your model emits
108
+ tool calls early, this can noticeably reduce end-to-end latency for real
109
+ systems.
110
+
111
+ This is different from plain concurrent tool execution. The tool starts while
112
+ the response is still arriving, not after the turn has fully completed.
113
+
114
+ For example:
115
+
116
+ ```ruby
117
+ #!/usr/bin/env ruby
118
+ require "llm"
119
+
120
+ class System < LLM::Tool
121
+ name "system"
122
+ description "Run a shell command"
123
+ params { _1.object(command: _1.string.required) }
124
+
125
+ def call(command:)
126
+ {success: Kernel.system(command)}
127
+ end
128
+ end
129
+
130
+ class Stream < LLM::Stream
131
+ def on_content(content)
132
+ print content
133
+ end
134
+
135
+ def on_tool_call(tool, error)
136
+ queue << (error || tool.spawn(:thread))
137
+ end
138
+ end
139
+
140
+ llm = LLM.openai(key: ENV["KEY"])
141
+ ctx = LLM::Context.new(llm, stream: Stream.new, tools: [System])
142
+
143
+ ctx.talk("Run `date` and tell me what command you ran.")
144
+ ctx.talk(ctx.wait(:thread)) while ctx.functions.any?
145
+ ```
146
+
96
147
  #### Concurrent Tools
97
148
 
98
149
  llm.rb provides explicit concurrency control for tool execution. The
@@ -112,7 +163,7 @@ ctx = LLM::Context.new(llm, stream: $stdout, tools: [FetchWeather, FetchNews, Fe
112
163
 
113
164
  # Execute multiple independent tools concurrently
114
165
  ctx.talk("Summarize the weather, headlines, and stock price.")
115
- ctx.talk(ctx.functions.wait(:thread)) while ctx.functions.any?
166
+ ctx.talk(ctx.wait(:thread)) while ctx.functions.any?
116
167
  ```
117
168
 
118
169
  #### MCP
@@ -120,34 +171,59 @@ ctx.talk(ctx.functions.wait(:thread)) while ctx.functions.any?
120
171
  llm.rb integrates with the Model Context Protocol (MCP) to dynamically discover
121
172
  and use tools from external servers. This example starts a filesystem MCP
122
173
  server over stdio and makes its tools available to a context, enabling the LLM
123
- to interact with the local file system through a standardized interface:
174
+ to interact with the local file system through a standardized interface.
175
+ Use `LLM::MCP.stdio` or `LLM::MCP.http` when you want to make the transport
176
+ explicit. Like `LLM::Context`, an MCP client is stateful and should remain
177
+ isolated to a single thread:
124
178
 
125
179
  ```ruby
126
180
  #!/usr/bin/env ruby
127
181
  require "llm"
128
182
 
129
183
  llm = LLM.openai(key: ENV["KEY"])
130
- mcp = LLM.mcp(stdio: {argv: ["npx", "-y", "@modelcontextprotocol/server-filesystem", Dir.pwd]})
184
+ mcp = LLM::MCP.stdio(argv: ["npx", "-y", "@modelcontextprotocol/server-filesystem", Dir.pwd])
131
185
 
132
186
  begin
133
187
  mcp.start
134
188
  ctx = LLM::Context.new(llm, stream: $stdout, tools: mcp.tools)
135
189
  ctx.talk("List the directories in this project.")
136
- ctx.talk(ctx.functions.call) while ctx.functions.any?
190
+ ctx.talk(ctx.call(:functions)) while ctx.functions.any?
191
+ ensure
192
+ mcp.stop
193
+ end
194
+ ```
195
+
196
+ You can also connect to an MCP server over HTTP. This is useful when the
197
+ server already runs remotely and exposes MCP through a URL instead of a local
198
+ process. If you expect repeated tool calls, use `persist!` to reuse a
199
+ process-wide HTTP connection pool. This requires the optional
200
+ `net-http-persistent` gem:
201
+
202
+ ```ruby
203
+ #!/usr/bin/env ruby
204
+ require "llm"
205
+
206
+ llm = LLM.openai(key: ENV["KEY"])
207
+ mcp = LLM::MCP.http(
208
+ url: "https://api.githubcopilot.com/mcp/",
209
+ headers: {"Authorization" => "Bearer #{ENV.fetch("GITHUB_PAT")}"}
210
+ ).persist!
211
+
212
+ begin
213
+ mcp.start
214
+ ctx = LLM::Context.new(llm, stream: $stdout, tools: mcp.tools)
215
+ ctx.talk("List the available GitHub MCP toolsets.")
216
+ ctx.talk(ctx.call(:functions)) while ctx.functions.any?
137
217
  ensure
138
218
  mcp.stop
139
219
  end
140
220
  ```
141
221
 
142
- #### Streaming Chat
222
+ #### Simple Streaming
143
223
 
144
- This example demonstrates llm.rb's streaming support. The `stream: $stdout`
145
- parameter tells the context to write responses incrementally as they arrive
146
- from the LLM. The `Context` object manages the conversation history, and
147
- `talk()` sends your input while automatically appending both your message and
148
- the LLM's response to the context. Streams accept any object with `#<<`,
149
- giving you flexibility to pipe output to files, network sockets, or custom
150
- buffers:
224
+ At the simplest level, any object that implements `#<<` can receive visible
225
+ output as it arrives. This works with `$stdout`, `StringIO`, files, sockets,
226
+ and other Ruby IO-style objects:
151
227
 
152
228
  ```ruby
153
229
  #!/usr/bin/env ruby
@@ -162,14 +238,80 @@ loop do
162
238
  end
163
239
  ```
164
240
 
241
+ #### Advanced Streaming
242
+
243
+ llm.rb also supports the [`LLM::Stream`](lib/llm/stream.rb) interface for
244
+ structured streaming events:
245
+
246
+ - `on_content` for visible assistant output
247
+ - `on_reasoning_content` for separate reasoning output
248
+ - `on_tool_call` for streamed tool-call notifications
249
+
250
+ Subclass [`LLM::Stream`](lib/llm/stream.rb) when you want features like
251
+ `queue` and `wait`, or implement the same methods on your own object. Keep these
252
+ callbacks fast: they run inline with the parser.
253
+
254
+ `on_tool_call` lets tools start before the model finishes its turn, for
255
+ example with `tool.spawn(:thread)`, `tool.spawn(:fiber)`, or
256
+ `tool.spawn(:task)`. This is the mechanism behind running tools while
257
+ streaming.
258
+
259
+ If a stream cannot execute a tool, `error` is an `LLM::Function::Return` that
260
+ communicates the failure back to the LLM. That lets the tool-call path recover
261
+ and keeps the session alive. It also leaves control in the callback: it can
262
+ send `error`, spawn the tool when `error == nil`, or handle the situation
263
+ however it sees fit.
264
+
265
+ In normal use this should be rare, since `on_tool_call` is usually called with
266
+ a resolved tool and `error == nil`. To resolve a tool call, the tool must be
267
+ found in `LLM::Function.registry`. That covers `LLM::Tool` subclasses,
268
+ including MCP tools, but not `LLM.function` closures, which are excluded
269
+ because they may be bound to local state:
270
+
271
+ ```ruby
272
+ #!/usr/bin/env ruby
273
+ require "llm"
274
+ # Assume `System < LLM::Tool` is already defined.
275
+
276
+ class Stream < LLM::Stream
277
+ attr_reader :content, :reasoning_content
278
+
279
+ def initialize
280
+ @content = +""
281
+ @reasoning_content = +""
282
+ end
283
+
284
+ def on_content(content)
285
+ @content << content
286
+ print content
287
+ end
288
+
289
+ def on_reasoning_content(content)
290
+ @reasoning_content << content
291
+ end
292
+
293
+ def on_tool_call(tool, error)
294
+ queue << (error || tool.spawn(:thread))
295
+ end
296
+ end
297
+
298
+ llm = LLM.openai(key: ENV["KEY"])
299
+ ctx = LLM::Context.new(llm, stream: Stream.new, tools: [System])
300
+
301
+ ctx.talk("Run `date` and `uname -a`.")
302
+ while ctx.functions.any?
303
+ ctx.talk(ctx.wait(:thread))
304
+ end
305
+ ```
306
+
165
307
  #### Tool Calling
166
308
 
167
309
  Tools in llm.rb can be defined as classes inheriting from `LLM::Tool` or as
168
310
  closures using `LLM.function`. When the LLM requests a tool call, the context
169
311
  stores `Function` objects in `ctx.functions`. The `call()` method executes all
170
- pending functions and returns their results to the LLM. Tools support
171
- structured parameters with JSON Schema validation and automatically adapt to
172
- each provider's API format (OpenAI, Anthropic, Google, etc.):
312
+ pending functions and returns their results to the LLM. Tools describe
313
+ structured parameters with JSON Schema and adapt those definitions to each
314
+ provider's tool-calling format (OpenAI, Anthropic, Google, etc.):
173
315
 
174
316
  ```ruby
175
317
  #!/usr/bin/env ruby
@@ -188,18 +330,17 @@ end
188
330
  llm = LLM.openai(key: ENV["KEY"])
189
331
  ctx = LLM::Context.new(llm, stream: $stdout, tools: [System])
190
332
  ctx.talk("Run `date`.")
191
- ctx.talk(ctx.functions.call) while ctx.functions.any?
333
+ ctx.talk(ctx.call(:functions)) while ctx.functions.any?
192
334
  ```
193
335
 
194
336
  #### Structured Outputs
195
337
 
196
- The `LLM::Schema` system lets you define JSON schemas that LLMs must follow.
338
+ The `LLM::Schema` system lets you define JSON schemas for structured outputs.
197
339
  Schemas can be defined as classes with `property` declarations or built
198
340
  programmatically using a fluent interface. When you pass a schema to a context,
199
- llm.rb automatically configures the provider's JSON mode and validates
200
- responses against your schema. The `content!` method returns the parsed JSON
201
- object, while errors are captured as structured data rather than raising
202
- exceptions:
341
+ llm.rb adapts it into the provider's structured-output format when that
342
+ provider supports one. The `content!` method then parses the assistant's JSON
343
+ response into a Ruby object:
203
344
 
204
345
  ```ruby
205
346
  #!/usr/bin/env ruby
@@ -209,6 +350,7 @@ require "pp"
209
350
  class Report < LLM::Schema
210
351
  property :category, Enum["performance", "security", "outage"], "Report category", required: true
211
352
  property :summary, String, "Short summary", required: true
353
+ property :impact, OneOf[String, Integer], "Primary impact, as text or a count", required: true
212
354
  property :services, Array[String], "Impacted services", required: true
213
355
  property :timestamp, String, "When it happened", optional: true
214
356
  end
@@ -221,6 +363,7 @@ pp res.content!
221
363
  # {
222
364
  # "category" => "performance",
223
365
  # "summary" => "Database latency spiked, causing 5% request timeouts for 12 minutes.",
366
+ # "impact" => "5% request timeouts",
224
367
  # "services" => ["Database"],
225
368
  # "timestamp" => "2024-06-05T10:42:00Z"
226
369
  # }
@@ -379,20 +522,24 @@ puts "Cost: $#{model_info.cost.input}/1M input tokens"
379
522
 
380
523
  #### Responses API
381
524
 
382
- llm.rb also supports OpenAI's Responses API through `llm.responses` and
383
- `ctx.respond`. This API can maintain response state server-side and can reduce
384
- how much conversation state needs to be sent on each turn:
525
+ llm.rb also supports OpenAI's Responses API through `LLM::Context` with
526
+ `mode: :responses`. The important switch is `store:`. With `store: false`, the
527
+ Responses API stays stateless while still using the Responses endpoint, which
528
+ is useful for models or features that are only available through the Responses
529
+ API. With `store: true`, OpenAI can keep
530
+ response state server-side and reduce how much conversation state needs to be
531
+ sent on each turn:
385
532
 
386
533
  ```ruby
387
534
  #!/usr/bin/env ruby
388
535
  require "llm"
389
536
 
390
537
  llm = LLM.openai(key: ENV["KEY"])
391
- ctx = LLM::Context.new(llm)
538
+ ctx = LLM::Context.new(llm, mode: :responses, store: false)
392
539
 
393
- ctx.respond("Your task is to answer the user's questions", role: :developer)
394
- res = ctx.respond("What is the capital of France?")
395
- puts res.output_text
540
+ ctx.talk("Your task is to answer the user's questions", role: :developer)
541
+ res = ctx.talk("What is the capital of France?")
542
+ puts res.content
396
543
  ```
397
544
 
398
545
  #### Context Persistence