llm.rb 4.10.0 → 4.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +152 -0
  3. data/README.md +265 -113
  4. data/data/anthropic.json +209 -242
  5. data/data/deepseek.json +15 -15
  6. data/data/google.json +553 -403
  7. data/data/openai.json +740 -535
  8. data/data/xai.json +250 -253
  9. data/data/zai.json +157 -90
  10. data/lib/llm/context/deserializer.rb +2 -1
  11. data/lib/llm/context.rb +58 -2
  12. data/lib/llm/contract/completion.rb +7 -0
  13. data/lib/llm/error.rb +4 -0
  14. data/lib/llm/eventhandler.rb +7 -0
  15. data/lib/llm/function/registry.rb +106 -0
  16. data/lib/llm/function/task.rb +39 -0
  17. data/lib/llm/function.rb +12 -7
  18. data/lib/llm/mcp/transport/http.rb +40 -6
  19. data/lib/llm/mcp/transport/stdio.rb +7 -0
  20. data/lib/llm/mcp.rb +54 -24
  21. data/lib/llm/message.rb +9 -2
  22. data/lib/llm/provider.rb +10 -0
  23. data/lib/llm/providers/anthropic/response_adapter/completion.rb +6 -0
  24. data/lib/llm/providers/anthropic/stream_parser.rb +37 -4
  25. data/lib/llm/providers/anthropic.rb +1 -1
  26. data/lib/llm/providers/google/response_adapter/completion.rb +12 -5
  27. data/lib/llm/providers/google/stream_parser.rb +54 -11
  28. data/lib/llm/providers/google/utils.rb +30 -0
  29. data/lib/llm/providers/google.rb +2 -0
  30. data/lib/llm/providers/ollama/response_adapter/completion.rb +6 -0
  31. data/lib/llm/providers/ollama/stream_parser.rb +10 -4
  32. data/lib/llm/providers/ollama.rb +1 -1
  33. data/lib/llm/providers/openai/response_adapter/completion.rb +7 -0
  34. data/lib/llm/providers/openai/response_adapter/responds.rb +84 -10
  35. data/lib/llm/providers/openai/responses/stream_parser.rb +63 -4
  36. data/lib/llm/providers/openai/responses.rb +1 -1
  37. data/lib/llm/providers/openai/stream_parser.rb +68 -4
  38. data/lib/llm/providers/openai.rb +1 -1
  39. data/lib/llm/stream/queue.rb +51 -0
  40. data/lib/llm/stream.rb +102 -0
  41. data/lib/llm/tool.rb +50 -45
  42. data/lib/llm/tracer/telemetry.rb +2 -2
  43. data/lib/llm/version.rb +1 -1
  44. data/lib/llm.rb +3 -2
  45. data/llm.gemspec +2 -2
  46. metadata +7 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: 7fc9e03f0a1b44775f414d310b202dcc68c1a8ebbb4bd7e6e0517902551ffbdf
-   data.tar.gz: 40a11ffd6d8f91ec0babbfb442174e68dac38fc2183922ff5994ff33670c572c
+   metadata.gz: f4c449483ce7a3b53411760d6376157fed3e23b4f013f23ae397255398bef368
+   data.tar.gz: a9a9c82b107cde72edfe6fe5f68ea7b1ea5e493314883d101c453a94db81b601
  SHA512:
-   metadata.gz: ac72f357d340917b99462f12f3a18d30039b3c4f34e8cbb1686da84f3c75b0d67919fc2d90732268301fa45a25d9e9d721d5e1f33a46a387196349878e384dd0
-   data.tar.gz: 92d7f611de8229d7f5cbe065169f0688940753eb1bffcc0ef907fd1d66ec18be840848b692aca81d24b1dfc6068011b644ed9be890e863ed8fa5d51194f089ed
+   metadata.gz: 71a389b2fe654cfd053f45bd749c34b96c9d89ac60e984960f4a2720896588ba39056a3a92ab75a429572cd099961d9f3c02474f7dc43460b59866e41d8b5f28
+   data.tar.gz: 4532ec55176751b32ed21b281f2f71395dcd32cdf318973a751decf171af0a9e5f3f75b75871542c578fd9a2a134f8fc5cbf6a54b1df3b2dbe0c47745122b900
data/CHANGELOG.md ADDED
@@ -0,0 +1,152 @@
+ # Changelog
+
+ ## Unreleased
+
+ Changes since `v4.11.1`.
+
+ ## v4.11.1
+
+ Changes since `v4.10.0`.
+
+ ### Add
+
+ - Add `LLM::Stream` for richer streaming callbacks, including `on_content`,
+   `on_reasoning_content`, and `on_tool_call` for concurrent tool execution.
+ - Add `LLM::Stream#wait` as a shortcut for `queue.wait`.
+ - Add `LLM::Context#wait` as a shortcut for the configured stream's `wait`.
+ - Add `LLM::Context#call(:functions)` as a shortcut for `functions.call`.
+ - Add `LLM::Function.registry` and enhanced support for MCP tools in
+   `LLM::Tool.registry` for tool resolution during streaming.
+ - Add normalized `LLM::Response` for OpenAI Responses, providing `content`,
+   `content!`, `messages` / `choices`, `usage`, and `reasoning_content`.
+ - Add `mode: :responses` to `LLM::Context` for routing `talk` through the
+   Responses API.
+ - Add `LLM::Context#returns` for collecting pending tool returns from the context.
+ - Add persistent HTTP connection pooling for repeated MCP tool calls via
+   `LLM.mcp(http: ...).persist!`.
+ - Add explicit MCP transport constructors via `LLM::MCP.stdio(...)` and
+   `LLM::MCP.http(...)`.
+
+ ### Fix
+
+ - Fix Google tool-call handling by synthesizing stable ids when Gemini does
+   not provide a direct tool-call id.
+
+ ## v4.10.0
+
+ Changes since `v4.9.0`.
+
+ ### Add
+
+ - Add HTTP transport for MCP with `LLM::MCP::Transport::HTTP` for remote servers
+ - Add JSON Schema union types (`any_of`, `all_of`, `one_of`) with parser integration
+ - Add JSON Schema type array union support (e.g., `"type": ["object", "null"]`)
+ - Add JSON Schema type inference from `const`, `enum`, or `default` fields
+
+ ### Change
+
+ - Update `LLM::MCP` constructor for exclusive `http:` or `stdio:` transport
+ - Update `LLM::MCP` documentation for HTTP transport support
+
+ ## v4.9.0
+
+ Changes since `v4.8.0`.
+
+ ### Add
+
+ - Add fiber-based concurrency with `LLM::Function::FiberGroup` and
+   `LLM::Function::TaskGroup` classes for lightweight async execution.
+ - Add `:thread`, `:task`, and `:fiber` strategy parameter to
+   `LLM::Function#spawn` for explicit concurrency control.
+ - Add stdio MCP client support, including remote tool discovery and
+   invocation through `LLM.mcp`, `LLM::Context`, and existing function/tool
+   APIs.
+ - Add model registry support via `LLM::Registry`, including model
+   metadata lookup, pricing, modalities, limits, and cost estimation.
+ - Add access to the model's context window via `LLM::Context#context_window`.
+ - Add tracking of defined tools in the tool registry.
+ - Add `LLM::Schema::Enum`, enabling `Enum[...]` as a schema/tool
+   parameter type.
+ - Add top-level Anthropic system instruction support using Anthropic's
+   provider-specific request format.
+ - Add richer tracing hooks and extra metadata support for
+   LangSmith/OpenTelemetry-style traces.
+ - Add rack/websocket and Relay-related example work, including MCP-focused
+   examples.
+ - Add concurrent tool execution with `LLM::Function#spawn`,
+   `LLM::Function::Array` (`call`, `wait`, `spawn`), and
+   `LLM::Function::ThreadGroup`.
+ - Add `LLM::Function::ThreadGroup#alive?` method for non-blocking
+   monitoring of concurrent tool execution.
+ - Add `LLM::Function::ThreadGroup#value` alias for `ThreadGroup#wait` for
+   consistency with Ruby's `Thread#value`.
+
+ ### Change
+
+ - Rename `LLM::Session` to `LLM::Context` throughout the codebase to better
+   reflect the concept of a stateful interaction environment.
+ - Rename `LLM::Gemini` to `LLM::Google` to better reflect provider naming.
+ - Standardize model objects across providers around a smaller common
+   interface.
+ - Switch registry cost internals from `LLM::Estimate` to `LLM::Cost`.
+ - Update image generation defaults so OpenAI and xAI consistently return
+   base64-encoded image data by default.
+ - Update `LLM::Bot` deprecation warning from v5.0 to v6.0, giving users
+   more time to migrate to `LLM::Context`.
+ - Rework the README and screencast documentation to better cover MCP,
+   registry, contexts, prompts, concurrency, providers, and example flow.
+ - Expand the README with architecture, production, and provider guidance
+   while improving readability and example ordering.
+
+ ### Fix
+
+ - Fix local schema `$ref` resolution in `LLM::Schema::Parser`.
+ - Fix multiple MCP issues around stdio env handling, request IDs, registry
+   interaction, tool registration, and filtering of MCP tools from the
+   standard tool registry.
+ - Fix stream parsing issues, including chunk-splitting bugs and safer
+   handling of streamed error responses.
+ - Fix prompt handling across contexts, agents, and provider adapters so
+   prompt turns remain consistent in history and completions.
+ - Fix several tool/context issues, including function return wrapping,
+   tool lookup after deserialization, unnamed subclass filtering, and
+   thread-safety around tool registry mutations.
+ - Fix Google tool-call handling to preserve `thoughtSignature`.
+ - Fix `LLM::Tracer::Logger` argument handling.
+ - Fix packaging/docs issues such as registry files in the gemspec and
+   stale provider docs.
+ - Fix Google provider handling of `nil` function IDs during context
+   deserialization.
+ - Fix MCP stdio transport by increasing poll timeout for better
+   reliability.
+ - Fix Google provider to properly cast non-Hash tool results into Hash
+   format for API compatibility.
+ - Fix schema parser to support recursive normalization of `Array`,
+   `LLM::Object`, and nested structures.
+ - Fix DeepSeek provider to tolerate malformed tool arguments.
+ - Fix `LLM::Function::TaskGroup#alive?` to properly delegate to
+   `Async::Task#alive?`.
+ - Fix various RuboCop errors across the codebase.
+ - Fix DeepSeek provider to handle JSON that might be valid but unexpected.
+
+ ### Notes
+
+ Notable merged work in this range includes:
+
+ - `feat(function): add fiber-based concurrency for async environments (#64)`
+ - `feat(mcp): add stdio MCP support (#134)`
+ - `Add LLM::Registry + cost support (#133)`
+ - `Consistent model objects across providers (#131)`
+ - `Add rack + websocket example (#130)`
+ - `feat(gemspec): add changelog URI (#136)`
+ - `feat(function): alias ThreadGroup#wait as ThreadGroup#value (#62)`
+ - README and screencast refresh across `#66`, `#67`, `#68`, `#71`, and
+   `#72`
+ - `chore(bot): update deprecation warning from v5.0 to v6.0`
+ - `fix(deepseek): tolerate malformed tool arguments`
+ - `refactor(context): Rename Session as Context (#70)`
+
+ Comparison base:
+ - Latest tag: `v4.8.0` (`6468f2426ee125823b7ae43b4af507b125f96ffc`)
+ - HEAD used for this changelog: `915c48da6fda9bef1554ff613947a6ce26d382e3`
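The streaming additions in `v4.11.1` above compose roughly as in the sketch below. It is an illustrative composite assembled from the changelog entries and the README examples further down in this diff, not code taken from the gem itself; `FetchWeather` is a hypothetical `LLM::Tool` subclass used only for illustration.

```ruby
#!/usr/bin/env ruby
require "llm"
# `FetchWeather` stands in for any LLM::Tool subclass; it is hypothetical.

class Stream < LLM::Stream
  # Print visible output as it streams in.
  def on_content(content)
    print content
  end

  # Start resolved tool calls in their own threads while the turn is still
  # streaming; queue the error return when the tool cannot be resolved.
  def on_tool_call(tool, error)
    queue << (error || tool.spawn(:thread))
  end
end

llm = LLM.openai(key: ENV["KEY"])
ctx = LLM::Context.new(llm, stream: Stream.new, tools: [FetchWeather])

ctx.talk("What's the weather like today?")
# ctx.wait(:thread) joins the work queued above and returns the tool results
# to send back to the model (the LLM::Context#wait shortcut added in v4.11.1).
ctx.talk(ctx.wait(:thread)) while ctx.functions.any?
```

The same pattern appears, in fuller form, in the Advanced Streaming section of the updated README below.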
data/README.md CHANGED
@@ -4,7 +4,7 @@
  <p align="center">
  <a href="https://0x1eef.github.io/x/llm.rb?rebuild=1"><img src="https://img.shields.io/badge/docs-0x1eef.github.io-blue.svg" alt="RubyDoc"></a>
  <a href="https://opensource.org/license/0bsd"><img src="https://img.shields.io/badge/License-0BSD-orange.svg?" alt="License"></a>
- <a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-4.10.0-green.svg?" alt="Version"></a>
+ <a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-4.11.1-green.svg?" alt="Version"></a>
  </p>

  ## About
@@ -30,8 +30,16 @@ llm.rb is built around the state and execution model around them:

  - **Contexts are central** <br>
    They hold history, tools, schema, usage, cost, persistence, and execution state.
+ - **Contexts can be serialized** <br>
+   A context can be serialized to JSON and stored on disk, in a database, in a
+   job queue, or anywhere else your application needs to persist state.
  - **Tool execution is explicit** <br>
    Run local, provider-native, and MCP tools sequentially or concurrently with threads, fibers, or async tasks.
+ - **Run tools while streaming** <br>
+   Start tool work while a response is still streaming instead of waiting for the turn to finish. <br>
+   This overlaps tool latency with model output and exposes streamed tool-call events for introspection, making it one of llm.rb's strongest execution features.
+ - **HTTP MCP can reuse connections** <br>
+   Opt into persistent HTTP pooling for repeated remote MCP tool calls with `persist!`.
  - **One API across providers and capabilities** <br>
    The same model covers chat, files, images, audio, embeddings, vector stores, and more.
  - **Thread-safe where it matters** <br>
@@ -75,12 +83,14 @@ llm.rb is built in layers, each providing explicit control:
  llm.rb provides a complete set of primitives for building LLM-powered systems:

  - **Chat & Contexts** — stateless and stateful interactions with persistence
- - **Streaming** — real-time responses across providers
+ - **Streaming** — real-time responses across providers, including structured stream callbacks
+ - **Reasoning Support** — full stream, message, and response support when providers expose reasoning
  - **Tool Calling** — define and execute functions with automatic orchestration
+ - **Run Tools While Streaming** — begin tool work before the model finishes its turn
  - **Concurrent Execution** — threads, async tasks, and fibers
  - **Agents** — reusable, preconfigured assistants with tool auto-execution
  - **Structured Outputs** — JSON schema-based responses
- - **MCP Support** — integrate external tool servers dynamically
+ - **MCP Support** — integrate external tool servers dynamically over stdio or HTTP
  - **Multimodal Inputs** — text, images, audio, documents, URLs
  - **Audio** — text-to-speech, transcription, translation
  - **Images** — generation and editing
@@ -93,163 +103,234 @@ llm.rb provides a complete set of primitives for building LLM-powered systems:

  ## Quick Start

- #### Concurrent Tools
+ #### Simple Streaming

- llm.rb provides explicit concurrency control for tool execution. The
- `wait(:thread)` method spawns each pending function in its own thread and waits
- for all to complete. You can also use `:fiber` for cooperative multitasking or
- `:task` for async/await patterns (requires the `async` gem). The context
- automatically collects all results and reports them back to the LLM in a
- single turn, maintaining conversation flow while parallelizing independent
- operations:
+ At the simplest level, any object that implements `#<<` can receive visible
+ output as it arrives. This works with `$stdout`, `StringIO`, files, sockets,
+ and other Ruby IO-style objects.
+
+ For more control, llm.rb also supports advanced streaming patterns through
+ [`LLM::Stream`](lib/llm/stream.rb). See [Advanced Streaming](#advanced-streaming)
+ for a structured callback-based example:

  ```ruby
  #!/usr/bin/env ruby
  require "llm"

  llm = LLM.openai(key: ENV["KEY"])
- ctx = LLM::Context.new(llm, stream: $stdout, tools: [FetchWeather, FetchNews, FetchStock])
-
- # Execute multiple independent tools concurrently
- ctx.talk("Summarize the weather, headlines, and stock price.")
- ctx.talk(ctx.functions.wait(:thread)) while ctx.functions.any?
+ ctx = LLM::Context.new(llm, stream: $stdout)
+ loop do
+   print "> "
+   ctx.talk(STDIN.gets || break)
+   puts
+ end
  ```

- #### MCP
+ #### Structured Outputs

- llm.rb integrates with the Model Context Protocol (MCP) to dynamically discover
- and use tools from external servers. This example starts a filesystem MCP
- server over stdio and makes its tools available to a context, enabling the LLM
- to interact with the local file system through a standardized interface:
+ The `LLM::Schema` system lets you define JSON schemas for structured outputs.
+ Schemas can be defined as classes with `property` declarations or built
+ programmatically using a fluent interface. When you pass a schema to a context,
+ llm.rb adapts it into the provider's structured-output format when that
+ provider supports one. The `content!` method then parses the assistant's JSON
+ response into a Ruby object:

  ```ruby
  #!/usr/bin/env ruby
  require "llm"
+ require "pp"
+
+ class Report < LLM::Schema
+   property :category, Enum["performance", "security", "outage"], "Report category", required: true
+   property :summary, String, "Short summary", required: true
+   property :impact, OneOf[String, Integer], "Primary impact, as text or a count", required: true
+   property :services, Array[String], "Impacted services", required: true
+   property :timestamp, String, "When it happened", optional: true
+ end

  llm = LLM.openai(key: ENV["KEY"])
- mcp = LLM.mcp(stdio: {argv: ["npx", "-y", "@modelcontextprotocol/server-filesystem", Dir.pwd]})
+ ctx = LLM::Context.new(llm, schema: Report)
+ res = ctx.talk("Structure this report: 'Database latency spiked at 10:42 UTC, causing 5% request timeouts for 12 minutes.'")
+ pp res.content!

- begin
-   mcp.start
-   ctx = LLM::Context.new(llm, stream: $stdout, tools: mcp.tools)
-   ctx.talk("List the directories in this project.")
-   ctx.talk(ctx.functions.call) while ctx.functions.any?
- ensure
-   mcp.stop
- end
+ # {
+ #   "category" => "performance",
+ #   "summary" => "Database latency spiked, causing 5% request timeouts for 12 minutes.",
+ #   "impact" => "5% request timeouts",
+ #   "services" => ["Database"],
+ #   "timestamp" => "2024-06-05T10:42:00Z"
+ # }
  ```

- You can also connect to a hosted MCP server over HTTP. This is useful when the
- server already runs remotely and exposes MCP through a URL instead of a local
- process:
+ #### Tool Calling
+
+ Tools in llm.rb can be defined as classes inheriting from `LLM::Tool` or as
+ closures using `LLM.function`. When the LLM requests a tool call, the context
+ stores `Function` objects in `ctx.functions`. The `call()` method executes all
+ pending functions and returns their results to the LLM. Tools describe
+ structured parameters with JSON Schema and adapt those definitions to each
+ provider's tool-calling format (OpenAI, Anthropic, Google, etc.):

  ```ruby
  #!/usr/bin/env ruby
  require "llm"

- llm = LLM.openai(key: ENV["KEY"])
- mcp = LLM.mcp(http: {
-   url: "https://api.githubcopilot.com/mcp/",
-   headers: {"Authorization" => "Bearer #{ENV.fetch("GITHUB_PAT")}"}
- })
+ class System < LLM::Tool
+   name "system"
+   description "Run a shell command"
+   param :command, String, "Command to execute", required: true

- begin
-   mcp.start
-   ctx = LLM::Context.new(llm, stream: $stdout, tools: mcp.tools)
-   ctx.talk("List the available GitHub MCP toolsets.")
-   ctx.talk(ctx.functions.call) while ctx.functions.any?
- ensure
-   mcp.stop
+   def call(command:)
+     {success: system(command)}
+   end
  end
+
+ llm = LLM.openai(key: ENV["KEY"])
+ ctx = LLM::Context.new(llm, stream: $stdout, tools: [System])
+ ctx.talk("Run `date`.")
+ ctx.talk(ctx.call(:functions)) while ctx.functions.any?
  ```

- #### Streaming Chat
+ #### Concurrent Tools

- This example demonstrates llm.rb's streaming support. The `stream: $stdout`
- parameter tells the context to write responses incrementally as they arrive
- from the LLM. The `Context` object manages the conversation history, and
- `talk()` sends your input while automatically appending both your message and
- the LLM's response to the context. Streams accept any object with `#<<`,
- giving you flexibility to pipe output to files, network sockets, or custom
- buffers:
+ llm.rb provides explicit concurrency control for tool execution. The
+ `wait(:thread)` method spawns each pending function in its own thread and waits
+ for all to complete. You can also use `:fiber` for cooperative multitasking or
+ `:task` for async/await patterns (requires the `async` gem). The context
+ automatically collects all results and reports them back to the LLM in a
+ single turn, maintaining conversation flow while parallelizing independent
+ operations:

  ```ruby
  #!/usr/bin/env ruby
  require "llm"

  llm = LLM.openai(key: ENV["KEY"])
- ctx = LLM::Context.new(llm, stream: $stdout)
- loop do
-   print "> "
-   ctx.talk(STDIN.gets || break)
-   puts
- end
+ ctx = LLM::Context.new(llm, stream: $stdout, tools: [FetchWeather, FetchNews, FetchStock])
+
+ # Execute multiple independent tools concurrently
+ ctx.talk("Summarize the weather, headlines, and stock price.")
+ ctx.talk(ctx.wait(:thread)) while ctx.functions.any?
  ```

- #### Tool Calling
+ #### Advanced Streaming

- Tools in llm.rb can be defined as classes inheriting from `LLM::Tool` or as
- closures using `LLM.function`. When the LLM requests a tool call, the context
- stores `Function` objects in `ctx.functions`. The `call()` method executes all
- pending functions and returns their results to the LLM. Tools support
- structured parameters with JSON Schema validation and automatically adapt to
- each provider's API format (OpenAI, Anthropic, Google, etc.):
+ llm.rb also supports the [`LLM::Stream`](lib/llm/stream.rb) interface for
+ structured streaming events:
+
+ - `on_content` for visible assistant output
+ - `on_reasoning_content` for separate reasoning output
+ - `on_tool_call` for streamed tool-call notifications
+
+ Subclass [`LLM::Stream`](lib/llm/stream.rb) when you want features like
+ `queue` and `wait`, or implement the same methods on your own object. Keep these
+ callbacks fast: they run inline with the parser.
+
+ `on_tool_call` lets tools start before the model finishes its turn, for
+ example with `tool.spawn(:thread)`, `tool.spawn(:fiber)`, or
+ `tool.spawn(:task)`. That can overlap tool latency with streaming output and
+ gives you a first-class place to observe and instrument tool-call execution as
+ it unfolds.
+
+ If a stream cannot resolve a tool, `error` is an `LLM::Function::Return` that
+ communicates the failure back to the LLM. That lets the tool-call path recover
+ and keeps the session alive. It also leaves control in the callback: it can
+ send `error`, spawn the tool when `error == nil`, or handle the situation
+ however it sees fit.
+
+ In normal use this should be rare, since `on_tool_call` is usually called with
+ a resolved tool and `error == nil`. To resolve a tool call, the tool must be
+ found in `LLM::Function.registry`. That covers `LLM::Tool` subclasses,
+ including MCP tools, but not `LLM.function` closures, which are excluded
+ because they may be bound to local state:

  ```ruby
  #!/usr/bin/env ruby
  require "llm"
+ # Assume `System < LLM::Tool` is already defined.

- class System < LLM::Tool
-   name "system"
-   description "Run a shell command"
-   param :command, String, "Command to execute", required: true
+ class Stream < LLM::Stream
+   attr_reader :content, :reasoning_content

-   def call(command:)
-     {success: system(command)}
+   def initialize
+     @content = +""
+     @reasoning_content = +""
+   end
+
+   def on_content(content)
+     @content << content
+     print content
+   end
+
+   def on_reasoning_content(content)
+     @reasoning_content << content
+   end
+
+   def on_tool_call(tool, error)
+     queue << (error || tool.spawn(:thread))
    end
  end

  llm = LLM.openai(key: ENV["KEY"])
- ctx = LLM::Context.new(llm, stream: $stdout, tools: [System])
- ctx.talk("Run `date`.")
- ctx.talk(ctx.functions.call) while ctx.functions.any?
+ ctx = LLM::Context.new(llm, stream: Stream.new, tools: [System])
+
+ ctx.talk("Run `date` and `uname -a`.")
+ while ctx.functions.any?
+   ctx.talk(ctx.wait(:thread))
+ end
  ```

- #### Structured Outputs
+ #### MCP

- The `LLM::Schema` system lets you define JSON schemas that LLMs must follow.
- Schemas can be defined as classes with `property` declarations or built
- programmatically using a fluent interface. When you pass a schema to a context,
- llm.rb automatically configures the provider's JSON mode and validates
- responses against your schema. The `content!` method returns the parsed JSON
- object, while errors are captured as structured data rather than raising
- exceptions:
+ llm.rb integrates with the Model Context Protocol (MCP) to dynamically discover
+ and use tools from external servers. This example starts a filesystem MCP
+ server over stdio and makes its tools available to a context, enabling the LLM
+ to interact with the local file system through a standardized interface.
+ Use `LLM::MCP.stdio` or `LLM::MCP.http` when you want to make the transport
+ explicit. Like `LLM::Context`, an MCP client is stateful and should remain
+ isolated to a single thread:

  ```ruby
  #!/usr/bin/env ruby
  require "llm"
- require "pp"

- class Report < LLM::Schema
-   property :category, Enum["performance", "security", "outage"], "Report category", required: true
-   property :summary, String, "Short summary", required: true
-   property :impact, OneOf[String, Integer], "Primary impact, as text or a count", required: true
-   property :services, Array[String], "Impacted services", required: true
-   property :timestamp, String, "When it happened", optional: true
+ llm = LLM.openai(key: ENV["KEY"])
+ mcp = LLM::MCP.stdio(argv: ["npx", "-y", "@modelcontextprotocol/server-filesystem", Dir.pwd])
+
+ begin
+   mcp.start
+   ctx = LLM::Context.new(llm, stream: $stdout, tools: mcp.tools)
+   ctx.talk("List the directories in this project.")
+   ctx.talk(ctx.call(:functions)) while ctx.functions.any?
+ ensure
+   mcp.stop
  end
+ ```
+
+ You can also connect to an MCP server over HTTP. This is useful when the
+ server already runs remotely and exposes MCP through a URL instead of a local
+ process. If you expect repeated tool calls, use `persist!` to reuse a
+ process-wide HTTP connection pool. This requires the optional
+ `net-http-persistent` gem:
+
+ ```ruby
+ #!/usr/bin/env ruby
+ require "llm"

  llm = LLM.openai(key: ENV["KEY"])
- ctx = LLM::Context.new(llm, schema: Report)
- res = ctx.talk("Structure this report: 'Database latency spiked at 10:42 UTC, causing 5% request timeouts for 12 minutes.'")
- pp res.content!
+ mcp = LLM::MCP.http(
+   url: "https://api.githubcopilot.com/mcp/",
+   headers: {"Authorization" => "Bearer #{ENV.fetch("GITHUB_PAT")}"}
+ ).persist!

- # {
- #   "category" => "performance",
- #   "summary" => "Database latency spiked, causing 5% request timeouts for 12 minutes.",
- #   "impact" => "5% request timeouts",
- #   "services" => ["Database"],
- #   "timestamp" => "2024-06-05T10:42:00Z"
- # }
+ begin
+   mcp.start
+   ctx = LLM::Context.new(llm, stream: $stdout, tools: mcp.tools)
+   ctx.talk("List the available GitHub MCP toolsets.")
+   ctx.talk(ctx.call(:functions)) while ctx.functions.any?
+ ensure
+   mcp.stop
+ end
  ```

  ## Providers
@@ -405,27 +486,31 @@ puts "Cost: $#{model_info.cost.input}/1M input tokens"

  #### Responses API

- llm.rb also supports OpenAI's Responses API through `llm.responses` and
- `ctx.respond`. This API can maintain response state server-side and can reduce
- how much conversation state needs to be sent on each turn:
+ llm.rb also supports OpenAI's Responses API through `LLM::Context` with
+ `mode: :responses`. The important switch is `store:`. With `store: false`, the
+ Responses API stays stateless while still using the Responses endpoint, which
+ is useful for models or features that are only available through the Responses
+ API. With `store: true`, OpenAI can keep
+ response state server-side and reduce how much conversation state needs to be
+ sent on each turn:

  ```ruby
  #!/usr/bin/env ruby
  require "llm"

  llm = LLM.openai(key: ENV["KEY"])
- ctx = LLM::Context.new(llm)
+ ctx = LLM::Context.new(llm, mode: :responses, store: false)

- ctx.respond("Your task is to answer the user's questions", role: :developer)
- res = ctx.respond("What is the capital of France?")
- puts res.output_text
+ ctx.talk("Your task is to answer the user's questions", role: :developer)
+ res = ctx.talk("What is the capital of France?")
+ puts res.content

  ```
- #### Context Persistence
+ #### Context Persistence: Vanilla

- Contexts can be serialized and restored across process boundaries. This makes
- it possible to persist conversation state in a file, database, or queue and
- resume work later:
+ Contexts can be serialized and restored across process boundaries. A context
+ can be serialized to JSON and stored on disk, in a database, in a job queue,
+ or anywhere else your application needs to persist state:

  ```ruby
  #!/usr/bin/env ruby
@@ -435,12 +520,79 @@ llm = LLM.openai(key: ENV["KEY"])
  ctx = LLM::Context.new(llm)
  ctx.talk("Hello")
  ctx.talk("Remember that my favorite language is Ruby")
- ctx.save(path: "context.json")
+
+ # Serialize to a string when you want to store the context yourself,
+ # for example in a database row or job payload.
+ payload = ctx.to_json

  restored = LLM::Context.new(llm)
- restored.restore(path: "context.json")
+ restored.restore(string: payload)
  res = restored.talk("What is my favorite language?")
  puts res.content
+
+ # You can also persist the same state to a file:
+ ctx.save(path: "context.json")
+ restored = LLM::Context.new(llm)
+ restored.restore(path: "context.json")
+ ```
+
+ #### Context Persistence: ActiveRecord (Rails)
+
+ In a Rails application, you can also wrap persisted context state in an
+ ActiveRecord model. A minimal schema would include a `snapshot` column for the
+ serialized context payload (`jsonb` is recommended) and a `provider` column
+ for the provider name:
+
+ ```ruby
+ create_table :contexts do |t|
+   t.jsonb :snapshot
+   t.string :provider, null: false
+   t.timestamps
+ end
+ ```
+
+ For example:
+
+ ```ruby
+ class Context < ApplicationRecord
+   def talk(...)
+     ctx.talk(...).tap { flush }
+   end
+
+   def wait(...)
+     ctx.wait(...).tap { flush }
+   end
+
+   def messages
+     ctx.messages
+   end
+
+   def model
+     ctx.model
+   end
+
+   def flush
+     update_column(:snapshot, ctx.to_json)
+   end
+
+   private
+
+   def ctx
+     @ctx ||= begin
+       ctx = LLM::Context.new(llm)
+       ctx.restore(string: snapshot) if snapshot
+       ctx
+     end
+   end
+
+   def llm
+     LLM.method(provider).call(key: ENV.fetch(key))
+   end
+
+   def key
+     "#{provider.upcase}_KEY"
+   end
+ end
  ```

  #### Agents
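For orientation, here is a hypothetical usage sketch of the `Context` ActiveRecord model added to the README above. It is not part of the README itself: `Context.create!` and `Context.find` are ordinary ActiveRecord calls, and the `OPENAI_KEY` variable follows from the model's `key` helper when `provider` is `"openai"`.

```ruby
# Hypothetical usage of the ActiveRecord-backed Context model shown above.
# Assumes the contexts table exists and OPENAI_KEY is set in the environment.
record = Context.create!(provider: "openai")
record.talk("Remember that my favorite language is Ruby") # flush persists the snapshot

# Later, in another request or background job:
record = Context.find(record.id)
res = record.talk("What is my favorite language?")
puts res.content
```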