llm.rb 8.1.0 → 10.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +196 -6
  3. data/README.md +233 -518
  4. data/data/anthropic.json +278 -258
  5. data/data/bedrock.json +1288 -1561
  6. data/data/deepseek.json +38 -38
  7. data/data/google.json +656 -579
  8. data/data/openai.json +860 -818
  9. data/data/xai.json +243 -552
  10. data/data/zai.json +168 -168
  11. data/lib/llm/active_record/acts_as_agent.rb +5 -0
  12. data/lib/llm/active_record/acts_as_llm.rb +7 -8
  13. data/lib/llm/active_record.rb +1 -6
  14. data/lib/llm/agent.rb +121 -82
  15. data/lib/llm/context.rb +79 -74
  16. data/lib/llm/contract/completion.rb +45 -0
  17. data/lib/llm/cost.rb +81 -4
  18. data/lib/llm/error.rb +1 -1
  19. data/lib/llm/function/array.rb +8 -5
  20. data/lib/llm/function/call_group.rb +39 -0
  21. data/lib/llm/function/call_task.rb +46 -0
  22. data/lib/llm/function/fork/task.rb +6 -0
  23. data/lib/llm/function/ractor/task.rb +6 -0
  24. data/lib/llm/function/task.rb +10 -0
  25. data/lib/llm/function.rb +28 -1
  26. data/lib/llm/mcp/transport/http.rb +26 -46
  27. data/lib/llm/mcp/transport/stdio.rb +0 -8
  28. data/lib/llm/mcp.rb +6 -23
  29. data/lib/llm/provider.rb +30 -20
  30. data/lib/llm/providers/anthropic/error_handler.rb +6 -7
  31. data/lib/llm/providers/anthropic/files.rb +2 -2
  32. data/lib/llm/providers/anthropic/response_adapter/completion.rb +30 -0
  33. data/lib/llm/providers/anthropic/stream_parser.rb +2 -2
  34. data/lib/llm/providers/anthropic.rb +1 -1
  35. data/lib/llm/providers/bedrock/error_handler.rb +8 -9
  36. data/lib/llm/providers/bedrock/models.rb +13 -13
  37. data/lib/llm/providers/bedrock/response_adapter/completion.rb +30 -0
  38. data/lib/llm/providers/bedrock/stream_parser.rb +2 -2
  39. data/lib/llm/providers/bedrock.rb +1 -1
  40. data/lib/llm/providers/google/error_handler.rb +6 -7
  41. data/lib/llm/providers/google/files.rb +2 -4
  42. data/lib/llm/providers/google/images.rb +1 -1
  43. data/lib/llm/providers/google/models.rb +0 -2
  44. data/lib/llm/providers/google/response_adapter/completion.rb +30 -0
  45. data/lib/llm/providers/google/stream_parser.rb +2 -2
  46. data/lib/llm/providers/google.rb +1 -1
  47. data/lib/llm/providers/ollama/error_handler.rb +6 -7
  48. data/lib/llm/providers/ollama/models.rb +0 -2
  49. data/lib/llm/providers/ollama/response_adapter/completion.rb +30 -0
  50. data/lib/llm/providers/ollama.rb +1 -1
  51. data/lib/llm/providers/openai/audio.rb +3 -3
  52. data/lib/llm/providers/openai/error_handler.rb +6 -7
  53. data/lib/llm/providers/openai/files.rb +2 -2
  54. data/lib/llm/providers/openai/images.rb +3 -3
  55. data/lib/llm/providers/openai/models.rb +1 -1
  56. data/lib/llm/providers/openai/response_adapter/completion.rb +42 -0
  57. data/lib/llm/providers/openai/response_adapter/responds.rb +39 -0
  58. data/lib/llm/providers/openai/responses/stream_parser.rb +2 -2
  59. data/lib/llm/providers/openai/responses.rb +2 -2
  60. data/lib/llm/providers/openai/stream_parser.rb +2 -2
  61. data/lib/llm/providers/openai/vector_stores.rb +1 -1
  62. data/lib/llm/providers/openai.rb +1 -1
  63. data/lib/llm/response.rb +10 -8
  64. data/lib/llm/schema.rb +11 -0
  65. data/lib/llm/sequel/agent.rb +5 -0
  66. data/lib/llm/sequel/plugin.rb +8 -14
  67. data/lib/llm/stream/queue.rb +15 -42
  68. data/lib/llm/stream.rb +15 -40
  69. data/lib/llm/tool/param.rb +1 -8
  70. data/lib/llm/transport/execution.rb +67 -0
  71. data/lib/llm/transport/http.rb +134 -0
  72. data/lib/llm/transport/persistent_http.rb +152 -0
  73. data/lib/llm/transport/response/http.rb +113 -0
  74. data/lib/llm/transport/response.rb +112 -0
  75. data/lib/llm/{provider/transport/http → transport}/stream_decoder.rb +8 -4
  76. data/lib/llm/transport.rb +139 -0
  77. data/lib/llm/usage.rb +14 -5
  78. data/lib/llm/utils.rb +24 -14
  79. data/lib/llm/version.rb +1 -1
  80. data/lib/llm.rb +3 -12
  81. data/llm.gemspec +2 -16
  82. metadata +13 -20
  83. data/lib/llm/bot.rb +0 -3
  84. data/lib/llm/provider/transport/http/execution.rb +0 -115
  85. data/lib/llm/provider/transport/http/interruptible.rb +0 -114
  86. data/lib/llm/provider/transport/http.rb +0 -145
data/README.md CHANGED
@@ -1,522 +1,281 @@
1
1
  <p align="center">
2
- <a href="llm.rb"><img src="https://github.com/llmrb/llm.rb/raw/main/llm.png" width="200" height="200" border="0" alt="llm.rb"></a>
2
+ <a href="llm.rb">
3
+ <img src="https://github.com/llmrb/llm.rb/raw/main/llm.png" width="200" height="200" border="0" alt="llm.rb">
4
+ </a>
3
5
  </p>
4
6
  <p align="center">
5
- <a href="https://0x1eef.github.io/x/llm.rb?rebuild=1"><img src="https://img.shields.io/badge/docs-0x1eef.github.io-blue.svg" alt="RubyDoc"></a>
6
- <a href="https://opensource.org/license/0bsd"><img src="https://img.shields.io/badge/License-0BSD-orange.svg?" alt="License"></a>
7
- <a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-8.1.0-green.svg?" alt="Version"></a>
7
+ <a href="https://0x1eef.github.io/x/llm.rb?rebuild=1">
8
+ <img src="https://img.shields.io/badge/docs-0x1eef.github.io-blue.svg" alt="RubyDoc">
9
+ </a>
10
+ <a href="https://opensource.org/license/0bsd">
11
+ <img src="https://img.shields.io/badge/License-0BSD-orange.svg?" alt="License">
12
+ </a>
13
+ <a href="https://github.com/llmrb/llm.rb/tags">
14
+ <img src="https://img.shields.io/badge/version-10.0.0-green.svg?" alt="Version">
15
+ </a>
8
16
  </p>
9
17
 
10
18
  ## About
11
19
 
12
- llm.rb is the most capable runtime for building AI systems in Ruby.
13
- <br>
14
-
15
- llm.rb is designed for Ruby, and although it works great in Rails, it is not tightly
16
- coupled to it. It runs on the standard library by default (zero dependencies),
17
- loads optional pieces only when needed, includes built-in ActiveRecord support through
18
- `acts_as_llm` and `acts_as_agent`, includes built-in Sequel support through
19
- `plugin :llm` and `plugin :agent`, and is designed for engineers who want control over
20
- long-lived, tool-capable, stateful AI workflows instead of just
21
- request/response helpers.
22
-
23
- It provides one runtime for providers, agents, tools, skills, MCP servers, streaming,
24
- schemas, files, and persisted state, so real systems can be built out of one coherent
25
- execution model instead of a pile of adapters.
26
-
27
- It supports providers including OpenAI, Anthropic, Google Gemini, DeepSeek, xAI,
28
- Z.ai, and AWS Bedrock.
20
+ llm.rb is Ruby's most capable AI runtime.
29
21
 
30
- It provides concurrent tool execution with multiple strategies exposed through a single
31
- runtime: async-task, threads, fibers, ractors and processes (fork). The first three are
32
- good for IO-bound work and the last two are good for CPU-bound work. Ractor support is
33
- experimental and comes with limitations.
22
+ It runs on Ruby's standard library by default, loads optional pieces
23
+ only when needed, and offers a single runtime for providers, agents,
24
+ tools, skills, MCP, streaming, files, and persisted state. As a bonus,
25
+ llm.rb is also [available for mruby](https://github.com/llmrb/mruby-llm).
34
26
 
35
- Want to see some code? Jump to [the examples](#examples) section. <br>
36
- Want to see a self-hosted LLM environment built on llm.rb? Check out [relay.app](https://github.com/llmrb/relay.app). <br>
37
- Want to use llm.rb with mruby ? Check out [mruby-llm](https://github.com/llmrb/mruby-llm)
27
+ It supports OpenAI, OpenAI-compatible endpoints, Anthropic, Google
28
+ Gemini, DeepSeek, xAI, Z.ai, AWS Bedrock, Ollama, and llama.cpp. It
29
+ also includes built-in ActiveRecord and Sequel support, plus concurrent
30
+ tool execution through threads, tasks (via async gem), fibers, ractors,
31
+ and fork (via xchan.rb gem).
38
32
 
33
+ ## Quick start
39
34
 
40
- ## Architecture
41
-
42
- <p align="center">
43
- <img src="https://github.com/llmrb/llm.rb/raw/main/resources/architecture.png" alt="llm.rb architecture" width="790">
44
- </p>
35
+ #### LLM::Context
45
36
 
46
- ## Core Concept
37
+ The
38
+ [LLM::Context](https://0x1eef.github.io/x/llm.rb/LLM/Context.html)
39
+ object is at the heart of the runtime. Almost all other features build
40
+ on top of it. It is a low-level interface to a model, and requires tool
41
+ execution to be managed manually. The
42
+ [LLM::Agent](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html)
43
+ class is almost the same as
44
+ [LLM::Context](https://0x1eef.github.io/x/llm.rb/LLM/Context.html)
45
+ but it manages tool execution for you - we'll cover agents next:
47
46
 
48
- [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html)
49
- is the execution boundary in llm.rb.
50
-
51
- It holds:
52
- - message history
53
- - tool state
54
- - schemas
55
- - streaming configuration
56
- - usage and cost tracking
57
-
58
- Instead of switching abstractions for each feature, everything builds on the
59
- same context object.
47
+ ```ruby
48
+ require "llm"
60
49
 
61
- ## Standout features
50
+ llm = LLM.openai(key: ENV["KEY"])
51
+ ctx = LLM::Context.new(llm, stream: $stdout)
52
+ ctx.talk "Hello world"
53
+ ```
62
54
 
63
- The following list is **not exhaustive**, but it covers a lot of ground.
55
+ #### LLM::Agent
64
56
 
65
- #### Skills
57
+ The
58
+ [LLM::Agent](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html)
59
+ object is implemented on top of
60
+ [LLM::Context](https://0x1eef.github.io/x/llm.rb/LLM/Context.html).
61
+ It provides the same interface, but manages tool execution for you. It
62
+ also has built-in features such as a loop guard that detects repeated
63
+ tool call patterns, and another guard that detects infinite tool call
64
+ loops. Both guards advise the model to change course rather than raise
65
+ an error:
66
66
 
67
- Skills are reusable, directory-backed capabilities loaded from `SKILL.md`.
68
- They run through the same runtime as tools, agents, and MCP. They do not
69
- require a second orchestration layer or a parallel abstraction. If you've
70
- used Claude or Codex, you know the general idea of skills, and llm.rb
71
- supports that same concept with the same execution model as the rest of the
72
- system.
67
+ ```ruby
68
+ require "llm"
73
69
 
74
- In llm.rb, a skill has frontmatter and instructions. The frontmatter can
75
- define `name`, `description`, and `tools`. The `tools` entries are tool names,
76
- and each name must resolve to a subclass of
77
- [`LLM::Tool`](https://0x1eef.github.io/x/llm.rb/LLM/Tool.html) that is already
78
- loaded in the runtime.
70
+ llm = LLM.openai(key: ENV["KEY"])
71
+ agent = LLM::Agent.new(llm, stream: $stdout)
72
+ agent.talk "Hello world"
73
+ ```
79
74
 
80
- If you want Claude/Codex-like skills that can drive scripts or shell
81
- commands, you would typically pair the skill with a tool that can execute
82
- system commands.
75
+ #### Agents (Advanced)
83
76
 
84
- ```yaml
85
- ---
86
- name: release
87
- description: Prepare a release
88
- tools:
89
- - search_docs
90
- - git
91
- ---
92
- Review the release state, summarize what changed, and prepare the release.
93
- ```
77
+ An agent can be configured to require confirmation before a tool is
78
+ executed. When a matching tool is called, llm.rb runs
79
+ `on_tool_confirmation`. That callback must decide whether to cancel the
80
+ tool call or approve it and execute it by calling
81
+ `fn.spawn(strategy).wait`, and it must always return an instance of
82
+ [`LLM::Function::Return`](https://0x1eef.github.io/x/llm.rb/LLM/Function/Return.html):
94
83
 
95
84
  ```ruby
85
+ require "llm"
86
+
96
87
  class Agent < LLM::Agent
97
- model "gpt-5.4-mini"
98
- skills "./skills/release"
99
- tracer { LLM::Tracer::Logger.new(llm, path: "logs/release-agent.log") }
88
+ tools DeleteFile
89
+ confirm "delete-file"
90
+
91
+ def on_tool_confirmation(fn, strategy)
92
+ path = fn.arguments["path"] || fn.arguments[:path]
93
+ if path.start_with?("/tmp/")
94
+ fn.spawn(strategy).wait
95
+ else
96
+ fn.cancel(reason: "Deletion requires approval")
97
+ end
98
+ end
100
99
  end
101
100
 
102
101
  llm = LLM.openai(key: ENV["KEY"])
103
- Agent.new(llm, stream: $stdout).talk("Let's prepare the release!")
102
+ Agent.new(llm, stream: $stdout).talk("Delete /tmp/example.txt.")
104
103
  ```
105
104
 
106
- #### ORM
107
-
108
- Any ActiveRecord model or Sequel model can become an agent-capable model,
109
- including existing business and domain models, without forcing you into a
110
- separate agent table or a second persistence layer.
111
-
112
- `acts_as_agent` extends a model with agent capabilities: the same runtime
113
- surface as [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html),
114
- because it actually wraps an `LLM::Agent`, plus persistence through one text,
115
- JSON, or JSONB-backed `data` column on the same table. If your app also has
116
- provider or model columns, provide them to llm.rb through `set_provider` and
117
- `set_context`.
105
+ #### Tools
118
106
 
107
+ The
108
+ [LLM::Tool](https://0x1eef.github.io/x/llm.rb/LLM/Tool.html)
109
+ class can be subclassed to implement your own tools that can extend the
110
+ abilities of a model:
119
111
 
120
112
  ```ruby
121
- class Ticket < ApplicationRecord
122
- acts_as_agent provider: :set_provider, context: :set_context
123
- model "gpt-5.4-mini"
124
- instructions "You are a support assistant."
125
-
126
- private
127
-
128
- def set_provider
129
- LLM.openai(key: ENV["OPENAI_SECRET"])
130
- end
131
-
132
- def set_context
133
- { mode: :responses, store: false }
113
+ class ReadFile < LLM::Tool
114
+ name "read-file"
115
+ description "Read a file"
116
+ parameter :path, String, "The filename or path"
117
+ required %i[path]
118
+
119
+ def call(path:)
120
+ {contents: File.read(path)}
134
121
  end
135
122
  end
136
123
  ```
137
124
 
138
- #### Agentic Patterns
139
-
140
- llm.rb is especially strong when you want to build agentic systems in a Ruby
141
- way. Agents can be ordinary application models with state, associations,
142
- tools, skills, and persistence, which makes it much easier to build systems
143
- where users have their own specialized agents instead of treating agents as
144
- something outside the app.
145
-
146
- That pattern works so well in llm.rb because
147
- [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html),
148
- `acts_as_agent`, `plugin :agent`, skills, tools, and persisted runtime state
149
- all fit the same execution model. The runtime stays small enough that the
150
- main design work becomes application design, not orchestration glue.
151
-
152
- For a concrete example, see
153
- [How to build a platform of agents](https://0x1eef.github.io/posts/how-to-build-a-platform-of-agents).
154
-
155
- #### Persistence
125
+ #### MCP
156
126
 
157
- The same runtime can be serialized to disk, restored later, persisted in JSON
158
- or JSONB-backed ORM columns, resumed across process boundaries, or shared
159
- across long-lived workflows.
127
+ The
128
+ [LLM::MCP](https://0x1eef.github.io/x/llm.rb/LLM/MCP.html)
129
+ object lets llm.rb use tools provided by an MCP server. Those tools are
130
+ exposed through the same runtime as local tools, so you can pass them
131
+ to either
132
+ [LLM::Context](https://0x1eef.github.io/x/llm.rb/LLM/Context.html)
133
+ or
134
+ [LLM::Agent](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html).
135
+ In this example, the MCP server runs over stdio and
136
+ [LLM::Context](https://0x1eef.github.io/x/llm.rb/LLM/Context.html)
137
+ uses the same tool loop as local tools:
160
138
 
161
139
  ```ruby
162
- ctx = LLM::Context.new(llm)
163
- ctx.talk("Remember that my favorite language is Ruby.")
164
- ctx.save(path: "context.json")
165
- ```
166
-
167
- #### Context Compaction
140
+ require "llm"
168
141
 
169
- Long-lived contexts can compact older history into a summary instead of
170
- growing forever. Compaction is built into [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html)
171
- through [`LLM::Compactor`](https://0x1eef.github.io/x/llm.rb/LLM/Compactor.html),
172
- and when a stream is present it emits `on_compaction` and
173
- `on_compaction_finish` through [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html).
174
- The compactor can also use a different model from the main context, which is
175
- useful when you want summarization to run on a cheaper or faster model.
176
- `token_threshold:` accepts either a fixed token count or a percentage string
177
- like `"90%"`, which resolves against the active model context window and
178
- triggers compaction once total token usage goes over that percentage.
142
+ llm = LLM.openai(key: ENV["KEY"])
143
+ mcp = LLM::MCP.stdio(argv: ["ruby", "server.rb"])
179
144
 
180
- ```ruby
181
- ctx = LLM::Context.new(
182
- llm,
183
- compactor: {
184
- token_threshold: "90%",
185
- retention_window: 8,
186
- model: "gpt-5.4-mini"
187
- }
188
- )
145
+ mcp.run do
146
+ ctx = LLM::Context.new(llm, stream: $stdout, tools: mcp.tools)
147
+ ctx.talk "Use the available tools to inspect the environment."
148
+ ctx.talk(ctx.wait(:call)) while ctx.functions?
149
+ end
189
150
  ```
190
151
 
191
- #### Guards
152
+ #### Skills
192
153
 
193
- Guards let llm.rb supervise agentic execution, not just run it.
194
- They live on [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html),
195
- can inspect the current runtime state, and can step in when a context is no
196
- longer making progress.
154
+ Skills are reusable instructions loaded from a directory containing a `SKILL.md` file. They let
155
+ you package behavior and tool access together, and they plug into the same
156
+ runtime as tools, agents, and MCP. When a skill runs, llm.rb spawns a
157
+ subagent with the skill instructions, access to only the tools listed in the
158
+ skill, and recent conversation context:
197
159
 
198
- [`LLM::LoopGuard`](https://0x1eef.github.io/x/llm.rb/LLM/LoopGuard.html) is
199
- the built-in implementation. It detects repeated tool-call patterns and
200
- blocks pending tool execution with in-band guarded tool errors instead of
201
- letting the loop keep spinning. [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html)
202
- enables that guard by default through its wrapped context.
160
+ ```yaml
161
+ ---
162
+ name: release
163
+ description: Prepare a release
164
+ tools: ["search-docs", "git"]
165
+ ---
203
166
 
204
- ```ruby
205
- ctx = LLM::Context.new(llm)
206
- ctx.guard = MyGuard.new
207
- ```
167
+ ## Task
208
168
 
209
- #### Transformers
210
-
211
- Transformers let llm.rb rewrite outgoing prompts and params before a request
212
- is sent to the provider. They also live on
213
- [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html), but
214
- they solve a different problem from guards: instead of blocking execution,
215
- they can normalize or scrub what gets sent. When a stream is present, that
216
- lifecycle is also exposed through
217
- [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html) with
218
- `on_transform` and `on_transform_finish`.
219
-
220
- That makes them a good fit for things like PII scrubbing, prompt
221
- normalization, or request-level param injection. A transformer just needs to
222
- implement `call(ctx, prompt, params)` and return `[prompt, params]`. That
223
- means a transformer can scrub plain text prompts, but it can also scrub
224
- [`LLM::Function::Return`](https://0x1eef.github.io/x/llm.rb/LLM/Function/Return.html)
225
- values. In other words, you can intercept a tool call's return value and
226
- modify it before sending it back to the LLM.
227
-
228
- That is also a useful UI hook. A stream can surface messages like
229
- `Anonymizing your data...` before a scrubber runs and `Data anonymized.`
230
- after it finishes.
169
+ Review the release state, summarize what changed, and prepare the release.
170
+ ```
231
171
 
232
172
  ```ruby
233
- class ScrubPII
234
- EMAIL = /\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b/i
173
+ require "llm"
235
174
 
236
- def call(ctx, prompt, params)
237
- [scrub(prompt), params]
238
- end
175
+ class ReleaseAgent < LLM::Agent
176
+ model "gpt-5.4-mini"
177
+ skills "./skills/release"
178
+ end
239
179
 
240
- private
180
+ llm = LLM.openai(key: ENV["KEY"])
181
+ ReleaseAgent.new(llm, stream: $stdout).talk("Prepare the next release.")
182
+ ```
241
183
 
242
- def scrub(prompt)
243
- case prompt
244
- when String then prompt.gsub(EMAIL, "[REDACTED_EMAIL]")
245
- when Array then prompt.map { scrub(_1) }
246
- when LLM::Function::Return then on_tool_return(prompt)
247
- else prompt
248
- end
249
- end
184
+ #### LLM::Stream
250
185
 
251
- def on_tool_return(result)
252
- value = case result.name
253
- when "lookup-customer" then scrub_value(result.value)
254
- else result.value
255
- end
256
- LLM::Function::Return.new(result.id, result.name, value)
257
- end
186
+ The
187
+ [LLM::Stream](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html)
188
+ object lets you observe output and runtime events as they happen. You
189
+ can subclass it to handle streamed content in your own application:
258
190
 
259
- def scrub_value(value)
260
- case value
261
- when String then value.gsub(EMAIL, "[REDACTED_EMAIL]")
262
- when Array then value.map { scrub_value(_1) }
263
- when Hash then value.transform_values { scrub_value(_1) }
264
- else value
265
- end
191
+ ```ruby
192
+ require "llm"
193
+
194
+ class Stream < LLM::Stream
195
+ def on_content(content)
196
+ $stdout << content
266
197
  end
267
198
  end
268
199
 
269
- ctx = LLM::Context.new(llm)
270
- ctx.transformer = ScrubPII.new
200
+ llm = LLM.openai(key: ENV["KEY"])
201
+ ctx = LLM::Context.new(llm, stream: Stream.new)
202
+ ctx.talk "Write a haiku about Ruby."
271
203
  ```
272
204
 
273
- When a stream is present, that transformer lifecycle is also exposed through
274
- `on_transform` and `on_transform_finish` on
275
- [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html).
205
+ #### LLM::Stream (advanced)
276
206
 
277
- #### LLM::Stream
278
-
279
- `LLM::Stream` is not just for printing tokens. It supports `on_content`,
280
- `on_reasoning_content`, `on_tool_call`, `on_tool_return`, `on_transform`,
281
- `on_transform_finish`, `on_compaction`, and `on_compaction_finish`, which
282
- means visible output, reasoning output, request rewriting, tool execution,
283
- and context compaction can all be driven through the same execution path.
207
+ The
208
+ [LLM::Stream](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html)
209
+ object can also resolve tool calls while output is still streaming. In
210
+ `on_tool_call`, you can spawn the tool, push the work onto the stream
211
+ queue, and later drain it with `wait`:
284
212
 
285
213
  ```ruby
214
+ require "llm"
215
+
286
216
  class Stream < LLM::Stream
287
- def on_tool_call(tool, error)
288
- queue << (error || ctx.spawn(tool, :thread))
217
+ def on_content(content)
218
+ $stdout << content
289
219
  end
290
220
 
291
- def on_tool_return(tool, result)
292
- puts(result.value)
221
+ def on_tool_call(tool, error)
222
+ return queue << error if error
223
+ queue << ctx.spawn(tool, :thread)
293
224
  end
294
225
  end
226
+
227
+ llm = LLM.openai(key: ENV["KEY"])
228
+ ctx = LLM::Context.new(llm, stream: Stream.new, tools: [ReadFile])
229
+ ctx.talk "Read README.md and summarize the quick start."
230
+ ctx.talk(ctx.wait) while ctx.functions?
295
231
  ```
296
232
 
297
233
  #### Concurrency
298
234
 
299
- Tool execution can run sequentially with `:call` or concurrently through
300
- `:thread`, `:task`, `:fiber`, `:fork`, and experimental `:ractor`, without
301
- rewriting your tool layer. Async tasks, threads, and fibers are the
302
- I/O-bound options. Fork and ractor are the CPU-bound options. `:fork`
303
- requires [`xchan.rb`](https://github.com/0x1eef/xchan.rb#readme) support,
304
- and `:ractor` is still experimental.
305
-
306
- `:fiber` uses `Fiber.schedule`, so it requires `Fiber.scheduler`.
235
+ llm.rb can run tool work concurrently. This is useful when a model calls
236
+ multiple tools and you want to resolve them in parallel instead of one
237
+ at a time. On
238
+ [LLM::Agent](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html),
239
+ you can enable this with `concurrency`. Common options are `:call` for
240
+ sequential execution, `:thread` or `:task` for concurrent IO-bound work, and
241
+ `:ractor` or `:fork` for more isolated CPU-bound work:
307
242
 
308
243
  ```ruby
244
+ require "llm"
245
+
309
246
  class Agent < LLM::Agent
310
247
  model "gpt-5.4-mini"
311
- tools FetchWeather, FetchNews, FetchStock
248
+ tools ReadFile
312
249
  concurrency :thread
313
250
  end
314
- ```
315
-
316
- #### MCP
317
251
 
318
- Remote MCP tools and prompts are not bolted on as a separate integration
319
- stack. They adapt into the same tool and prompt path used by local tools,
320
- skills, contexts, and agents.
321
-
322
- Use `mcp.run do ... end` for scoped work where the client should start and
323
- stop around one block. Use `mcp.start` and `mcp.stop` directly when you need
324
- finer sequential control across several steps before shutting the client down.
325
-
326
- ```ruby
327
- mcp = LLM::MCP.http(
328
- url: "https://api.githubcopilot.com/mcp/",
329
- headers: {"Authorization" => "Bearer #{ENV["GITHUB_PAT"]}"},
330
- persistent: true
331
- )
332
- mcp.run do
333
- ctx = LLM::Context.new(llm, tools: mcp.tools)
334
- end
252
+ llm = LLM.openai(key: ENV["KEY"])
253
+ agent = Agent.new(llm, stream: $stdout)
254
+ agent.talk "Read README.md and CHANGELOG.md and compare them."
335
255
  ```
336
256
 
337
- #### Cancellation
257
+ #### Serialization
338
258
 
339
- Cancellation is one of the harder problems to get right, and while llm.rb
340
- makes it possible, it still requires careful engineering to use effectively.
341
- The point though is that it is possible to stop in-flight provider work cleanly
342
- through the same runtime, and the model used by llm.rb is directly inspired by
343
- Go's context package. In fact, llm.rb is heavily inspired by Go but with a Ruby
344
- twist.
259
+ The [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html)
260
+ object can be serialized to JSON, which makes it suitable for storing
261
+ in a file, a database column, or a Redis queue. The built-in
262
+ ActiveRecord and Sequel plugins are built on top of this feature:
345
263
 
346
264
  ```ruby
347
265
  require "llm"
348
- require "io/console"
349
266
 
350
267
  llm = LLM.openai(key: ENV["KEY"])
351
- ctx = LLM::Context.new(llm, stream: $stdout)
352
- worker = Thread.new do
353
- ctx.talk("Write a very long essay about network protocols.")
354
- rescue LLM::Interrupt
355
- puts "Request was interrupted!"
356
- end
357
268
 
358
- STDIN.getch
359
- ctx.interrupt!
360
- worker.join
361
- ```
269
+ # Serialize a context
270
+ ctx1 = LLM::Context.new(llm)
271
+ ctx1.talk "Remember that my favorite language is Ruby"
272
+ string = ctx1.to_json
362
273
 
363
- ## Differentiators
364
-
365
- ### Execution Model
366
-
367
- - **A system layer, not just an API wrapper** <br>
368
- Put providers, tools, MCP servers, and application APIs behind one runtime
369
- model instead of stitching them together by hand.
370
- - **Contexts are central** <br>
371
- Keep history, tools, schema, usage, persistence, and execution state in one
372
- place instead of spreading them across your app.
373
- - **Contexts can be serialized** <br>
374
- Save and restore live state for jobs, databases, retries, or long-running
375
- workflows.
376
-
377
- ### Runtime Behavior
378
-
379
- - **Streaming and tool execution work together** <br>
380
- Start tool work while output is still streaming so you can hide latency
381
- instead of waiting for turns to finish.
382
- - **Agents auto-manage tool execution** <br>
383
- Use `LLM::Agent` when you want the same stateful runtime surface as
384
- `LLM::Context`, but with tool loops executed automatically according to a
385
- configured concurrency mode such as `:call`, `:thread`, `:task`, `:fiber`,
386
- `:fork`, or experimental `:ractor` support for class-based tools. MCP tools
387
- are not supported by the current `:ractor` mode, but mixed tool sets can
388
- still route MCP tools and local tools through different strategies at
389
- runtime. By default, the tool attempt budget is `25`. When an agent
390
- exhausts that budget, it sends advisory tool errors back through the model
391
- instead of raising out of the runtime. Set `tool_attempts: nil` to disable
392
- that advisory behavior.
393
- - **Tool calls have an explicit lifecycle** <br>
394
- A tool call can be executed, cancelled through
395
- [`LLM::Function#cancel`](https://0x1eef.github.io/x/llm.rb/LLM/Function.html#cancel-instance_method),
396
- or left unresolved for manual handling, but the normal runtime contract is
397
- still that a model-issued tool request is answered with a tool return.
398
- - **Requests can be interrupted cleanly** <br>
399
- Stop in-flight provider work through the same runtime instead of treating
400
- cancellation as a separate concern.
401
- [`LLM::Context#cancel!`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html#cancel-21-instance_method)
402
- is inspired by Go's context cancellation model.
403
- - **Concurrency is a first-class feature** <br>
404
- Use async tasks, threads, fibers, forks, or experimental ractors without
405
- rewriting your tool layer. Async tasks, threads, and fibers are the
406
- I/O-bound options. Fork and ractor are the CPU-bound options. `:fork`
407
- requires [`xchan.rb`](https://github.com/0x1eef/xchan.rb#readme) support.
408
- The current `:ractor` mode is for class-based tools, and MCP tools are
409
- not supported by ractor, but mixed workloads can branch on `tool.mcp?`
410
- and choose a supported strategy per tool. Class-based `:ractor` tools
411
- still emit normal tool tracer callbacks. `:fiber` uses `Fiber.schedule`,
412
- so it requires `Fiber.scheduler`.
413
- - **Advanced workloads are built in, not bolted on** <br>
414
- Streaming, concurrent tool execution, persistence, tracing, and MCP support
415
- all fit the same runtime model.
416
-
417
- ### Integration
418
-
419
- - **MCP is built in** <br>
420
- Connect to MCP servers over stdio or HTTP without bolting on a separate
421
- integration stack.
422
- - **ActiveRecord and Sequel persistence are built in** <br>
423
- llm.rb includes built-in ActiveRecord support through `acts_as_llm` and
424
- `acts_as_agent`, plus built-in Sequel support through `plugin :llm` and
425
- `plugin :agent`.
426
- Use `acts_as_llm` when you want to wrap `LLM::Context`, `acts_as_agent`
427
- when you want to wrap `LLM::Agent`, `plugin :llm` when you want a
428
- `LLM::Context` on a Sequel model, or `plugin :agent` when you want an
429
- `LLM::Agent`. These integrations support `provider:` and `context:` hooks,
430
- plus `format: :string` for text columns or `format: :jsonb` for native
431
- PostgreSQL JSON storage when ORM JSON typecasting support is enabled.
432
- - **ORM models can become persistent agents** <br>
433
- Turn an ActiveRecord or Sequel model into an agent-capable model with
434
- built-in persistence, stored on the same table, with `jsonb` support when
435
- your ORM and database support native JSON columns.
436
- - **Persistent HTTP pooling is shared process-wide** <br>
437
- When enabled, separate
438
- [`LLM::Provider`](https://0x1eef.github.io/x/llm.rb/LLM/Provider.html)
439
- instances with the same endpoint settings can share one persistent
440
- pool, and separate HTTP
441
- [`LLM::MCP`](https://0x1eef.github.io/x/llm.rb/LLM/MCP.html)
442
- instances can do the same, instead of each object creating its own
443
- isolated per-instance transport.
444
- - **OpenAI-compatible gateways are supported** <br>
445
- Target OpenAI-compatible services such as DeepInfra and OpenRouter, as well
446
- as proxies and self-hosted servers, with `host:` and `base_path:` when they
447
- preserve OpenAI request shapes but change the API root path.
448
- - **Provider support is broad** <br>
449
- Work with OpenAI, OpenAI-compatible endpoints, Anthropic, Google, DeepSeek,
450
- Z.ai, xAI, AWS Bedrock, llama.cpp, and Ollama through the same runtime.
451
- - **Tools are explicit** <br>
452
- Run local tools, provider-native tools, and MCP tools through the same path
453
- with fewer special cases.
454
- - **Skills become bounded runtime capabilities** <br>
455
- Point llm.rb at directories with a `SKILL.md`, resolve named tools through
456
- the registry, and adapt each skill into its own callable capability through
457
- the normal runtime. Unlike a generic skill-discovery tool, each skill runs
458
- with its own bounded tool subset and behaves like a task-scoped sub-agent.
459
- - **Providers are normalized, not flattened** <br>
460
- Share one API surface across providers without losing access to
461
- provider-specific capabilities where they matter.
462
- - **Responses keep a uniform shape** <br>
463
- Provider calls return
464
- [`LLM::Response`](https://0x1eef.github.io/x/llm.rb/LLM/Response.html)
465
- objects as a common base shape, then extend them with endpoint- or
466
- provider-specific behavior when needed.
467
- - **Low-level access is still there** <br>
468
- Normalized responses still keep the raw `Net::HTTPResponse` available when
469
- you need headers, status, or other HTTP details.
470
- - **Local model metadata is included** <br>
471
- Model capabilities, pricing, and limits are available locally without extra
472
- API calls.
473
-
474
- ### Design Philosophy
475
-
476
- - **Runs on the stdlib** <br>
477
- Start with Ruby's standard library and add extra dependencies only when you
478
- need them.
479
- - **It is highly pluggable** <br>
480
- Add tools, swap providers, change JSON backends, plug in tracing, or layer
481
- internal APIs and MCP servers into the same execution path.
482
- - **It scales from scripts to long-lived systems** <br>
483
- The same primitives work for one-off scripts, background jobs, and more
484
- demanding application workloads with streaming, persistence, and tracing.
485
- - **Thread boundaries are clear** <br>
486
- Providers are shareable. Contexts are stateful and should stay thread-local.
487
-
488
- ## Capabilities
489
-
490
- Execution:
491
- - **Chat & Contexts** — stateless and stateful interactions with persistence
492
- - **Context Serialization** — save and restore state across processes or time
493
- - **Streaming** — visible output, reasoning output, tool-call events
494
- - **Request Interruption** — stop in-flight provider work cleanly
495
- - **Concurrent Execution** — threads, async tasks, and fibers
496
-
497
- Runtime Building Blocks:
498
- - **Tool Calling** — class-based tools and closure-based functions
499
- - **Run Tools While Streaming** — overlap model output with tool latency
500
- - **Agents** — reusable assistants with tool auto-execution
501
- - **Skills** — directory-backed capabilities loaded from `SKILL.md`
502
- - **MCP Support** — stdio and HTTP MCP clients with prompt and tool support
503
- - **Context Compaction** — summarize older history in long-lived contexts
504
-
505
- Data and Structure:
506
- - **Structured Outputs** — JSON Schema-based responses
507
- - **Responses API** — stateful response workflows where providers support them
508
- - **Multimodal Inputs** — text, images, audio, documents, URLs
509
- - **Audio** — speech generation, transcription, translation
510
- - **Images** — generation and editing
511
- - **Files API** — upload and reference files in prompts
512
- - **Embeddings** — vector generation for search and RAG
513
- - **Vector Stores** — retrieval workflows
514
-
515
- Operations:
516
- - **Cost Tracking** — local cost estimation without extra API calls
517
- - **Observability** — tracing, logging, telemetry
518
- - **Model Registry** — local metadata for capabilities, limits, pricing
519
- - **Persistent HTTP** — optional connection pooling for providers and MCP
274
+ # Restore a context (from JSON)
275
+ ctx2 = LLM::Context.new(llm, stream: $stdout)
276
+ ctx2.restore(string:)
277
+ ctx2.talk "What is my favorite language?"
278
+ ```
520
279
 
521
280
  ## Installation
522
281
 
@@ -528,7 +287,10 @@ gem install llm.rb
528
287
 
529
288
  #### REPL
530
289
 
531
- This example uses [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html) directly for an interactive REPL. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
290
+ This example uses [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html)
291
+ directly for an interactive REPL. <br> See the
292
+ [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or
293
+ [deepdive (markdown)](resources/deepdive.md) for more examples.
532
294
 
533
295
  ```ruby
534
296
  require "llm"
@@ -566,80 +328,6 @@ ctx = LLM::Context.new(llm)
566
328
  ctx.talk ["Summarize this document.", ctx.local_file("README.md")]
567
329
  ```
568
330
 
569
- #### Agent
570
-
571
- This example uses [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html) directly and lets the agent manage tool execution. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
572
-
573
- ```ruby
574
- require "llm"
575
-
576
- class ShellAgent < LLM::Agent
577
- model "gpt-5.4-mini"
578
- instructions "You are a Linux system assistant."
579
- tools Shell
580
- concurrency :thread
581
- end
582
-
583
- llm = LLM.openai(key: ENV["KEY"])
584
- agent = ShellAgent.new(llm)
585
- puts agent.talk("What time is it on this system?").content
586
- ```
587
-
588
- #### Skills
589
-
590
- This example uses [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html) with directory-backed skills so `SKILL.md` capabilities run through the normal tool path. In llm.rb, a skill is exposed as a tool in the runtime. When that tool is called, it spawns a sub-agent with relevant context plus the instructions and tool subset declared in its own `SKILL.md`. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
591
-
592
- Each skill runs only with the tools declared in its own frontmatter.
593
-
594
- ```ruby
595
- require "llm"
596
-
597
- class Agent < LLM::Agent
598
- model "gpt-5.4-mini"
599
- instructions "You are a concise release assistant."
600
- skills "./skills/release", "./skills/review"
601
- tracer { LLM::Tracer::Logger.new(llm, path: "logs/release-agent.log") }
602
- end
603
-
604
- llm = LLM.openai(key: ENV["KEY"])
605
- puts Agent.new(llm).talk("Use the review skill.").content
606
- ```
607
-
608
- #### Streaming
609
-
610
- This example uses [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html) directly so visible output and tool execution can happen together. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
611
-
612
- ```ruby
613
- require "llm"
614
-
615
- class Stream < LLM::Stream
616
- def on_content(content)
617
- $stdout << content
618
- end
619
-
620
- def on_tool_call(tool, error)
621
- return queue << error if error
622
- $stdout << "\nRunning tool #{tool.name}...\n"
623
- queue << ctx.spawn(tool, :thread)
624
- end
625
-
626
- def on_tool_return(tool, result)
627
- if result.error?
628
- $stdout << "Tool #{tool.name} failed\n"
629
- else
630
- $stdout << "Finished tool #{tool.name}\n"
631
- end
632
- end
633
- end
634
-
635
- llm = LLM.openai(key: ENV["KEY"])
636
- stream = Stream.new
637
- ctx = LLM::Context.new(llm, stream:, tools: [System])
638
-
639
- ctx.talk("Run `date` and `uname -a`.")
640
- ctx.talk(ctx.wait(:thread)) while ctx.functions.any?
641
- ```
642
-
643
331
  #### Context Compaction
644
332
 
645
333
  This example uses [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html),
@@ -652,7 +340,9 @@ compactor can also use its own `model:` if you want summarization to run on a
652
340
  different model from the main context. `token_threshold:` accepts either a
653
341
  fixed token count or a percentage string like `"90%"`, which resolves
654
342
  against the active model context window and triggers compaction once total
655
- token usage goes over that percentage. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
343
+ token usage goes over that percentage. See the
344
+ [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or
345
+ [deepdive (markdown)](resources/deepdive.md) for more examples.
656
346
 
657
347
  ```ruby
658
348
  require "llm"
@@ -681,7 +371,15 @@ ctx = LLM::Context.new(
681
371
 
682
372
  #### Reasoning
683
373
 
684
- This example uses [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html) with the OpenAI Responses API so reasoning output is streamed separately from visible assistant output. See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
374
+ This example uses [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html)
375
+ with the OpenAI Responses API so reasoning output is streamed separately from
376
+ visible assistant output. See the
377
+ [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or
378
+ [deepdive (markdown)](resources/deepdive.md) for more examples.
379
+
380
+ To use the Responses API (OpenAI-specific), initialize a
381
+ context or agent with `mode: :responses` and keep using
382
+ `talk` for turns.
685
383
 
686
384
  ```ruby
687
385
  require "llm"
@@ -709,7 +407,10 @@ ctx.talk("Solve 17 * 19 and show your work.")
709
407
 
710
408
  #### Request Cancellation
711
409
 
712
- Need to cancel a stream? llm.rb has you covered through [`LLM::Context#interrupt!`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html#interrupt-21-instance_method). <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
410
+ Need to cancel a stream? llm.rb has you covered through
411
+ [`LLM::Context#interrupt!`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html#interrupt-21-instance_method).
412
+ <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html)
413
+ or [deepdive (markdown)](resources/deepdive.md) for more examples.
713
414
 
714
415
  ```ruby
715
416
  require "llm"
@@ -730,7 +431,14 @@ worker.join
730
431
 
731
432
  #### Sequel (ORM)
732
433
 
733
- The `plugin :llm` integration wraps [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html) on a `Sequel::Model` and keeps tool execution explicit. Like the ActiveRecord wrappers, its built-in persistence contract is the serialized `data` column, while `provider:` resolves a real `LLM::Provider` instance and `context:` injects defaults such as `model:`. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
434
+ The `plugin :llm` integration wraps
435
+ [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html) on a
436
+ `Sequel::Model` and keeps tool execution explicit. Like the ActiveRecord
437
+ wrappers, its built-in persistence contract is the serialized `data` column,
438
+ while `provider:` resolves a real `LLM::Provider` instance and `context:`
439
+ injects defaults such as `model:`. <br> See the
440
+ [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or
441
+ [deepdive (markdown)](resources/deepdive.md) for more examples.
734
442
 
735
443
  ```ruby
736
444
  require "llm"
@@ -744,7 +452,7 @@ class Context < Sequel::Model
744
452
  private
745
453
 
746
454
  def set_provider
747
- LLM.openai(key: ENV["OPENAI_SECRET"])
455
+ LLM.openai(key: ENV["OPENAI_SECRET"], persistent: true)
748
456
  end
749
457
 
750
458
  def set_context
@@ -765,7 +473,8 @@ one serialized `data` column. If your app has provider, model, or usage
765
473
  columns, provide them to llm.rb through `provider:` and `context:` instead of
766
474
  relying on reserved wrapper columns.
767
475
 
768
- See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
476
+ See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html)
477
+ or [deepdive (markdown)](resources/deepdive.md) for more examples.
769
478
 
770
479
  ```ruby
771
480
  require "llm"
@@ -821,7 +530,8 @@ manages tool execution for you. Like `acts_as_llm`, its built-in persistence
821
530
  contract is one serialized `data` column. If your app has provider or model
822
531
  columns, provide them to llm.rb through your hooks and agent DSL.
823
532
 
824
- See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
533
+ See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html)
534
+ or [deepdive (markdown)](resources/deepdive.md) for more examples.
825
535
 
826
536
  ```ruby
827
537
  require "llm"
@@ -874,13 +584,18 @@ end
874
584
 
875
585
  #### MCP
876
586
 
877
- This example uses [`LLM::MCP`](https://0x1eef.github.io/x/llm.rb/LLM/MCP.html) over HTTP so remote GitHub MCP tools run through the same `LLM::Context` tool path as local tools. It expects a GitHub token in `ENV["GITHUB_PAT"]`. See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
587
+ This example uses [`LLM::MCP`](https://0x1eef.github.io/x/llm.rb/LLM/MCP.html)
588
+ over HTTP so remote GitHub MCP tools run through the same
589
+ `LLM::Context` tool path as local tools. It expects a GitHub token in
590
+ `ENV["GITHUB_PAT"]`. See the
591
+ [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or
592
+ [deepdive (markdown)](resources/deepdive.md) for more examples.
878
593
 
879
594
  ```ruby
880
595
  require "llm"
881
596
  require "net/http/persistent"
882
597
 
883
- llm = LLM.openai(key: ENV["KEY"])
598
+ llm = LLM.openai(key: ENV["KEY"], persistent: true)
884
599
  mcp = LLM::MCP.http(
885
600
  url: "https://api.githubcopilot.com/mcp/",
886
601
  headers: {"Authorization" => "Bearer #{ENV["GITHUB_PAT"]}"},
@@ -890,7 +605,7 @@ mcp = LLM::MCP.http(
890
605
  mcp.start
891
606
  ctx = LLM::Context.new(llm, stream: $stdout, tools: mcp.tools)
892
607
  ctx.talk("Pull information about my GitHub account.")
893
- ctx.talk(ctx.call(:functions)) while ctx.functions.any?
608
+ ctx.talk(ctx.wait(:call)) while ctx.functions?
894
609
  mcp.stop
895
610
  ```
896
611
 
@@ -905,7 +620,7 @@ mcp = LLM::MCP.http(
905
620
  mcp.run do
906
621
  ctx = LLM::Context.new(llm, stream: $stdout, tools: mcp.tools)
907
622
  ctx.talk("Pull information about my GitHub account.")
908
- ctx.talk(ctx.call(:functions)) while ctx.functions.any?
623
+ ctx.talk(ctx.wait(:call)) while ctx.functions?
909
624
  end
910
625
  ```
911
626