llm.rb 8.1.0 → 9.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +120 -2
- data/README.md +161 -514
- data/lib/llm/active_record/acts_as_llm.rb +7 -8
- data/lib/llm/agent.rb +36 -16
- data/lib/llm/context.rb +30 -26
- data/lib/llm/contract/completion.rb +45 -0
- data/lib/llm/cost.rb +81 -4
- data/lib/llm/error.rb +1 -1
- data/lib/llm/function/array.rb +8 -5
- data/lib/llm/function/call_group.rb +39 -0
- data/lib/llm/function/fork/task.rb +6 -0
- data/lib/llm/function/ractor/task.rb +6 -0
- data/lib/llm/function/task.rb +10 -0
- data/lib/llm/function.rb +1 -0
- data/lib/llm/mcp/transport/http.rb +26 -46
- data/lib/llm/mcp/transport/stdio.rb +0 -8
- data/lib/llm/mcp.rb +6 -23
- data/lib/llm/provider.rb +23 -20
- data/lib/llm/providers/anthropic/error_handler.rb +6 -7
- data/lib/llm/providers/anthropic/files.rb +2 -2
- data/lib/llm/providers/anthropic/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/anthropic.rb +1 -1
- data/lib/llm/providers/bedrock/error_handler.rb +8 -9
- data/lib/llm/providers/bedrock/models.rb +13 -13
- data/lib/llm/providers/bedrock/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/bedrock.rb +1 -1
- data/lib/llm/providers/google/error_handler.rb +6 -7
- data/lib/llm/providers/google/files.rb +2 -4
- data/lib/llm/providers/google/images.rb +1 -1
- data/lib/llm/providers/google/models.rb +0 -2
- data/lib/llm/providers/google/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/google.rb +1 -1
- data/lib/llm/providers/ollama/error_handler.rb +6 -7
- data/lib/llm/providers/ollama/models.rb +0 -2
- data/lib/llm/providers/ollama/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/ollama.rb +1 -1
- data/lib/llm/providers/openai/audio.rb +3 -3
- data/lib/llm/providers/openai/error_handler.rb +6 -7
- data/lib/llm/providers/openai/files.rb +2 -2
- data/lib/llm/providers/openai/images.rb +3 -3
- data/lib/llm/providers/openai/models.rb +1 -1
- data/lib/llm/providers/openai/response_adapter/completion.rb +42 -0
- data/lib/llm/providers/openai/response_adapter/responds.rb +39 -0
- data/lib/llm/providers/openai/responses.rb +2 -2
- data/lib/llm/providers/openai/vector_stores.rb +1 -1
- data/lib/llm/providers/openai.rb +1 -1
- data/lib/llm/response.rb +10 -8
- data/lib/llm/sequel/plugin.rb +7 -8
- data/lib/llm/stream/queue.rb +15 -42
- data/lib/llm/stream.rb +4 -4
- data/lib/llm/transport/execution.rb +67 -0
- data/lib/llm/transport/http.rb +134 -0
- data/lib/llm/transport/persistent_http.rb +152 -0
- data/lib/llm/transport/response/http.rb +113 -0
- data/lib/llm/transport/response.rb +112 -0
- data/lib/llm/{provider/transport/http → transport}/stream_decoder.rb +8 -4
- data/lib/llm/transport.rb +139 -0
- data/lib/llm/usage.rb +14 -5
- data/lib/llm/version.rb +1 -1
- data/lib/llm.rb +2 -12
- data/llm.gemspec +2 -16
- metadata +11 -19
- data/lib/llm/provider/transport/http/execution.rb +0 -115
- data/lib/llm/provider/transport/http/interruptible.rb +0 -114
- data/lib/llm/provider/transport/http.rb +0 -145
- data/lib/llm/utils.rb +0 -19
data/README.md
CHANGED
@@ -4,519 +4,240 @@
  <p align="center">
  <a href="https://0x1eef.github.io/x/llm.rb?rebuild=1"><img src="https://img.shields.io/badge/docs-0x1eef.github.io-blue.svg" alt="RubyDoc"></a>
  <a href="https://opensource.org/license/0bsd"><img src="https://img.shields.io/badge/License-0BSD-orange.svg?" alt="License"></a>
- <a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-
+ <a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-9.0.0-green.svg?" alt="Version"></a>
  </p>

  ## About

- llm.rb is
- <br>
- llm.rb is designed for Ruby, and although it works great in Rails, it is not tightly
- coupled to it. It runs on the standard library by default (zero dependencies),
- loads optional pieces only when needed, includes built-in ActiveRecord support through
- `acts_as_llm` and `acts_as_agent`, includes built-in Sequel support through
- `plugin :llm` and `plugin :agent`, and is designed for engineers who want control over
- long-lived, tool-capable, stateful AI workflows instead of just
- request/response helpers.
- It provides one runtime for providers, agents, tools, skills, MCP servers, streaming,
- schemas, files, and persisted state, so real systems can be built out of one coherent
- execution model instead of a pile of adapters.
- It supports providers including OpenAI, Anthropic, Google Gemini, DeepSeek, xAI,
- Z.ai, and AWS Bedrock.
- It provides concurrent tool execution with multiple strategies exposed through a single
- runtime: async-task, threads, fibers, ractors and processes (fork). The first three are
- good for IO-bound work and the last two are good for CPU-bound work. Ractor support is
- experimental and comes with limitations.
- Want to see some code? Jump to [the examples](#examples) section. <br>
- Want to see a self-hosted LLM environment built on llm.rb? Check out [relay.app](https://github.com/llmrb/relay.app). <br>
- Want to use llm.rb with mruby ? Check out [mruby-llm](https://github.com/llmrb/mruby-llm)
- ## Architecture
- <p align="center">
- <img src="https://github.com/llmrb/llm.rb/raw/main/resources/architecture.png" alt="llm.rb architecture" width="790">
- </p>
- ## Core Concept
+ llm.rb is Ruby's most capable AI runtime.

+ It runs on Ruby's standard library by default. loads optional pieces
+ only when needed, and offers a single runtime for providers, agents,
+ tools, skills, MCP, streaming, files, and persisted state. As a bonus,
+ llm.rb is also [available for mruby](https://github.com/llmrb/mruby-llm).

- It
- -
- - usage and cost tracking
+ It supports OpenAI, OpenAI-compatible endpoints, Anthropic, Google
+ Gemini, DeepSeek, xAI, Z.ai, AWS Bedrock, Ollama, and llama.cpp. It
+ also includes built-in ActiveRecord and Sequel support, plus concurrent
+ tool execution through threads, tasks (via async gem), fibers, ractors,
+ and fork (via xchan.rb gem).

- same context object.
+ ## Quick start

+ #### LLM::Context

- The
+ The
+ [LLM::Context](https://0x1eef.github.io/x/llm.rb/LLM/Context.html)
+ object is at the heart of the runtime. Almost all other features build
+ on top of it. It is a low-level interface to a model, and requires tool
+ execution to be managed manually. The
+ [LLM::Agent](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html)
+ class is almost the same as
+ [LLM::Context](https://0x1eef.github.io/x/llm.rb/LLM/Context.html)
+ but it manages tool execution for you - we'll cover agents next:

+ ```ruby
+ require "llm"

- supports that same concept with the same execution model as the rest of the
- system.
+ llm = LLM.openai(key: ENV["KEY"])
+ ctx = LLM::Context.new(llm, stream: $stdout)
+ ctx.talk "Hello world"
+ ```

- define `name`, `description`, and `tools`. The `tools` entries are tool names,
- and each name must resolve to a subclass of
- [`LLM::Tool`](https://0x1eef.github.io/x/llm.rb/LLM/Tool.html) that is already
- loaded in the runtime.
+ #### LLM::Agent

- - search_docs
- - git
- ---
- Review the release state, summarize what changed, and prepare the release.
- ```
+ The
+ [LLM::Agent](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html)
+ object is implemented on top of
+ [LLM::Context](https://0x1eef.github.io/x/llm.rb/LLM/Context.html).
+ It provides the same interface, but manages tool execution for you. It
+ also has builtin features such as a loop guard that detects repeated
+ tool call patterns, and another guard that detects infinite tool call
+ loops. Both guards advise the model to change course rather than raise
+ an error:

  ```ruby
- model "gpt-5.4-mini"
- skills "./skills/release"
- tracer { LLM::Tracer::Logger.new(llm, path: "logs/release-agent.log") }
- end
+ require "llm"

  llm = LLM.openai(key: ENV["KEY"])
- Agent.new(llm, stream: $stdout)
+ agent = LLM::Agent.new(llm, stream: $stdout)
+ agent.talk "Hello world"
  ```

- ####
- Any ActiveRecord model or Sequel model can become an agent-capable model,
- including existing business and domain models, without forcing you into a
- separate agent table or a second persistence layer.
- `acts_as_agent` extends a model with agent capabilities: the same runtime
- surface as [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html),
- because it actually wraps an `LLM::Agent`, plus persistence through one text,
- JSON, or JSONB-backed `data` column on the same table. If your app also has
- provider or model columns, provide them to llm.rb through `set_provider` and
- `set_context`.
+ #### Tools

+ The
+ [LLM::Tool](https://0x1eef.github.io/x/llm.rb/LLM/Tool.html)
+ class can be subclassed to implement your own tools that can extend the
+ abilities of a model:

  ```ruby
- class
- LLM.openai(key: ENV["OPENAI_SECRET"])
- end
- def set_context
- { mode: :responses, store: false }
+ class ReadFile < LLM::Tool
+ name "read-file"
+ description "Read a file"
+ parameter :path, String, "The filename or path"
+ required %i[path]
+
+ def call(path:)
+ {contents: File.read(path)}
  end
  end
  ```

- ####
- llm.rb is especially strong when you want to build agentic systems in a Ruby
- way. Agents can be ordinary application models with state, associations,
- tools, skills, and persistence, which makes it much easier to build systems
- where users have their own specialized agents instead of treating agents as
- something outside the app.
- That pattern works so well in llm.rb because
- [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html),
- `acts_as_agent`, `plugin :agent`, skills, tools, and persisted runtime state
- all fit the same execution model. The runtime stays small enough that the
- main design work becomes application design, not orchestration glue.
- For a concrete example, see
- [How to build a platform of agents](https://0x1eef.github.io/posts/how-to-build-a-platform-of-agents).
- #### Persistence
+ #### MCP

- The
+ The
+ [LLM::MCP](https://0x1eef.github.io/x/llm.rb/LLM/MCP.html)
+ object lets llm.rb use tools provided by an MCP server. Those tools are
+ exposed through the same runtime as local tools, so you can pass them
+ to either
+ [LLM::Context](https://0x1eef.github.io/x/llm.rb/LLM/Context.html)
+ or
+ [LLM::Agent](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html).
+ In this example, the MCP server runs over stdio and
+ [LLM::Context](https://0x1eef.github.io/x/llm.rb/LLM/Context.html)
+ uses the same tool loop as local tools:

  ```ruby
- ctx.talk("Remember that my favorite language is Ruby.")
- ctx.save(path: "context.json")
- ```
- #### Context Compaction
+ require "llm"

- through [`LLM::Compactor`](https://0x1eef.github.io/x/llm.rb/LLM/Compactor.html),
- and when a stream is present it emits `on_compaction` and
- `on_compaction_finish` through [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html).
- The compactor can also use a different model from the main context, which is
- useful when you want summarization to run on a cheaper or faster model.
- `token_threshold:` accepts either a fixed token count or a percentage string
- like `"90%"`, which resolves against the active model context window and
- triggers compaction once total token usage goes over that percentage.
+ llm = LLM.openai(key: ENV["KEY"])
+ mcp = LLM::MCP.stdio(argv: ["ruby", "server.rb"])

- ctx = LLM::Context.new(
- retention_window: 8,
- model: "gpt-5.4-mini"
- }
- )
+ mcp.run do
+ ctx = LLM::Context.new(llm, stream: $stdout, tools: mcp.tools)
+ ctx.talk "Use the available tools to inspect the environment."
+ ctx.talk(ctx.wait(:call)) while ctx.functions?
+ end
  ```

- ####
+ #### Skills

+ Skills are reusable instructions loaded from a `SKILL.md` directory. They let
+ you package behavior and tool access together, and they plug into the same
+ runtime as tools, agents, and MCP. When a skill runs, llm.rb spawns a
+ subagent with the skill instructions, access to only the tools listed in the
+ skill, and recent conversation context:

+ ```yaml
+ ---
+ name: release
+ description: Prepare a release
+ tools: ["search-docs", "git"]
+ ---

- ctx = LLM::Context.new(llm)
- ctx.guard = MyGuard.new
- ```
+ ## Task

- Transformers let llm.rb rewrite outgoing prompts and params before a request
- is sent to the provider. They also live on
- [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html), but
- they solve a different problem from guards: instead of blocking execution,
- they can normalize or scrub what gets sent. When a stream is present, that
- lifecycle is also exposed through
- [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html) with
- `on_transform` and `on_transform_finish`.
- That makes them a good fit for things like PII scrubbing, prompt
- normalization, or request-level param injection. A transformer just needs to
- implement `call(ctx, prompt, params)` and return `[prompt, params]`. That
- means a transformer can scrub plain text prompts, but it can also scrub
- [`LLM::Function::Return`](https://0x1eef.github.io/x/llm.rb/LLM/Function/Return.html)
- values. In other words, you can intercept a tool call's return value and
- modify it before sending it back to the LLM.
- That is also a useful UI hook. A stream can surface messages like
- `Anonymizing your data...` before a scrubber runs and `Data anonymized.`
- after it finishes.
+ Review the release state, summarize what changed, and prepare the release.
+ ```

  ```ruby
- EMAIL = /\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b/i
+ require "llm"

+ class ReleaseAgent < LLM::Agent
+ model "gpt-5.4-mini"
+ skills "./skills/release"
+ end

+ llm = LLM.openai(key: ENV["KEY"])
+ ReleaseAgent.new(llm, stream: $stdout).talk("Prepare the next release.")
+ ```

- case prompt
- when String then prompt.gsub(EMAIL, "[REDACTED_EMAIL]")
- when Array then prompt.map { scrub(_1) }
- when LLM::Function::Return then on_tool_return(prompt)
- else prompt
- end
- end
+ #### LLM::Stream

- end
- LLM::Function::Return.new(result.id, result.name, value)
- end
+ The
+ [LLM::Stream](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html)
+ object lets you observe output and runtime events as they happen. You
+ can subclass it to handle streamed content in your own application:

- end
+ ```ruby
+ require "llm"
+
+ class Stream < LLM::Stream
+ def on_content(content)
+ $stdout << content
  end
  end

- ctx
+ llm = LLM.openai(key: ENV["KEY"])
+ ctx = LLM::Context.new(llm, stream: Stream.new)
+ ctx.talk "Write a haiku about Ruby."
  ```

- `on_transform` and `on_transform_finish` on
- [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html).
+ #### LLM::Stream (advanced)

- `
- means visible output, reasoning output, request rewriting, tool execution,
- and context compaction can all be driven through the same execution path.
+ The
+ [LLM::Stream](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html)
+ object can also resolve tool calls while output is still streaming. In
+ `on_tool_call`, you can spawn the tool, push the work onto the stream
+ queue, and later drain it with `wait`:

  ```ruby
+ require "llm"
+
  class Stream < LLM::Stream
- def
+ def on_content(content)
+ $stdout << content
  end

- def
+ def on_tool_call(tool, error)
+ return queue << error if error
+ queue << ctx.spawn(tool, :thread)
  end
  end
+
+ llm = LLM.openai(key: ENV["KEY"])
+ ctx = LLM::Context.new(llm, stream: Stream.new, tools: [ReadFile])
+ ctx.talk "Read README.md and summarize the quick start."
+ ctx.talk(ctx.wait) while ctx.functions?
  ```

  #### Concurrency

- `:fiber` uses `Fiber.schedule`, so it requires `Fiber.scheduler`.
+ llm.rb can run tool work concurrently. This is useful when a model calls
+ multiple tools and you want to resolve them in parallel instead of one
+ at a time. On
+ [LLM::Agent](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html),
+ you can enable this with `concurrency`. Common options are `:call` for
+ sequential execution, `:thread`, or `:task` for concurrent IO-bound work, and
+ `:ractor` or `:fork` for more isolated CPU-bound work:

  ```ruby
+ require "llm"
+
  class Agent < LLM::Agent
  model "gpt-5.4-mini"
- tools
+ tools ReadFile
  concurrency :thread
  end
- ```
- #### MCP

- Use `mcp.run do ... end` for scoped work where the client should start and
- stop around one block. Use `mcp.start` and `mcp.stop` directly when you need
- finer sequential control across several steps before shutting the client down.
- ```ruby
- mcp = LLM::MCP.http(
- url: "https://api.githubcopilot.com/mcp/",
- headers: {"Authorization" => "Bearer #{ENV["GITHUB_PAT"]}"},
- persistent: true
- )
- mcp.run do
- ctx = LLM::Context.new(llm, tools: mcp.tools)
- end
+ llm = LLM.openai(key: ENV["KEY"])
+ agent = Agent.new(llm, stream: $stdout)
+ agent.talk "Read README.md and CHANGELOG.md and compare them."
  ```

- ####
+ #### Serialization

- Go's context package. In fact, llm.rb is heavily inspired by Go but with a Ruby
- twist.
+ The [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html)
+ object can be serialized to JSON, which makes it suitable for storing
+ in a file, a database column, or a Redis queue. The built-in
+ ActiveRecord and Sequel plugins are built on top of this feature:

  ```ruby
  require "llm"
- require "io/console"

  llm = LLM.openai(key: ENV["KEY"])
- ctx = LLM::Context.new(llm, stream: $stdout)
- worker = Thread.new do
- ctx.talk("Write a very long essay about network protocols.")
- rescue LLM::Interrupt
- puts "Request was interrupted!"
- end

+ # Serialize a context
+ ctx1 = LLM::Context.new(llm)
+ ctx1.talk "Remember that my favorite language is Ruby"
+ string = ctx1.to_json

- Put providers, tools, MCP servers, and application APIs behind one runtime
- model instead of stitching them together by hand.
- - **Contexts are central** <br>
- Keep history, tools, schema, usage, persistence, and execution state in one
- place instead of spreading them across your app.
- - **Contexts can be serialized** <br>
- Save and restore live state for jobs, databases, retries, or long-running
- workflows.
- ### Runtime Behavior
- - **Streaming and tool execution work together** <br>
- Start tool work while output is still streaming so you can hide latency
- instead of waiting for turns to finish.
- - **Agents auto-manage tool execution** <br>
- Use `LLM::Agent` when you want the same stateful runtime surface as
- `LLM::Context`, but with tool loops executed automatically according to a
- configured concurrency mode such as `:call`, `:thread`, `:task`, `:fiber`,
- `:fork`, or experimental `:ractor` support for class-based tools. MCP tools
- are not supported by the current `:ractor` mode, but mixed tool sets can
- still route MCP tools and local tools through different strategies at
- runtime. By default, the tool attempt budget is `25`. When an agent
- exhausts that budget, it sends advisory tool errors back through the model
- instead of raising out of the runtime. Set `tool_attempts: nil` to disable
- that advisory behavior.
- - **Tool calls have an explicit lifecycle** <br>
- A tool call can be executed, cancelled through
- [`LLM::Function#cancel`](https://0x1eef.github.io/x/llm.rb/LLM/Function.html#cancel-instance_method),
- or left unresolved for manual handling, but the normal runtime contract is
- still that a model-issued tool request is answered with a tool return.
- - **Requests can be interrupted cleanly** <br>
- Stop in-flight provider work through the same runtime instead of treating
- cancellation as a separate concern.
- [`LLM::Context#cancel!`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html#cancel-21-instance_method)
- is inspired by Go's context cancellation model.
- - **Concurrency is a first-class feature** <br>
- Use async tasks, threads, fibers, forks, or experimental ractors without
- rewriting your tool layer. Async tasks, threads, and fibers are the
- I/O-bound options. Fork and ractor are the CPU-bound options. `:fork`
- requires [`xchan.rb`](https://github.com/0x1eef/xchan.rb#readme) support.
- The current `:ractor` mode is for class-based tools, and MCP tools are
- not supported by ractor, but mixed workloads can branch on `tool.mcp?`
- and choose a supported strategy per tool. Class-based `:ractor` tools
- still emit normal tool tracer callbacks. `:fiber` uses `Fiber.schedule`,
- so it requires `Fiber.scheduler`.
- - **Advanced workloads are built in, not bolted on** <br>
- Streaming, concurrent tool execution, persistence, tracing, and MCP support
- all fit the same runtime model.
- ### Integration
- - **MCP is built in** <br>
- Connect to MCP servers over stdio or HTTP without bolting on a separate
- integration stack.
- - **ActiveRecord and Sequel persistence are built in** <br>
- llm.rb includes built-in ActiveRecord support through `acts_as_llm` and
- `acts_as_agent`, plus built-in Sequel support through `plugin :llm` and
- `plugin :agent`.
- Use `acts_as_llm` when you want to wrap `LLM::Context`, `acts_as_agent`
- when you want to wrap `LLM::Agent`, `plugin :llm` when you want a
- `LLM::Context` on a Sequel model, or `plugin :agent` when you want an
- `LLM::Agent`. These integrations support `provider:` and `context:` hooks,
- plus `format: :string` for text columns or `format: :jsonb` for native
- PostgreSQL JSON storage when ORM JSON typecasting support is enabled.
- - **ORM models can become persistent agents** <br>
- Turn an ActiveRecord or Sequel model into an agent-capable model with
- built-in persistence, stored on the same table, with `jsonb` support when
- your ORM and database support native JSON columns.
- - **Persistent HTTP pooling is shared process-wide** <br>
- When enabled, separate
- [`LLM::Provider`](https://0x1eef.github.io/x/llm.rb/LLM/Provider.html)
- instances with the same endpoint settings can share one persistent
- pool, and separate HTTP
- [`LLM::MCP`](https://0x1eef.github.io/x/llm.rb/LLM/MCP.html)
- instances can do the same, instead of each object creating its own
- isolated per-instance transport.
- - **OpenAI-compatible gateways are supported** <br>
- Target OpenAI-compatible services such as DeepInfra and OpenRouter, as well
- as proxies and self-hosted servers, with `host:` and `base_path:` when they
- preserve OpenAI request shapes but change the API root path.
- - **Provider support is broad** <br>
- Work with OpenAI, OpenAI-compatible endpoints, Anthropic, Google, DeepSeek,
- Z.ai, xAI, AWS Bedrock, llama.cpp, and Ollama through the same runtime.
- - **Tools are explicit** <br>
- Run local tools, provider-native tools, and MCP tools through the same path
- with fewer special cases.
- - **Skills become bounded runtime capabilities** <br>
- Point llm.rb at directories with a `SKILL.md`, resolve named tools through
- the registry, and adapt each skill into its own callable capability through
- the normal runtime. Unlike a generic skill-discovery tool, each skill runs
- with its own bounded tool subset and behaves like a task-scoped sub-agent.
- - **Providers are normalized, not flattened** <br>
- Share one API surface across providers without losing access to provider-
- specific capabilities where they matter.
- - **Responses keep a uniform shape** <br>
- Provider calls return
- [`LLM::Response`](https://0x1eef.github.io/x/llm.rb/LLM/Response.html)
- objects as a common base shape, then extend them with endpoint- or
- provider-specific behavior when needed.
- - **Low-level access is still there** <br>
- Normalized responses still keep the raw `Net::HTTPResponse` available when
- you need headers, status, or other HTTP details.
- - **Local model metadata is included** <br>
- Model capabilities, pricing, and limits are available locally without extra
- API calls.
- ### Design Philosophy
- - **Runs on the stdlib** <br>
- Start with Ruby's standard library and add extra dependencies only when you
- need them.
- - **It is highly pluggable** <br>
- Add tools, swap providers, change JSON backends, plug in tracing, or layer
- internal APIs and MCP servers into the same execution path.
- - **It scales from scripts to long-lived systems** <br>
- The same primitives work for one-off scripts, background jobs, and more
- demanding application workloads with streaming, persistence, and tracing.
- - **Thread boundaries are clear** <br>
- Providers are shareable. Contexts are stateful and should stay thread-local.
- ## Capabilities
- Execution:
- - **Chat & Contexts** — stateless and stateful interactions with persistence
- - **Context Serialization** — save and restore state across processes or time
- - **Streaming** — visible output, reasoning output, tool-call events
- - **Request Interruption** — stop in-flight provider work cleanly
- - **Concurrent Execution** — threads, async tasks, and fibers
- Runtime Building Blocks:
- - **Tool Calling** — class-based tools and closure-based functions
- - **Run Tools While Streaming** — overlap model output with tool latency
- - **Agents** — reusable assistants with tool auto-execution
- - **Skills** — directory-backed capabilities loaded from `SKILL.md`
- - **MCP Support** — stdio and HTTP MCP clients with prompt and tool support
- - **Context Compaction** — summarize older history in long-lived contexts
- Data and Structure:
- - **Structured Outputs** — JSON Schema-based responses
- - **Responses API** — stateful response workflows where providers support them
- - **Multimodal Inputs** — text, images, audio, documents, URLs
- - **Audio** — speech generation, transcription, translation
- - **Images** — generation and editing
- - **Files API** — upload and reference files in prompts
- - **Embeddings** — vector generation for search and RAG
- - **Vector Stores** — retrieval workflows
- Operations:
- - **Cost Tracking** — local cost estimation without extra API calls
- - **Observability** — tracing, logging, telemetry
- - **Model Registry** — local metadata for capabilities, limits, pricing
- - **Persistent HTTP** — optional connection pooling for providers and MCP
+ # Restore a context (from JSON)
+ ctx2 = LLM::Context.new(llm, stream: $stdout)
+ ctx2.restore(string:)
+ ctx2.talk "What is my favorite language?"
+ ```

  ## Installation

@@ -566,80 +287,6 @@ ctx = LLM::Context.new(llm)
  ctx.talk ["Summarize this document.", ctx.local_file("README.md")]
  ```

- #### Agent
- This example uses [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html) directly and lets the agent manage tool execution. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
- ```ruby
- require "llm"
- class ShellAgent < LLM::Agent
- model "gpt-5.4-mini"
- instructions "You are a Linux system assistant."
- tools Shell
- concurrency :thread
- end
- llm = LLM.openai(key: ENV["KEY"])
- agent = ShellAgent.new(llm)
- puts agent.talk("What time is it on this system?").content
- ```
- #### Skills
- This example uses [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html) with directory-backed skills so `SKILL.md` capabilities run through the normal tool path. In llm.rb, a skill is exposed as a tool in the runtime. When that tool is called, it spawns a sub-agent with relevant context plus the instructions and tool subset declared in its own `SKILL.md`. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
- Each skill runs only with the tools declared in its own frontmatter.
- ```ruby
- require "llm"
- class Agent < LLM::Agent
- model "gpt-5.4-mini"
- instructions "You are a concise release assistant."
- skills "./skills/release", "./skills/review"
- tracer { LLM::Tracer::Logger.new(llm, path: "logs/release-agent.log") }
- end
- llm = LLM.openai(key: ENV["KEY"])
- puts Agent.new(llm).talk("Use the review skill.").content
- ```
- #### Streaming
- This example uses [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html) directly so visible output and tool execution can happen together. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
- ```ruby
- require "llm"
- class Stream < LLM::Stream
- def on_content(content)
- $stdout << content
- end
- def on_tool_call(tool, error)
- return queue << error if error
- $stdout << "\nRunning tool #{tool.name}...\n"
- queue << ctx.spawn(tool, :thread)
- end
- def on_tool_return(tool, result)
- if result.error?
- $stdout << "Tool #{tool.name} failed\n"
- else
- $stdout << "Finished tool #{tool.name}\n"
- end
- end
- end
- llm = LLM.openai(key: ENV["KEY"])
- stream = Stream.new
- ctx = LLM::Context.new(llm, stream:, tools: [System])
- ctx.talk("Run `date` and `uname -a`.")
- ctx.talk(ctx.wait(:thread)) while ctx.functions.any?
- ```
  #### Context Compaction

  This example uses [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html),

@@ -652,7 +299,7 @@ compactor can also use its own `model:` if you want summarization to run on a
  different model from the main context. `token_threshold:` accepts either a
  fixed token count or a percentage string like `"90%"`, which resolves
  against the active model context window and triggers compaction once total
- token usage goes over that percentage.
+ token usage goes over that percentage. See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.

  ```ruby
  require "llm"

@@ -744,7 +391,7 @@ class Context < Sequel::Model
  private

  def set_provider
- LLM.openai(key: ENV["OPENAI_SECRET"])
+ LLM.openai(key: ENV["OPENAI_SECRET"], persistent: true)
  end

  def set_context

@@ -880,7 +527,7 @@ This example uses [`LLM::MCP`](https://0x1eef.github.io/x/llm.rb/LLM/MCP.html) o
  require "llm"
  require "net/http/persistent"

- llm = LLM.openai(key: ENV["KEY"])
+ llm = LLM.openai(key: ENV["KEY"], persistent: true)
  mcp = LLM::MCP.http(
  url: "https://api.githubcopilot.com/mcp/",
  headers: {"Authorization" => "Bearer #{ENV["GITHUB_PAT"]}"},

@@ -890,7 +537,7 @@ mcp = LLM::MCP.http(
  mcp.start
  ctx = LLM::Context.new(llm, stream: $stdout, tools: mcp.tools)
  ctx.talk("Pull information about my GitHub account.")
- ctx.talk(ctx.
+ ctx.talk(ctx.wait(:call)) while ctx.functions?
  mcp.stop
  ```

@@ -905,7 +552,7 @@ mcp = LLM::MCP.http(
  mcp.run do
  ctx = LLM::Context.new(llm, stream: $stdout, tools: mcp.tools)
  ctx.talk("Pull information about my GitHub account.")
- ctx.talk(ctx.
+ ctx.talk(ctx.wait(:call)) while ctx.functions?
  end
  end
  ```