llm.rb 8.0.0 → 9.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +165 -2
- data/README.md +161 -509
- data/data/bedrock.json +2948 -0
- data/data/deepseek.json +8 -8
- data/data/openai.json +39 -2
- data/data/xai.json +35 -0
- data/data/zai.json +1 -1
- data/lib/llm/active_record/acts_as_llm.rb +7 -8
- data/lib/llm/agent.rb +36 -16
- data/lib/llm/context.rb +30 -26
- data/lib/llm/contract/completion.rb +45 -0
- data/lib/llm/cost.rb +81 -4
- data/lib/llm/error.rb +1 -1
- data/lib/llm/function/array.rb +8 -5
- data/lib/llm/function/call_group.rb +39 -0
- data/lib/llm/function/fork/task.rb +6 -0
- data/lib/llm/function/ractor/task.rb +6 -0
- data/lib/llm/function/task.rb +10 -0
- data/lib/llm/function.rb +1 -0
- data/lib/llm/mcp/transport/http.rb +26 -46
- data/lib/llm/mcp/transport/stdio.rb +0 -8
- data/lib/llm/mcp.rb +6 -23
- data/lib/llm/object.rb +8 -0
- data/lib/llm/provider.rb +29 -19
- data/lib/llm/providers/anthropic/error_handler.rb +6 -7
- data/lib/llm/providers/anthropic/files.rb +2 -2
- data/lib/llm/providers/anthropic/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/anthropic.rb +1 -1
- data/lib/llm/providers/bedrock/error_handler.rb +79 -0
- data/lib/llm/providers/bedrock/models.rb +109 -0
- data/lib/llm/providers/bedrock/request_adapter/completion.rb +153 -0
- data/lib/llm/providers/bedrock/request_adapter.rb +95 -0
- data/lib/llm/providers/bedrock/response_adapter/completion.rb +173 -0
- data/lib/llm/providers/bedrock/response_adapter/models.rb +34 -0
- data/lib/llm/providers/bedrock/response_adapter.rb +40 -0
- data/lib/llm/providers/bedrock/signature.rb +166 -0
- data/lib/llm/providers/bedrock/stream_decoder.rb +140 -0
- data/lib/llm/providers/bedrock/stream_parser.rb +201 -0
- data/lib/llm/providers/bedrock.rb +272 -0
- data/lib/llm/providers/google/error_handler.rb +6 -7
- data/lib/llm/providers/google/files.rb +2 -4
- data/lib/llm/providers/google/images.rb +1 -1
- data/lib/llm/providers/google/models.rb +0 -2
- data/lib/llm/providers/google/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/google.rb +1 -1
- data/lib/llm/providers/ollama/error_handler.rb +6 -7
- data/lib/llm/providers/ollama/models.rb +0 -2
- data/lib/llm/providers/ollama/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/ollama.rb +1 -1
- data/lib/llm/providers/openai/audio.rb +3 -3
- data/lib/llm/providers/openai/error_handler.rb +6 -7
- data/lib/llm/providers/openai/files.rb +2 -2
- data/lib/llm/providers/openai/images.rb +3 -3
- data/lib/llm/providers/openai/models.rb +1 -1
- data/lib/llm/providers/openai/response_adapter/completion.rb +42 -0
- data/lib/llm/providers/openai/response_adapter/responds.rb +39 -0
- data/lib/llm/providers/openai/responses.rb +2 -2
- data/lib/llm/providers/openai/vector_stores.rb +1 -1
- data/lib/llm/providers/openai.rb +1 -1
- data/lib/llm/response.rb +10 -8
- data/lib/llm/sequel/plugin.rb +7 -8
- data/lib/llm/stream/queue.rb +15 -42
- data/lib/llm/stream.rb +4 -4
- data/lib/llm/transport/execution.rb +67 -0
- data/lib/llm/transport/http.rb +134 -0
- data/lib/llm/transport/persistent_http.rb +152 -0
- data/lib/llm/transport/response/http.rb +113 -0
- data/lib/llm/transport/response.rb +112 -0
- data/lib/llm/{provider/transport/http → transport}/stream_decoder.rb +8 -4
- data/lib/llm/transport.rb +139 -0
- data/lib/llm/usage.rb +14 -5
- data/lib/llm/version.rb +1 -1
- data/lib/llm.rb +10 -12
- data/llm.gemspec +2 -16
- metadata +23 -19
- data/lib/llm/provider/transport/http/execution.rb +0 -115
- data/lib/llm/provider/transport/http/interruptible.rb +0 -114
- data/lib/llm/provider/transport/http.rb +0 -145
- data/lib/llm/utils.rb +0 -19
data/README.md
CHANGED
|
@@ -4,514 +4,240 @@
|
|
|
4
4
|
<p align="center">
|
|
5
5
|
<a href="https://0x1eef.github.io/x/llm.rb?rebuild=1"><img src="https://img.shields.io/badge/docs-0x1eef.github.io-blue.svg" alt="RubyDoc"></a>
|
|
6
6
|
<a href="https://opensource.org/license/0bsd"><img src="https://img.shields.io/badge/License-0BSD-orange.svg?" alt="License"></a>
|
|
7
|
-
<a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-
|
|
7
|
+
<a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-9.0.0-green.svg?" alt="Version"></a>
|
|
8
8
|
</p>
|
|
9
9
|
|
|
10
10
|
## About
|
|
11
11
|
|
|
12
|
-
llm.rb is
|
|
13
|
-
<br>
|
|
14
|
-
|
|
15
|
-
llm.rb is designed for Ruby, and although it works great in Rails, it is not tightly
|
|
16
|
-
coupled to it. It runs on the standard library by default (zero dependencies),
|
|
17
|
-
loads optional pieces only when needed, includes built-in ActiveRecord support through
|
|
18
|
-
`acts_as_llm` and `acts_as_agent`, includes built-in Sequel support through
|
|
19
|
-
`plugin :llm` and `plugin :agent`, and is designed for engineers who want control over
|
|
20
|
-
long-lived, tool-capable, stateful AI workflows instead of just
|
|
21
|
-
request/response helpers.
|
|
22
|
-
|
|
23
|
-
It provides one runtime for providers, agents, tools, skills, MCP servers, streaming,
|
|
24
|
-
schemas, files, and persisted state, so real systems can be built out of one coherent
|
|
25
|
-
execution model instead of a pile of adapters.
|
|
26
|
-
|
|
27
|
-
It provides concurrent tool execution with multiple strategies exposed through a single
|
|
28
|
-
runtime: async-task, threads, fibers, ractors and processes (fork). The first three are
|
|
29
|
-
good for IO-bound work and the last two are good for CPU-bound work. Ractor support is
|
|
30
|
-
experimental and comes with limitations.
|
|
31
|
-
|
|
32
|
-
Want to see some code? Jump to [the examples](#examples) section. <br>
|
|
33
|
-
Want to see a self-hosted LLM environment built on llm.rb? Check out [Relay](https://github.com/llmrb/relay).
|
|
34
|
-
|
|
35
|
-
## Architecture
|
|
12
|
+
llm.rb is Ruby's most capable AI runtime.
|
|
36
13
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
## Core Concept
|
|
14
|
+
It runs on Ruby's standard library by default, loads optional pieces
|
|
15
|
+
only when needed, and offers a single runtime for providers, agents,
|
|
16
|
+
tools, skills, MCP, streaming, files, and persisted state. As a bonus,
|
|
17
|
+
llm.rb is also [available for mruby](https://github.com/llmrb/mruby-llm).
|
|
42
18
|
|
|
43
|
-
|
|
44
|
-
|
|
19
|
+
It supports OpenAI, OpenAI-compatible endpoints, Anthropic, Google
|
|
20
|
+
Gemini, DeepSeek, xAI, Z.ai, AWS Bedrock, Ollama, and llama.cpp. It
|
|
21
|
+
also includes built-in ActiveRecord and Sequel support, plus concurrent
|
|
22
|
+
tool execution through threads, tasks (via async gem), fibers, ractors,
|
|
23
|
+
and fork (via xchan.rb gem).
|
|
45
24
|
|
|
46
|
-
|
|
47
|
-
- message history
|
|
48
|
-
- tool state
|
|
49
|
-
- schemas
|
|
50
|
-
- streaming configuration
|
|
51
|
-
- usage and cost tracking
|
|
25
|
+
## Quick start
|
|
52
26
|
|
|
53
|
-
|
|
54
|
-
same context object.
|
|
27
|
+
#### LLM::Context
|
|
55
28
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
29
|
+
The
|
|
30
|
+
[LLM::Context](https://0x1eef.github.io/x/llm.rb/LLM/Context.html)
|
|
31
|
+
object is at the heart of the runtime. Almost all other features build
|
|
32
|
+
on top of it. It is a low-level interface to a model, and requires tool
|
|
33
|
+
execution to be managed manually. The
|
|
34
|
+
[LLM::Agent](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html)
|
|
35
|
+
class is almost the same as
|
|
36
|
+
[LLM::Context](https://0x1eef.github.io/x/llm.rb/LLM/Context.html)
|
|
37
|
+
but it manages tool execution for you - we'll cover agents next:
|
|
61
38
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
require a second orchestration layer or a parallel abstraction. If you've
|
|
65
|
-
used Claude or Codex, you know the general idea of skills, and llm.rb
|
|
66
|
-
supports that same concept with the same execution model as the rest of the
|
|
67
|
-
system.
|
|
39
|
+
```ruby
|
|
40
|
+
require "llm"
|
|
68
41
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
loaded in the runtime.
|
|
42
|
+
llm = LLM.openai(key: ENV["KEY"])
|
|
43
|
+
ctx = LLM::Context.new(llm, stream: $stdout)
|
|
44
|
+
ctx.talk "Hello world"
|
|
45
|
+
```
|
|
74
46
|
|
|
75
|
-
|
|
76
|
-
commands, you would typically pair the skill with a tool that can execute
|
|
77
|
-
system commands.
|
|
47
|
+
#### LLM::Agent
|
|
78
48
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
```
|
|
49
|
+
The
|
|
50
|
+
[LLM::Agent](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html)
|
|
51
|
+
object is implemented on top of
|
|
52
|
+
[LLM::Context](https://0x1eef.github.io/x/llm.rb/LLM/Context.html).
|
|
53
|
+
It provides the same interface, but manages tool execution for you. It
|
|
54
|
+
also has built-in features such as a loop guard that detects repeated
|
|
55
|
+
tool call patterns, and another guard that detects infinite tool call
|
|
56
|
+
loops. Both guards advise the model to change course rather than raise
|
|
57
|
+
an error:
|
|
89
58
|
|
|
90
59
|
```ruby
|
|
91
|
-
|
|
92
|
-
model "gpt-5.4-mini"
|
|
93
|
-
skills "./skills/release"
|
|
94
|
-
tracer { LLM::Tracer::Logger.new(llm, path: "logs/release-agent.log") }
|
|
95
|
-
end
|
|
60
|
+
require "llm"
|
|
96
61
|
|
|
97
62
|
llm = LLM.openai(key: ENV["KEY"])
|
|
98
|
-
Agent.new(llm, stream: $stdout)
|
|
63
|
+
agent = LLM::Agent.new(llm, stream: $stdout)
|
|
64
|
+
agent.talk "Hello world"
|
|
99
65
|
```
|
|
100
66
|
|
|
101
|
-
####
|
|
102
|
-
|
|
103
|
-
Any ActiveRecord model or Sequel model can become an agent-capable model,
|
|
104
|
-
including existing business and domain models, without forcing you into a
|
|
105
|
-
separate agent table or a second persistence layer.
|
|
106
|
-
|
|
107
|
-
`acts_as_agent` extends a model with agent capabilities: the same runtime
|
|
108
|
-
surface as [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html),
|
|
109
|
-
because it actually wraps an `LLM::Agent`, plus persistence through one text,
|
|
110
|
-
JSON, or JSONB-backed `data` column on the same table. If your app also has
|
|
111
|
-
provider or model columns, provide them to llm.rb through `set_provider` and
|
|
112
|
-
`set_context`.
|
|
67
|
+
#### Tools
|
|
113
68
|
|
|
69
|
+
The
|
|
70
|
+
[LLM::Tool](https://0x1eef.github.io/x/llm.rb/LLM/Tool.html)
|
|
71
|
+
class can be subclassed to implement your own tools that can extend the
|
|
72
|
+
abilities of a model:
|
|
114
73
|
|
|
115
74
|
```ruby
|
|
116
|
-
class
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
LLM.openai(key: ENV["OPENAI_SECRET"])
|
|
125
|
-
end
|
|
126
|
-
|
|
127
|
-
def set_context
|
|
128
|
-
{ mode: :responses, store: false }
|
|
75
|
+
class ReadFile < LLM::Tool
|
|
76
|
+
name "read-file"
|
|
77
|
+
description "Read a file"
|
|
78
|
+
parameter :path, String, "The filename or path"
|
|
79
|
+
required %i[path]
|
|
80
|
+
|
|
81
|
+
def call(path:)
|
|
82
|
+
{contents: File.read(path)}
|
|
129
83
|
end
|
|
130
84
|
end
|
|
131
85
|
```
|
|
132
86
|
|
|
133
|
-
####
|
|
134
|
-
|
|
135
|
-
llm.rb is especially strong when you want to build agentic systems in a Ruby
|
|
136
|
-
way. Agents can be ordinary application models with state, associations,
|
|
137
|
-
tools, skills, and persistence, which makes it much easier to build systems
|
|
138
|
-
where users have their own specialized agents instead of treating agents as
|
|
139
|
-
something outside the app.
|
|
140
|
-
|
|
141
|
-
That pattern works so well in llm.rb because
|
|
142
|
-
[`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html),
|
|
143
|
-
`acts_as_agent`, `plugin :agent`, skills, tools, and persisted runtime state
|
|
144
|
-
all fit the same execution model. The runtime stays small enough that the
|
|
145
|
-
main design work becomes application design, not orchestration glue.
|
|
146
|
-
|
|
147
|
-
For a concrete example, see
|
|
148
|
-
[How to build a platform of agents](https://0x1eef.github.io/posts/how-to-build-a-platform-of-agents).
|
|
149
|
-
|
|
150
|
-
#### Persistence
|
|
87
|
+
#### MCP
|
|
151
88
|
|
|
152
|
-
The
|
|
153
|
-
|
|
154
|
-
|
|
89
|
+
The
|
|
90
|
+
[LLM::MCP](https://0x1eef.github.io/x/llm.rb/LLM/MCP.html)
|
|
91
|
+
object lets llm.rb use tools provided by an MCP server. Those tools are
|
|
92
|
+
exposed through the same runtime as local tools, so you can pass them
|
|
93
|
+
to either
|
|
94
|
+
[LLM::Context](https://0x1eef.github.io/x/llm.rb/LLM/Context.html)
|
|
95
|
+
or
|
|
96
|
+
[LLM::Agent](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html).
|
|
97
|
+
In this example, the MCP server runs over stdio and
|
|
98
|
+
[LLM::Context](https://0x1eef.github.io/x/llm.rb/LLM/Context.html)
|
|
99
|
+
uses the same tool loop as local tools:
|
|
155
100
|
|
|
156
101
|
```ruby
|
|
157
|
-
|
|
158
|
-
ctx.talk("Remember that my favorite language is Ruby.")
|
|
159
|
-
ctx.save(path: "context.json")
|
|
160
|
-
```
|
|
161
|
-
|
|
162
|
-
#### Context Compaction
|
|
102
|
+
require "llm"
|
|
163
103
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
through [`LLM::Compactor`](https://0x1eef.github.io/x/llm.rb/LLM/Compactor.html),
|
|
167
|
-
and when a stream is present it emits `on_compaction` and
|
|
168
|
-
`on_compaction_finish` through [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html).
|
|
169
|
-
The compactor can also use a different model from the main context, which is
|
|
170
|
-
useful when you want summarization to run on a cheaper or faster model.
|
|
171
|
-
`token_threshold:` accepts either a fixed token count or a percentage string
|
|
172
|
-
like `"90%"`, which resolves against the active model context window and
|
|
173
|
-
triggers compaction once total token usage goes over that percentage.
|
|
104
|
+
llm = LLM.openai(key: ENV["KEY"])
|
|
105
|
+
mcp = LLM::MCP.stdio(argv: ["ruby", "server.rb"])
|
|
174
106
|
|
|
175
|
-
|
|
176
|
-
ctx = LLM::Context.new(
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
retention_window: 8,
|
|
181
|
-
model: "gpt-5.4-mini"
|
|
182
|
-
}
|
|
183
|
-
)
|
|
107
|
+
mcp.run do
|
|
108
|
+
ctx = LLM::Context.new(llm, stream: $stdout, tools: mcp.tools)
|
|
109
|
+
ctx.talk "Use the available tools to inspect the environment."
|
|
110
|
+
ctx.talk(ctx.wait(:call)) while ctx.functions?
|
|
111
|
+
end
|
|
184
112
|
```
|
|
185
113
|
|
|
186
|
-
####
|
|
114
|
+
#### Skills
|
|
115
|
+
|
|
116
|
+
Skills are reusable instructions loaded from a directory with a `SKILL.md` file. They let
|
|
117
|
+
you package behavior and tool access together, and they plug into the same
|
|
118
|
+
runtime as tools, agents, and MCP. When a skill runs, llm.rb spawns a
|
|
119
|
+
subagent with the skill instructions, access to only the tools listed in the
|
|
120
|
+
skill, and recent conversation context:
|
|
187
121
|
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
122
|
+
```yaml
|
|
123
|
+
---
|
|
124
|
+
name: release
|
|
125
|
+
description: Prepare a release
|
|
126
|
+
tools: ["search-docs", "git"]
|
|
127
|
+
---
|
|
192
128
|
|
|
193
|
-
|
|
194
|
-
the built-in implementation. It detects repeated tool-call patterns and
|
|
195
|
-
blocks pending tool execution with in-band guarded tool errors instead of
|
|
196
|
-
letting the loop keep spinning. [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html)
|
|
197
|
-
enables that guard by default through its wrapped context.
|
|
129
|
+
## Task
|
|
198
130
|
|
|
199
|
-
|
|
200
|
-
ctx = LLM::Context.new(llm)
|
|
201
|
-
ctx.guard = MyGuard.new
|
|
131
|
+
Review the release state, summarize what changed, and prepare the release.
|
|
202
132
|
```
|
|
203
133
|
|
|
204
|
-
#### Transformers
|
|
205
|
-
|
|
206
|
-
Transformers let llm.rb rewrite outgoing prompts and params before a request
|
|
207
|
-
is sent to the provider. They also live on
|
|
208
|
-
[`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html), but
|
|
209
|
-
they solve a different problem from guards: instead of blocking execution,
|
|
210
|
-
they can normalize or scrub what gets sent. When a stream is present, that
|
|
211
|
-
lifecycle is also exposed through
|
|
212
|
-
[`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html) with
|
|
213
|
-
`on_transform` and `on_transform_finish`.
|
|
214
|
-
|
|
215
|
-
That makes them a good fit for things like PII scrubbing, prompt
|
|
216
|
-
normalization, or request-level param injection. A transformer just needs to
|
|
217
|
-
implement `call(ctx, prompt, params)` and return `[prompt, params]`. That
|
|
218
|
-
means a transformer can scrub plain text prompts, but it can also scrub
|
|
219
|
-
[`LLM::Function::Return`](https://0x1eef.github.io/x/llm.rb/LLM/Function/Return.html)
|
|
220
|
-
values. In other words, you can intercept a tool call's return value and
|
|
221
|
-
modify it before sending it back to the LLM.
|
|
222
|
-
|
|
223
|
-
That is also a useful UI hook. A stream can surface messages like
|
|
224
|
-
`Anonymizing your data...` before a scrubber runs and `Data anonymized.`
|
|
225
|
-
after it finishes.
|
|
226
|
-
|
|
227
134
|
```ruby
|
|
228
|
-
|
|
229
|
-
EMAIL = /\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b/i
|
|
135
|
+
require "llm"
|
|
230
136
|
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
137
|
+
class ReleaseAgent < LLM::Agent
|
|
138
|
+
model "gpt-5.4-mini"
|
|
139
|
+
skills "./skills/release"
|
|
140
|
+
end
|
|
234
141
|
|
|
235
|
-
|
|
142
|
+
llm = LLM.openai(key: ENV["KEY"])
|
|
143
|
+
ReleaseAgent.new(llm, stream: $stdout).talk("Prepare the next release.")
|
|
144
|
+
```
|
|
236
145
|
|
|
237
|
-
|
|
238
|
-
case prompt
|
|
239
|
-
when String then prompt.gsub(EMAIL, "[REDACTED_EMAIL]")
|
|
240
|
-
when Array then prompt.map { scrub(_1) }
|
|
241
|
-
when LLM::Function::Return then on_tool_return(prompt)
|
|
242
|
-
else prompt
|
|
243
|
-
end
|
|
244
|
-
end
|
|
146
|
+
#### LLM::Stream
|
|
245
147
|
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
end
|
|
251
|
-
LLM::Function::Return.new(result.id, result.name, value)
|
|
252
|
-
end
|
|
148
|
+
The
|
|
149
|
+
[LLM::Stream](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html)
|
|
150
|
+
object lets you observe output and runtime events as they happen. You
|
|
151
|
+
can subclass it to handle streamed content in your own application:
|
|
253
152
|
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
end
|
|
153
|
+
```ruby
|
|
154
|
+
require "llm"
|
|
155
|
+
|
|
156
|
+
class Stream < LLM::Stream
|
|
157
|
+
def on_content(content)
|
|
158
|
+
$stdout << content
|
|
261
159
|
end
|
|
262
160
|
end
|
|
263
161
|
|
|
264
|
-
|
|
265
|
-
ctx
|
|
162
|
+
llm = LLM.openai(key: ENV["KEY"])
|
|
163
|
+
ctx = LLM::Context.new(llm, stream: Stream.new)
|
|
164
|
+
ctx.talk "Write a haiku about Ruby."
|
|
266
165
|
```
|
|
267
166
|
|
|
268
|
-
|
|
269
|
-
`on_transform` and `on_transform_finish` on
|
|
270
|
-
[`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html).
|
|
271
|
-
|
|
272
|
-
#### LLM::Stream
|
|
167
|
+
#### LLM::Stream (advanced)
|
|
273
168
|
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
and
|
|
169
|
+
The
|
|
170
|
+
[LLM::Stream](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html)
|
|
171
|
+
object can also resolve tool calls while output is still streaming. In
|
|
172
|
+
`on_tool_call`, you can spawn the tool, push the work onto the stream
|
|
173
|
+
queue, and later drain it with `wait`:
|
|
279
174
|
|
|
280
175
|
```ruby
|
|
176
|
+
require "llm"
|
|
177
|
+
|
|
281
178
|
class Stream < LLM::Stream
|
|
282
|
-
def
|
|
283
|
-
|
|
179
|
+
def on_content(content)
|
|
180
|
+
$stdout << content
|
|
284
181
|
end
|
|
285
182
|
|
|
286
|
-
def
|
|
287
|
-
|
|
183
|
+
def on_tool_call(tool, error)
|
|
184
|
+
return queue << error if error
|
|
185
|
+
queue << ctx.spawn(tool, :thread)
|
|
288
186
|
end
|
|
289
187
|
end
|
|
188
|
+
|
|
189
|
+
llm = LLM.openai(key: ENV["KEY"])
|
|
190
|
+
ctx = LLM::Context.new(llm, stream: Stream.new, tools: [ReadFile])
|
|
191
|
+
ctx.talk "Read README.md and summarize the quick start."
|
|
192
|
+
ctx.talk(ctx.wait) while ctx.functions?
|
|
290
193
|
```
|
|
291
194
|
|
|
292
195
|
#### Concurrency
|
|
293
196
|
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
`:fiber` uses `Fiber.schedule`, so it requires `Fiber.scheduler`.
|
|
197
|
+
llm.rb can run tool work concurrently. This is useful when a model calls
|
|
198
|
+
multiple tools and you want to resolve them in parallel instead of one
|
|
199
|
+
at a time. On
|
|
200
|
+
[LLM::Agent](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html),
|
|
201
|
+
you can enable this with `concurrency`. Common options are `:call` for
|
|
202
|
+
sequential execution, `:thread` or `:task` for concurrent IO-bound work, and
|
|
203
|
+
`:ractor` or `:fork` for more isolated CPU-bound work:
|
|
302
204
|
|
|
303
205
|
```ruby
|
|
206
|
+
require "llm"
|
|
207
|
+
|
|
304
208
|
class Agent < LLM::Agent
|
|
305
209
|
model "gpt-5.4-mini"
|
|
306
|
-
tools
|
|
210
|
+
tools ReadFile
|
|
307
211
|
concurrency :thread
|
|
308
212
|
end
|
|
309
|
-
```
|
|
310
213
|
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
stack. They adapt into the same tool and prompt path used by local tools,
|
|
315
|
-
skills, contexts, and agents.
|
|
316
|
-
|
|
317
|
-
Use `mcp.run do ... end` for scoped work where the client should start and
|
|
318
|
-
stop around one block. Use `mcp.start` and `mcp.stop` directly when you need
|
|
319
|
-
finer sequential control across several steps before shutting the client down.
|
|
320
|
-
|
|
321
|
-
```ruby
|
|
322
|
-
mcp = LLM::MCP.http(
|
|
323
|
-
url: "https://api.githubcopilot.com/mcp/",
|
|
324
|
-
headers: {"Authorization" => "Bearer #{ENV["GITHUB_PAT"]}"},
|
|
325
|
-
persistent: true
|
|
326
|
-
)
|
|
327
|
-
mcp.run do
|
|
328
|
-
ctx = LLM::Context.new(llm, tools: mcp.tools)
|
|
329
|
-
end
|
|
214
|
+
llm = LLM.openai(key: ENV["KEY"])
|
|
215
|
+
agent = Agent.new(llm, stream: $stdout)
|
|
216
|
+
agent.talk "Read README.md and CHANGELOG.md and compare them."
|
|
330
217
|
```
|
|
331
218
|
|
|
332
|
-
####
|
|
219
|
+
#### Serialization
|
|
333
220
|
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
Go's context package. In fact, llm.rb is heavily inspired by Go but with a Ruby
|
|
339
|
-
twist.
|
|
221
|
+
The [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html)
|
|
222
|
+
object can be serialized to JSON, which makes it suitable for storing
|
|
223
|
+
in a file, a database column, or a Redis queue. The built-in
|
|
224
|
+
ActiveRecord and Sequel plugins are built on top of this feature:
|
|
340
225
|
|
|
341
226
|
```ruby
|
|
342
227
|
require "llm"
|
|
343
|
-
require "io/console"
|
|
344
228
|
|
|
345
229
|
llm = LLM.openai(key: ENV["KEY"])
|
|
346
|
-
ctx = LLM::Context.new(llm, stream: $stdout)
|
|
347
|
-
worker = Thread.new do
|
|
348
|
-
ctx.talk("Write a very long essay about network protocols.")
|
|
349
|
-
rescue LLM::Interrupt
|
|
350
|
-
puts "Request was interrupted!"
|
|
351
|
-
end
|
|
352
230
|
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
231
|
+
# Serialize a context
|
|
232
|
+
ctx1 = LLM::Context.new(llm)
|
|
233
|
+
ctx1.talk "Remember that my favorite language is Ruby"
|
|
234
|
+
string = ctx1.to_json
|
|
357
235
|
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
Put providers, tools, MCP servers, and application APIs behind one runtime
|
|
364
|
-
model instead of stitching them together by hand.
|
|
365
|
-
- **Contexts are central** <br>
|
|
366
|
-
Keep history, tools, schema, usage, persistence, and execution state in one
|
|
367
|
-
place instead of spreading them across your app.
|
|
368
|
-
- **Contexts can be serialized** <br>
|
|
369
|
-
Save and restore live state for jobs, databases, retries, or long-running
|
|
370
|
-
workflows.
|
|
371
|
-
|
|
372
|
-
### Runtime Behavior
|
|
373
|
-
|
|
374
|
-
- **Streaming and tool execution work together** <br>
|
|
375
|
-
Start tool work while output is still streaming so you can hide latency
|
|
376
|
-
instead of waiting for turns to finish.
|
|
377
|
-
- **Agents auto-manage tool execution** <br>
|
|
378
|
-
Use `LLM::Agent` when you want the same stateful runtime surface as
|
|
379
|
-
`LLM::Context`, but with tool loops executed automatically according to a
|
|
380
|
-
configured concurrency mode such as `:call`, `:thread`, `:task`, `:fiber`,
|
|
381
|
-
`:fork`, or experimental `:ractor` support for class-based tools. MCP tools
|
|
382
|
-
are not supported by the current `:ractor` mode, but mixed tool sets can
|
|
383
|
-
still route MCP tools and local tools through different strategies at
|
|
384
|
-
runtime. By default, the tool attempt budget is `25`. When an agent
|
|
385
|
-
exhausts that budget, it sends advisory tool errors back through the model
|
|
386
|
-
instead of raising out of the runtime. Set `tool_attempts: nil` to disable
|
|
387
|
-
that advisory behavior.
|
|
388
|
-
- **Tool calls have an explicit lifecycle** <br>
|
|
389
|
-
A tool call can be executed, cancelled through
|
|
390
|
-
[`LLM::Function#cancel`](https://0x1eef.github.io/x/llm.rb/LLM/Function.html#cancel-instance_method),
|
|
391
|
-
or left unresolved for manual handling, but the normal runtime contract is
|
|
392
|
-
still that a model-issued tool request is answered with a tool return.
|
|
393
|
-
- **Requests can be interrupted cleanly** <br>
|
|
394
|
-
Stop in-flight provider work through the same runtime instead of treating
|
|
395
|
-
cancellation as a separate concern.
|
|
396
|
-
[`LLM::Context#cancel!`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html#cancel-21-instance_method)
|
|
397
|
-
is inspired by Go's context cancellation model.
|
|
398
|
-
- **Concurrency is a first-class feature** <br>
|
|
399
|
-
Use async tasks, threads, fibers, forks, or experimental ractors without
|
|
400
|
-
rewriting your tool layer. Async tasks, threads, and fibers are the
|
|
401
|
-
I/O-bound options. Fork and ractor are the CPU-bound options. `:fork`
|
|
402
|
-
requires [`xchan.rb`](https://github.com/0x1eef/xchan.rb#readme) support.
|
|
403
|
-
The current `:ractor` mode is for class-based tools, and MCP tools are
|
|
404
|
-
not supported by ractor, but mixed workloads can branch on `tool.mcp?`
|
|
405
|
-
and choose a supported strategy per tool. Class-based `:ractor` tools
|
|
406
|
-
still emit normal tool tracer callbacks. `:fiber` uses `Fiber.schedule`,
|
|
407
|
-
so it requires `Fiber.scheduler`.
|
|
408
|
-
- **Advanced workloads are built in, not bolted on** <br>
|
|
409
|
-
Streaming, concurrent tool execution, persistence, tracing, and MCP support
|
|
410
|
-
all fit the same runtime model.
|
|
411
|
-
|
|
412
|
-
### Integration
|
|
413
|
-
|
|
414
|
-
- **MCP is built in** <br>
|
|
415
|
-
Connect to MCP servers over stdio or HTTP without bolting on a separate
|
|
416
|
-
integration stack.
|
|
417
|
-
- **ActiveRecord and Sequel persistence are built in** <br>
|
|
418
|
-
llm.rb includes built-in ActiveRecord support through `acts_as_llm` and
|
|
419
|
-
`acts_as_agent`, plus built-in Sequel support through `plugin :llm` and
|
|
420
|
-
`plugin :agent`.
|
|
421
|
-
Use `acts_as_llm` when you want to wrap `LLM::Context`, `acts_as_agent`
|
|
422
|
-
when you want to wrap `LLM::Agent`, `plugin :llm` when you want a
|
|
423
|
-
`LLM::Context` on a Sequel model, or `plugin :agent` when you want an
|
|
424
|
-
`LLM::Agent`. These integrations support `provider:` and `context:` hooks,
|
|
425
|
-
plus `format: :string` for text columns or `format: :jsonb` for native
|
|
426
|
-
PostgreSQL JSON storage when ORM JSON typecasting support is enabled.
|
|
427
|
-
- **ORM models can become persistent agents** <br>
|
|
428
|
-
Turn an ActiveRecord or Sequel model into an agent-capable model with
|
|
429
|
-
built-in persistence, stored on the same table, with `jsonb` support when
|
|
430
|
-
your ORM and database support native JSON columns.
|
|
431
|
-
- **Persistent HTTP pooling is shared process-wide** <br>
|
|
432
|
-
When enabled, separate
|
|
433
|
-
[`LLM::Provider`](https://0x1eef.github.io/x/llm.rb/LLM/Provider.html)
|
|
434
|
-
instances with the same endpoint settings can share one persistent
|
|
435
|
-
pool, and separate HTTP
|
|
436
|
-
[`LLM::MCP`](https://0x1eef.github.io/x/llm.rb/LLM/MCP.html)
|
|
437
|
-
instances can do the same, instead of each object creating its own
|
|
438
|
-
isolated per-instance transport.
|
|
439
|
-
- **OpenAI-compatible gateways are supported** <br>
|
|
440
|
-
Target OpenAI-compatible services such as DeepInfra and OpenRouter, as well
|
|
441
|
-
as proxies and self-hosted servers, with `host:` and `base_path:` when they
|
|
442
|
-
preserve OpenAI request shapes but change the API root path.
|
|
443
|
-
- **Provider support is broad** <br>
|
|
444
|
-
Work with OpenAI, OpenAI-compatible endpoints, Anthropic, Google, DeepSeek,
|
|
445
|
-
Z.ai, xAI, llama.cpp, and Ollama through the same runtime.
|
|
446
|
-
- **Tools are explicit** <br>
|
|
447
|
-
Run local tools, provider-native tools, and MCP tools through the same path
|
|
448
|
-
with fewer special cases.
|
|
449
|
-
- **Skills become bounded runtime capabilities** <br>
|
|
450
|
-
Point llm.rb at directories with a `SKILL.md`, resolve named tools through
|
|
451
|
-
the registry, and adapt each skill into its own callable capability through
|
|
452
|
-
the normal runtime. Unlike a generic skill-discovery tool, each skill runs
|
|
453
|
-
with its own bounded tool subset and behaves like a task-scoped sub-agent.
|
|
454
|
-
- **Providers are normalized, not flattened** <br>
|
|
455
|
-
Share one API surface across providers without losing access to provider-
|
|
456
|
-
specific capabilities where they matter.
|
|
457
|
-
- **Responses keep a uniform shape** <br>
|
|
458
|
-
Provider calls return
|
|
459
|
-
[`LLM::Response`](https://0x1eef.github.io/x/llm.rb/LLM/Response.html)
|
|
460
|
-
objects as a common base shape, then extend them with endpoint- or
|
|
461
|
-
provider-specific behavior when needed.
|
|
462
|
-
- **Low-level access is still there** <br>
|
|
463
|
-
Normalized responses still keep the raw `Net::HTTPResponse` available when
|
|
464
|
-
you need headers, status, or other HTTP details.
|
|
465
|
-
- **Local model metadata is included** <br>
|
|
466
|
-
Model capabilities, pricing, and limits are available locally without extra
|
|
467
|
-
API calls.
|
|
468
|
-
|
|
469
|
-
### Design Philosophy
|
|
470
|
-
|
|
471
|
-
- **Runs on the stdlib** <br>
|
|
472
|
-
Start with Ruby's standard library and add extra dependencies only when you
|
|
473
|
-
need them.
|
|
474
|
-
- **It is highly pluggable** <br>
|
|
475
|
-
Add tools, swap providers, change JSON backends, plug in tracing, or layer
|
|
476
|
-
internal APIs and MCP servers into the same execution path.
|
|
477
|
-
- **It scales from scripts to long-lived systems** <br>
|
|
478
|
-
The same primitives work for one-off scripts, background jobs, and more
|
|
479
|
-
demanding application workloads with streaming, persistence, and tracing.
|
|
480
|
-
- **Thread boundaries are clear** <br>
|
|
481
|
-
Providers are shareable. Contexts are stateful and should stay thread-local.
|
|
482
|
-
|
|
483
|
-
## Capabilities
|
|
484
|
-
|
|
485
|
-
Execution:
|
|
486
|
-
- **Chat & Contexts** — stateless and stateful interactions with persistence
|
|
487
|
-
- **Context Serialization** — save and restore state across processes or time
|
|
488
|
-
- **Streaming** — visible output, reasoning output, tool-call events
|
|
489
|
-
- **Request Interruption** — stop in-flight provider work cleanly
|
|
490
|
-
- **Concurrent Execution** — threads, async tasks, and fibers
|
|
491
|
-
|
|
492
|
-
Runtime Building Blocks:
|
|
493
|
-
- **Tool Calling** — class-based tools and closure-based functions
|
|
494
|
-
- **Run Tools While Streaming** — overlap model output with tool latency
|
|
495
|
-
- **Agents** — reusable assistants with tool auto-execution
|
|
496
|
-
- **Skills** — directory-backed capabilities loaded from `SKILL.md`
|
|
497
|
-
- **MCP Support** — stdio and HTTP MCP clients with prompt and tool support
|
|
498
|
-
- **Context Compaction** — summarize older history in long-lived contexts
|
|
499
|
-
|
|
500
|
-
Data and Structure:
|
|
501
|
-
- **Structured Outputs** — JSON Schema-based responses
|
|
502
|
-
- **Responses API** — stateful response workflows where providers support them
|
|
503
|
-
- **Multimodal Inputs** — text, images, audio, documents, URLs
|
|
504
|
-
- **Audio** — speech generation, transcription, translation
|
|
505
|
-
- **Images** — generation and editing
|
|
506
|
-
- **Files API** — upload and reference files in prompts
|
|
507
|
-
- **Embeddings** — vector generation for search and RAG
|
|
508
|
-
- **Vector Stores** — retrieval workflows
|
|
509
|
-
|
|
510
|
-
Operations:
|
|
511
|
-
- **Cost Tracking** — local cost estimation without extra API calls
|
|
512
|
-
- **Observability** — tracing, logging, telemetry
|
|
513
|
-
- **Model Registry** — local metadata for capabilities, limits, pricing
|
|
514
|
-
- **Persistent HTTP** — optional connection pooling for providers and MCP
|
|
236
|
+
# Restore a context (from JSON)
|
|
237
|
+
ctx2 = LLM::Context.new(llm, stream: $stdout)
|
|
238
|
+
ctx2.restore(string:)
|
|
239
|
+
ctx2.talk "What is my favorite language?"
|
|
240
|
+
```
|
|
515
241
|
|
|
516
242
|
## Installation
|
|
517
243
|
|
|
@@ -561,80 +287,6 @@ ctx = LLM::Context.new(llm)
|
|
|
561
287
|
ctx.talk ["Summarize this document.", ctx.local_file("README.md")]
|
|
562
288
|
```
|
|
563
289
|
|
|
564
|
-
#### Agent
|
|
565
|
-
|
|
566
|
-
This example uses [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html) directly and lets the agent manage tool execution. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
|
|
567
|
-
|
|
568
|
-
```ruby
|
|
569
|
-
require "llm"
|
|
570
|
-
|
|
571
|
-
class ShellAgent < LLM::Agent
|
|
572
|
-
model "gpt-5.4-mini"
|
|
573
|
-
instructions "You are a Linux system assistant."
|
|
574
|
-
tools Shell
|
|
575
|
-
concurrency :thread
|
|
576
|
-
end
|
|
577
|
-
|
|
578
|
-
llm = LLM.openai(key: ENV["KEY"])
|
|
579
|
-
agent = ShellAgent.new(llm)
|
|
580
|
-
puts agent.talk("What time is it on this system?").content
|
|
581
|
-
```
|
|
582
|
-
|
|
583
|
-
#### Skills
|
|
584
|
-
|
|
585
|
-
This example uses [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html) with directory-backed skills so `SKILL.md` capabilities run through the normal tool path. In llm.rb, a skill is exposed as a tool in the runtime. When that tool is called, it spawns a sub-agent with relevant context plus the instructions and tool subset declared in its own `SKILL.md`. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
|
|
586
|
-
|
|
587
|
-
Each skill runs only with the tools declared in its own frontmatter.
|
|
588
|
-
|
|
589
|
-
```ruby
|
|
590
|
-
require "llm"
|
|
591
|
-
|
|
592
|
-
class Agent < LLM::Agent
|
|
593
|
-
model "gpt-5.4-mini"
|
|
594
|
-
instructions "You are a concise release assistant."
|
|
595
|
-
skills "./skills/release", "./skills/review"
|
|
596
|
-
tracer { LLM::Tracer::Logger.new(llm, path: "logs/release-agent.log") }
|
|
597
|
-
end
|
|
598
|
-
|
|
599
|
-
llm = LLM.openai(key: ENV["KEY"])
|
|
600
|
-
puts Agent.new(llm).talk("Use the review skill.").content
|
|
601
|
-
```
|
|
602
|
-
|
|
603
|
-
#### Streaming
|
|
604
|
-
|
|
605
|
-
This example uses [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html) directly so visible output and tool execution can happen together. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
|
|
606
|
-
|
|
607
|
-
```ruby
|
|
608
|
-
require "llm"
|
|
609
|
-
|
|
610
|
-
class Stream < LLM::Stream
|
|
611
|
-
def on_content(content)
|
|
612
|
-
$stdout << content
|
|
613
|
-
end
|
|
614
|
-
|
|
615
|
-
def on_tool_call(tool, error)
|
|
616
|
-
return queue << error if error
|
|
617
|
-
$stdout << "\nRunning tool #{tool.name}...\n"
|
|
618
|
-
queue << ctx.spawn(tool, :thread)
|
|
619
|
-
end
|
|
620
|
-
|
|
621
|
-
def on_tool_return(tool, result)
|
|
622
|
-
if result.error?
|
|
623
|
-
$stdout << "Tool #{tool.name} failed\n"
|
|
624
|
-
else
|
|
625
|
-
$stdout << "Finished tool #{tool.name}\n"
|
|
626
|
-
end
|
|
627
|
-
end
|
|
628
|
-
end
|
|
629
|
-
|
|
630
|
-
llm = LLM.openai(key: ENV["KEY"])
|
|
631
|
-
stream = Stream.new
|
|
632
|
-
ctx = LLM::Context.new(llm, stream:, tools: [System])
|
|
633
|
-
|
|
634
|
-
ctx.talk("Run `date` and `uname -a`.")
|
|
635
|
-
ctx.talk(ctx.wait(:thread)) while ctx.functions.any?
|
|
636
|
-
```
|
|
637
|
-
|
|
638
290
|
#### Context Compaction
|
|
639
291
|
|
|
640
292
|
This example uses [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html),
|
|
@@ -647,7 +299,7 @@ compactor can also use its own `model:` if you want summarization to run on a
|
|
|
647
299
|
different model from the main context. `token_threshold:` accepts either a
|
|
648
300
|
fixed token count or a percentage string like `"90%"`, which resolves
|
|
649
301
|
against the active model context window and triggers compaction once total
|
|
650
|
-
token usage goes over that percentage.
|
|
302
|
+
token usage goes over that percentage. See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
|
|
651
303
|
|
|
652
304
|
```ruby
|
|
653
305
|
require "llm"
|
|
@@ -739,7 +391,7 @@ class Context < Sequel::Model
|
|
|
739
391
|
private
|
|
740
392
|
|
|
741
393
|
def set_provider
|
|
742
|
-
LLM.openai(key: ENV["OPENAI_SECRET"])
|
|
394
|
+
LLM.openai(key: ENV["OPENAI_SECRET"], persistent: true)
|
|
743
395
|
end
|
|
744
396
|
|
|
745
397
|
def set_context
|
|
@@ -875,7 +527,7 @@ This example uses [`LLM::MCP`](https://0x1eef.github.io/x/llm.rb/LLM/MCP.html) o
|
|
|
875
527
|
require "llm"
|
|
876
528
|
require "net/http/persistent"
|
|
877
529
|
|
|
878
|
-
llm = LLM.openai(key: ENV["KEY"])
|
|
530
|
+
llm = LLM.openai(key: ENV["KEY"], persistent: true)
|
|
879
531
|
mcp = LLM::MCP.http(
|
|
880
532
|
url: "https://api.githubcopilot.com/mcp/",
|
|
881
533
|
headers: {"Authorization" => "Bearer #{ENV["GITHUB_PAT"]}"},
|
|
@@ -885,7 +537,7 @@ mcp = LLM::MCP.http(
|
|
|
885
537
|
mcp.start
|
|
886
538
|
ctx = LLM::Context.new(llm, stream: $stdout, tools: mcp.tools)
|
|
887
539
|
ctx.talk("Pull information about my GitHub account.")
|
|
888
|
-
ctx.talk(ctx.
|
|
540
|
+
ctx.talk(ctx.wait(:call)) while ctx.functions?
|
|
889
541
|
mcp.stop
|
|
890
542
|
```
|
|
891
543
|
|
|
@@ -900,7 +552,7 @@ mcp = LLM::MCP.http(
|
|
|
900
552
|
mcp.run do
|
|
901
553
|
ctx = LLM::Context.new(llm, stream: $stdout, tools: mcp.tools)
|
|
902
554
|
ctx.talk("Pull information about my GitHub account.")
|
|
903
|
-
ctx.talk(ctx.
|
|
555
|
+
ctx.talk(ctx.wait(:call)) while ctx.functions?
|
|
904
556
|
end
|
|
905
557
|
```
|
|
906
558
|
|