llm.rb 8.1.0 → 10.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +196 -6
- data/README.md +233 -518
- data/data/anthropic.json +278 -258
- data/data/bedrock.json +1288 -1561
- data/data/deepseek.json +38 -38
- data/data/google.json +656 -579
- data/data/openai.json +860 -818
- data/data/xai.json +243 -552
- data/data/zai.json +168 -168
- data/lib/llm/active_record/acts_as_agent.rb +5 -0
- data/lib/llm/active_record/acts_as_llm.rb +7 -8
- data/lib/llm/active_record.rb +1 -6
- data/lib/llm/agent.rb +121 -82
- data/lib/llm/context.rb +79 -74
- data/lib/llm/contract/completion.rb +45 -0
- data/lib/llm/cost.rb +81 -4
- data/lib/llm/error.rb +1 -1
- data/lib/llm/function/array.rb +8 -5
- data/lib/llm/function/call_group.rb +39 -0
- data/lib/llm/function/call_task.rb +46 -0
- data/lib/llm/function/fork/task.rb +6 -0
- data/lib/llm/function/ractor/task.rb +6 -0
- data/lib/llm/function/task.rb +10 -0
- data/lib/llm/function.rb +28 -1
- data/lib/llm/mcp/transport/http.rb +26 -46
- data/lib/llm/mcp/transport/stdio.rb +0 -8
- data/lib/llm/mcp.rb +6 -23
- data/lib/llm/provider.rb +30 -20
- data/lib/llm/providers/anthropic/error_handler.rb +6 -7
- data/lib/llm/providers/anthropic/files.rb +2 -2
- data/lib/llm/providers/anthropic/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/anthropic/stream_parser.rb +2 -2
- data/lib/llm/providers/anthropic.rb +1 -1
- data/lib/llm/providers/bedrock/error_handler.rb +8 -9
- data/lib/llm/providers/bedrock/models.rb +13 -13
- data/lib/llm/providers/bedrock/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/bedrock/stream_parser.rb +2 -2
- data/lib/llm/providers/bedrock.rb +1 -1
- data/lib/llm/providers/google/error_handler.rb +6 -7
- data/lib/llm/providers/google/files.rb +2 -4
- data/lib/llm/providers/google/images.rb +1 -1
- data/lib/llm/providers/google/models.rb +0 -2
- data/lib/llm/providers/google/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/google/stream_parser.rb +2 -2
- data/lib/llm/providers/google.rb +1 -1
- data/lib/llm/providers/ollama/error_handler.rb +6 -7
- data/lib/llm/providers/ollama/models.rb +0 -2
- data/lib/llm/providers/ollama/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/ollama.rb +1 -1
- data/lib/llm/providers/openai/audio.rb +3 -3
- data/lib/llm/providers/openai/error_handler.rb +6 -7
- data/lib/llm/providers/openai/files.rb +2 -2
- data/lib/llm/providers/openai/images.rb +3 -3
- data/lib/llm/providers/openai/models.rb +1 -1
- data/lib/llm/providers/openai/response_adapter/completion.rb +42 -0
- data/lib/llm/providers/openai/response_adapter/responds.rb +39 -0
- data/lib/llm/providers/openai/responses/stream_parser.rb +2 -2
- data/lib/llm/providers/openai/responses.rb +2 -2
- data/lib/llm/providers/openai/stream_parser.rb +2 -2
- data/lib/llm/providers/openai/vector_stores.rb +1 -1
- data/lib/llm/providers/openai.rb +1 -1
- data/lib/llm/response.rb +10 -8
- data/lib/llm/schema.rb +11 -0
- data/lib/llm/sequel/agent.rb +5 -0
- data/lib/llm/sequel/plugin.rb +8 -14
- data/lib/llm/stream/queue.rb +15 -42
- data/lib/llm/stream.rb +15 -40
- data/lib/llm/tool/param.rb +1 -8
- data/lib/llm/transport/execution.rb +67 -0
- data/lib/llm/transport/http.rb +134 -0
- data/lib/llm/transport/persistent_http.rb +152 -0
- data/lib/llm/transport/response/http.rb +113 -0
- data/lib/llm/transport/response.rb +112 -0
- data/lib/llm/{provider/transport/http → transport}/stream_decoder.rb +8 -4
- data/lib/llm/transport.rb +139 -0
- data/lib/llm/usage.rb +14 -5
- data/lib/llm/utils.rb +24 -14
- data/lib/llm/version.rb +1 -1
- data/lib/llm.rb +3 -12
- data/llm.gemspec +2 -16
- metadata +13 -20
- data/lib/llm/bot.rb +0 -3
- data/lib/llm/provider/transport/http/execution.rb +0 -115
- data/lib/llm/provider/transport/http/interruptible.rb +0 -114
- data/lib/llm/provider/transport/http.rb +0 -145
data/README.md
CHANGED
|
@@ -1,522 +1,281 @@
|
|
|
1
1
|
<p align="center">
|
|
2
|
-
<a href="llm.rb"
|
|
2
|
+
<a href="llm.rb">
|
|
3
|
+
<img src="https://github.com/llmrb/llm.rb/raw/main/llm.png" width="200" height="200" border="0" alt="llm.rb">
|
|
4
|
+
</a>
|
|
3
5
|
</p>
|
|
4
6
|
<p align="center">
|
|
5
|
-
<a href="https://0x1eef.github.io/x/llm.rb?rebuild=1"
|
|
6
|
-
|
|
7
|
-
|
|
7
|
+
<a href="https://0x1eef.github.io/x/llm.rb?rebuild=1">
|
|
8
|
+
<img src="https://img.shields.io/badge/docs-0x1eef.github.io-blue.svg" alt="RubyDoc">
|
|
9
|
+
</a>
|
|
10
|
+
<a href="https://opensource.org/license/0bsd">
|
|
11
|
+
<img src="https://img.shields.io/badge/License-0BSD-orange.svg?" alt="License">
|
|
12
|
+
</a>
|
|
13
|
+
<a href="https://github.com/llmrb/llm.rb/tags">
|
|
14
|
+
<img src="https://img.shields.io/badge/version-10.0.0-green.svg?" alt="Version">
|
|
15
|
+
</a>
|
|
8
16
|
</p>
|
|
9
17
|
|
|
10
18
|
## About
|
|
11
19
|
|
|
12
|
-
llm.rb is
|
|
13
|
-
<br>
|
|
14
|
-
|
|
15
|
-
llm.rb is designed for Ruby, and although it works great in Rails, it is not tightly
|
|
16
|
-
coupled to it. It runs on the standard library by default (zero dependencies),
|
|
17
|
-
loads optional pieces only when needed, includes built-in ActiveRecord support through
|
|
18
|
-
`acts_as_llm` and `acts_as_agent`, includes built-in Sequel support through
|
|
19
|
-
`plugin :llm` and `plugin :agent`, and is designed for engineers who want control over
|
|
20
|
-
long-lived, tool-capable, stateful AI workflows instead of just
|
|
21
|
-
request/response helpers.
|
|
22
|
-
|
|
23
|
-
It provides one runtime for providers, agents, tools, skills, MCP servers, streaming,
|
|
24
|
-
schemas, files, and persisted state, so real systems can be built out of one coherent
|
|
25
|
-
execution model instead of a pile of adapters.
|
|
26
|
-
|
|
27
|
-
It supports providers including OpenAI, Anthropic, Google Gemini, DeepSeek, xAI,
|
|
28
|
-
Z.ai, and AWS Bedrock.
|
|
20
|
+
llm.rb is Ruby's most capable AI runtime.
|
|
29
21
|
|
|
30
|
-
It
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
22
|
+
It runs on Ruby's standard library by default, loads optional pieces
|
|
23
|
+
only when needed, and offers a single runtime for providers, agents,
|
|
24
|
+
tools, skills, MCP, streaming, files, and persisted state. As a bonus,
|
|
25
|
+
llm.rb is also [available for mruby](https://github.com/llmrb/mruby-llm).
|
|
34
26
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
27
|
+
It supports OpenAI, OpenAI-compatible endpoints, Anthropic, Google
|
|
28
|
+
Gemini, DeepSeek, xAI, Z.ai, AWS Bedrock, Ollama, and llama.cpp. It
|
|
29
|
+
also includes built-in ActiveRecord and Sequel support, plus concurrent
|
|
30
|
+
tool execution through threads, tasks (via async gem), fibers, ractors,
|
|
31
|
+
and fork (via xchan.rb gem).
|
|
38
32
|
|
|
33
|
+
## Quick start
|
|
39
34
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
<p align="center">
|
|
43
|
-
<img src="https://github.com/llmrb/llm.rb/raw/main/resources/architecture.png" alt="llm.rb architecture" width="790">
|
|
44
|
-
</p>
|
|
35
|
+
#### LLM::Context
|
|
45
36
|
|
|
46
|
-
|
|
37
|
+
The
|
|
38
|
+
[LLM::Context](https://0x1eef.github.io/x/llm.rb/LLM/Context.html)
|
|
39
|
+
object is at the heart of the runtime. Almost all other features build
|
|
40
|
+
on top of it. It is a low-level interface to a model, and requires tool
|
|
41
|
+
execution to be managed manually. The
|
|
42
|
+
[LLM::Agent](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html)
|
|
43
|
+
class is almost the same as
|
|
44
|
+
[LLM::Context](https://0x1eef.github.io/x/llm.rb/LLM/Context.html)
|
|
45
|
+
but it manages tool execution for you - we'll cover agents next:
|
|
47
46
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
It holds:
|
|
52
|
-
- message history
|
|
53
|
-
- tool state
|
|
54
|
-
- schemas
|
|
55
|
-
- streaming configuration
|
|
56
|
-
- usage and cost tracking
|
|
57
|
-
|
|
58
|
-
Instead of switching abstractions for each feature, everything builds on the
|
|
59
|
-
same context object.
|
|
47
|
+
```ruby
|
|
48
|
+
require "llm"
|
|
60
49
|
|
|
61
|
-
|
|
50
|
+
llm = LLM.openai(key: ENV["KEY"])
|
|
51
|
+
ctx = LLM::Context.new(llm, stream: $stdout)
|
|
52
|
+
ctx.talk "Hello world"
|
|
53
|
+
```
|
|
62
54
|
|
|
63
|
-
|
|
55
|
+
#### LLM::Agent
|
|
64
56
|
|
|
65
|
-
|
|
57
|
+
The
|
|
58
|
+
[LLM::Agent](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html)
|
|
59
|
+
object is implemented on top of
|
|
60
|
+
[LLM::Context](https://0x1eef.github.io/x/llm.rb/LLM/Context.html).
|
|
61
|
+
It provides the same interface, but manages tool execution for you. It
|
|
62
|
+
also has built-in features such as a loop guard that detects repeated
|
|
63
|
+
tool call patterns, and another guard that detects infinite tool call
|
|
64
|
+
loops. Both guards advise the model to change course rather than raise
|
|
65
|
+
an error:
|
|
66
66
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
require a second orchestration layer or a parallel abstraction. If you've
|
|
70
|
-
used Claude or Codex, you know the general idea of skills, and llm.rb
|
|
71
|
-
supports that same concept with the same execution model as the rest of the
|
|
72
|
-
system.
|
|
67
|
+
```ruby
|
|
68
|
+
require "llm"
|
|
73
69
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
loaded in the runtime.
|
|
70
|
+
llm = LLM.openai(key: ENV["KEY"])
|
|
71
|
+
agent = LLM::Agent.new(llm, stream: $stdout)
|
|
72
|
+
agent.talk "Hello world"
|
|
73
|
+
```
|
|
79
74
|
|
|
80
|
-
|
|
81
|
-
commands, you would typically pair the skill with a tool that can execute
|
|
82
|
-
system commands.
|
|
75
|
+
#### Agents (Advanced)
|
|
83
76
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
- git
|
|
91
|
-
---
|
|
92
|
-
Review the release state, summarize what changed, and prepare the release.
|
|
93
|
-
```
|
|
77
|
+
An agent can be configured to require confirmation before a tool is
|
|
78
|
+
executed. When a matching tool is called, llm.rb runs
|
|
79
|
+
`on_tool_confirmation`. That callback must decide whether to cancel the
|
|
80
|
+
tool call or approve it and execute it by calling
|
|
81
|
+
`fn.spawn(strategy).wait`, and it must always return an instance of
|
|
82
|
+
[`LLM::Function::Return`](https://0x1eef.github.io/x/llm.rb/LLM/Function/Return.html):
|
|
94
83
|
|
|
95
84
|
```ruby
|
|
85
|
+
require "llm"
|
|
86
|
+
|
|
96
87
|
class Agent < LLM::Agent
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
88
|
+
tools DeleteFile
|
|
89
|
+
confirm "delete-file"
|
|
90
|
+
|
|
91
|
+
def on_tool_confirmation(fn, strategy)
|
|
92
|
+
path = fn.arguments["path"] || fn.arguments[:path]
|
|
93
|
+
if path.start_with?("/tmp/")
|
|
94
|
+
fn.spawn(strategy).wait
|
|
95
|
+
else
|
|
96
|
+
fn.cancel(reason: "Deletion requires approval")
|
|
97
|
+
end
|
|
98
|
+
end
|
|
100
99
|
end
|
|
101
100
|
|
|
102
101
|
llm = LLM.openai(key: ENV["KEY"])
|
|
103
|
-
Agent.new(llm, stream: $stdout).talk("
|
|
102
|
+
Agent.new(llm, stream: $stdout).talk("Delete /tmp/example.txt.")
|
|
104
103
|
```
|
|
105
104
|
|
|
106
|
-
####
|
|
107
|
-
|
|
108
|
-
Any ActiveRecord model or Sequel model can become an agent-capable model,
|
|
109
|
-
including existing business and domain models, without forcing you into a
|
|
110
|
-
separate agent table or a second persistence layer.
|
|
111
|
-
|
|
112
|
-
`acts_as_agent` extends a model with agent capabilities: the same runtime
|
|
113
|
-
surface as [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html),
|
|
114
|
-
because it actually wraps an `LLM::Agent`, plus persistence through one text,
|
|
115
|
-
JSON, or JSONB-backed `data` column on the same table. If your app also has
|
|
116
|
-
provider or model columns, provide them to llm.rb through `set_provider` and
|
|
117
|
-
`set_context`.
|
|
105
|
+
#### Tools
|
|
118
106
|
|
|
107
|
+
The
|
|
108
|
+
[LLM::Tool](https://0x1eef.github.io/x/llm.rb/LLM/Tool.html)
|
|
109
|
+
class can be subclassed to implement your own tools that can extend the
|
|
110
|
+
abilities of a model:
|
|
119
111
|
|
|
120
112
|
```ruby
|
|
121
|
-
class
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
LLM.openai(key: ENV["OPENAI_SECRET"])
|
|
130
|
-
end
|
|
131
|
-
|
|
132
|
-
def set_context
|
|
133
|
-
{ mode: :responses, store: false }
|
|
113
|
+
class ReadFile < LLM::Tool
|
|
114
|
+
name "read-file"
|
|
115
|
+
description "Read a file"
|
|
116
|
+
parameter :path, String, "The filename or path"
|
|
117
|
+
required %i[path]
|
|
118
|
+
|
|
119
|
+
def call(path:)
|
|
120
|
+
{contents: File.read(path)}
|
|
134
121
|
end
|
|
135
122
|
end
|
|
136
123
|
```
|
|
137
124
|
|
|
138
|
-
####
|
|
139
|
-
|
|
140
|
-
llm.rb is especially strong when you want to build agentic systems in a Ruby
|
|
141
|
-
way. Agents can be ordinary application models with state, associations,
|
|
142
|
-
tools, skills, and persistence, which makes it much easier to build systems
|
|
143
|
-
where users have their own specialized agents instead of treating agents as
|
|
144
|
-
something outside the app.
|
|
145
|
-
|
|
146
|
-
That pattern works so well in llm.rb because
|
|
147
|
-
[`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html),
|
|
148
|
-
`acts_as_agent`, `plugin :agent`, skills, tools, and persisted runtime state
|
|
149
|
-
all fit the same execution model. The runtime stays small enough that the
|
|
150
|
-
main design work becomes application design, not orchestration glue.
|
|
151
|
-
|
|
152
|
-
For a concrete example, see
|
|
153
|
-
[How to build a platform of agents](https://0x1eef.github.io/posts/how-to-build-a-platform-of-agents).
|
|
154
|
-
|
|
155
|
-
#### Persistence
|
|
125
|
+
#### MCP
|
|
156
126
|
|
|
157
|
-
The
|
|
158
|
-
|
|
159
|
-
|
|
127
|
+
The
|
|
128
|
+
[LLM::MCP](https://0x1eef.github.io/x/llm.rb/LLM/MCP.html)
|
|
129
|
+
object lets llm.rb use tools provided by an MCP server. Those tools are
|
|
130
|
+
exposed through the same runtime as local tools, so you can pass them
|
|
131
|
+
to either
|
|
132
|
+
[LLM::Context](https://0x1eef.github.io/x/llm.rb/LLM/Context.html)
|
|
133
|
+
or
|
|
134
|
+
[LLM::Agent](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html).
|
|
135
|
+
In this example, the MCP server runs over stdio and
|
|
136
|
+
[LLM::Context](https://0x1eef.github.io/x/llm.rb/LLM/Context.html)
|
|
137
|
+
uses the same tool loop as local tools:
|
|
160
138
|
|
|
161
139
|
```ruby
|
|
162
|
-
|
|
163
|
-
ctx.talk("Remember that my favorite language is Ruby.")
|
|
164
|
-
ctx.save(path: "context.json")
|
|
165
|
-
```
|
|
166
|
-
|
|
167
|
-
#### Context Compaction
|
|
140
|
+
require "llm"
|
|
168
141
|
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
through [`LLM::Compactor`](https://0x1eef.github.io/x/llm.rb/LLM/Compactor.html),
|
|
172
|
-
and when a stream is present it emits `on_compaction` and
|
|
173
|
-
`on_compaction_finish` through [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html).
|
|
174
|
-
The compactor can also use a different model from the main context, which is
|
|
175
|
-
useful when you want summarization to run on a cheaper or faster model.
|
|
176
|
-
`token_threshold:` accepts either a fixed token count or a percentage string
|
|
177
|
-
like `"90%"`, which resolves against the active model context window and
|
|
178
|
-
triggers compaction once total token usage goes over that percentage.
|
|
142
|
+
llm = LLM.openai(key: ENV["KEY"])
|
|
143
|
+
mcp = LLM::MCP.stdio(argv: ["ruby", "server.rb"])
|
|
179
144
|
|
|
180
|
-
|
|
181
|
-
ctx = LLM::Context.new(
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
retention_window: 8,
|
|
186
|
-
model: "gpt-5.4-mini"
|
|
187
|
-
}
|
|
188
|
-
)
|
|
145
|
+
mcp.run do
|
|
146
|
+
ctx = LLM::Context.new(llm, stream: $stdout, tools: mcp.tools)
|
|
147
|
+
ctx.talk "Use the available tools to inspect the environment."
|
|
148
|
+
ctx.talk(ctx.wait(:call)) while ctx.functions?
|
|
149
|
+
end
|
|
189
150
|
```
|
|
190
151
|
|
|
191
|
-
####
|
|
152
|
+
#### Skills
|
|
192
153
|
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
154
|
+
Skills are reusable instructions loaded from a directory that contains a `SKILL.md` file. They let
|
|
155
|
+
you package behavior and tool access together, and they plug into the same
|
|
156
|
+
runtime as tools, agents, and MCP. When a skill runs, llm.rb spawns a
|
|
157
|
+
subagent with the skill instructions, access to only the tools listed in the
|
|
158
|
+
skill, and recent conversation context:
|
|
197
159
|
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
160
|
+
```yaml
|
|
161
|
+
---
|
|
162
|
+
name: release
|
|
163
|
+
description: Prepare a release
|
|
164
|
+
tools: ["search-docs", "git"]
|
|
165
|
+
---
|
|
203
166
|
|
|
204
|
-
|
|
205
|
-
ctx = LLM::Context.new(llm)
|
|
206
|
-
ctx.guard = MyGuard.new
|
|
207
|
-
```
|
|
167
|
+
## Task
|
|
208
168
|
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
Transformers let llm.rb rewrite outgoing prompts and params before a request
|
|
212
|
-
is sent to the provider. They also live on
|
|
213
|
-
[`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html), but
|
|
214
|
-
they solve a different problem from guards: instead of blocking execution,
|
|
215
|
-
they can normalize or scrub what gets sent. When a stream is present, that
|
|
216
|
-
lifecycle is also exposed through
|
|
217
|
-
[`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html) with
|
|
218
|
-
`on_transform` and `on_transform_finish`.
|
|
219
|
-
|
|
220
|
-
That makes them a good fit for things like PII scrubbing, prompt
|
|
221
|
-
normalization, or request-level param injection. A transformer just needs to
|
|
222
|
-
implement `call(ctx, prompt, params)` and return `[prompt, params]`. That
|
|
223
|
-
means a transformer can scrub plain text prompts, but it can also scrub
|
|
224
|
-
[`LLM::Function::Return`](https://0x1eef.github.io/x/llm.rb/LLM/Function/Return.html)
|
|
225
|
-
values. In other words, you can intercept a tool call's return value and
|
|
226
|
-
modify it before sending it back to the LLM.
|
|
227
|
-
|
|
228
|
-
That is also a useful UI hook. A stream can surface messages like
|
|
229
|
-
`Anonymizing your data...` before a scrubber runs and `Data anonymized.`
|
|
230
|
-
after it finishes.
|
|
169
|
+
Review the release state, summarize what changed, and prepare the release.
|
|
170
|
+
```
|
|
231
171
|
|
|
232
172
|
```ruby
|
|
233
|
-
|
|
234
|
-
EMAIL = /\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b/i
|
|
173
|
+
require "llm"
|
|
235
174
|
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
175
|
+
class ReleaseAgent < LLM::Agent
|
|
176
|
+
model "gpt-5.4-mini"
|
|
177
|
+
skills "./skills/release"
|
|
178
|
+
end
|
|
239
179
|
|
|
240
|
-
|
|
180
|
+
llm = LLM.openai(key: ENV["KEY"])
|
|
181
|
+
ReleaseAgent.new(llm, stream: $stdout).talk("Prepare the next release.")
|
|
182
|
+
```
|
|
241
183
|
|
|
242
|
-
|
|
243
|
-
case prompt
|
|
244
|
-
when String then prompt.gsub(EMAIL, "[REDACTED_EMAIL]")
|
|
245
|
-
when Array then prompt.map { scrub(_1) }
|
|
246
|
-
when LLM::Function::Return then on_tool_return(prompt)
|
|
247
|
-
else prompt
|
|
248
|
-
end
|
|
249
|
-
end
|
|
184
|
+
#### LLM::Stream
|
|
250
185
|
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
end
|
|
256
|
-
LLM::Function::Return.new(result.id, result.name, value)
|
|
257
|
-
end
|
|
186
|
+
The
|
|
187
|
+
[LLM::Stream](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html)
|
|
188
|
+
object lets you observe output and runtime events as they happen. You
|
|
189
|
+
can subclass it to handle streamed content in your own application:
|
|
258
190
|
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
end
|
|
191
|
+
```ruby
|
|
192
|
+
require "llm"
|
|
193
|
+
|
|
194
|
+
class Stream < LLM::Stream
|
|
195
|
+
def on_content(content)
|
|
196
|
+
$stdout << content
|
|
266
197
|
end
|
|
267
198
|
end
|
|
268
199
|
|
|
269
|
-
|
|
270
|
-
ctx
|
|
200
|
+
llm = LLM.openai(key: ENV["KEY"])
|
|
201
|
+
ctx = LLM::Context.new(llm, stream: Stream.new)
|
|
202
|
+
ctx.talk "Write a haiku about Ruby."
|
|
271
203
|
```
|
|
272
204
|
|
|
273
|
-
|
|
274
|
-
`on_transform` and `on_transform_finish` on
|
|
275
|
-
[`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html).
|
|
205
|
+
#### LLM::Stream (advanced)
|
|
276
206
|
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
`
|
|
281
|
-
|
|
282
|
-
means visible output, reasoning output, request rewriting, tool execution,
|
|
283
|
-
and context compaction can all be driven through the same execution path.
|
|
207
|
+
The
|
|
208
|
+
[LLM::Stream](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html)
|
|
209
|
+
object can also resolve tool calls while output is still streaming. In
|
|
210
|
+
`on_tool_call`, you can spawn the tool, push the work onto the stream
|
|
211
|
+
queue, and later drain it with `wait`:
|
|
284
212
|
|
|
285
213
|
```ruby
|
|
214
|
+
require "llm"
|
|
215
|
+
|
|
286
216
|
class Stream < LLM::Stream
|
|
287
|
-
def
|
|
288
|
-
|
|
217
|
+
def on_content(content)
|
|
218
|
+
$stdout << content
|
|
289
219
|
end
|
|
290
220
|
|
|
291
|
-
def
|
|
292
|
-
|
|
221
|
+
def on_tool_call(tool, error)
|
|
222
|
+
return queue << error if error
|
|
223
|
+
queue << ctx.spawn(tool, :thread)
|
|
293
224
|
end
|
|
294
225
|
end
|
|
226
|
+
|
|
227
|
+
llm = LLM.openai(key: ENV["KEY"])
|
|
228
|
+
ctx = LLM::Context.new(llm, stream: Stream.new, tools: [ReadFile])
|
|
229
|
+
ctx.talk "Read README.md and summarize the quick start."
|
|
230
|
+
ctx.talk(ctx.wait) while ctx.functions?
|
|
295
231
|
```
|
|
296
232
|
|
|
297
233
|
#### Concurrency
|
|
298
234
|
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
`:fiber` uses `Fiber.schedule`, so it requires `Fiber.scheduler`.
|
|
235
|
+
llm.rb can run tool work concurrently. This is useful when a model calls
|
|
236
|
+
multiple tools and you want to resolve them in parallel instead of one
|
|
237
|
+
at a time. On
|
|
238
|
+
[LLM::Agent](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html),
|
|
239
|
+
you can enable this with `concurrency`. Common options are `:call` for
|
|
240
|
+
sequential execution, `:thread` or `:task` for concurrent I/O-bound work, and
|
|
241
|
+
`:ractor` or `:fork` for more isolated CPU-bound work:
|
|
307
242
|
|
|
308
243
|
```ruby
|
|
244
|
+
require "llm"
|
|
245
|
+
|
|
309
246
|
class Agent < LLM::Agent
|
|
310
247
|
model "gpt-5.4-mini"
|
|
311
|
-
tools
|
|
248
|
+
tools ReadFile
|
|
312
249
|
concurrency :thread
|
|
313
250
|
end
|
|
314
|
-
```
|
|
315
|
-
|
|
316
|
-
#### MCP
|
|
317
251
|
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
Use `mcp.run do ... end` for scoped work where the client should start and
|
|
323
|
-
stop around one block. Use `mcp.start` and `mcp.stop` directly when you need
|
|
324
|
-
finer sequential control across several steps before shutting the client down.
|
|
325
|
-
|
|
326
|
-
```ruby
|
|
327
|
-
mcp = LLM::MCP.http(
|
|
328
|
-
url: "https://api.githubcopilot.com/mcp/",
|
|
329
|
-
headers: {"Authorization" => "Bearer #{ENV["GITHUB_PAT"]}"},
|
|
330
|
-
persistent: true
|
|
331
|
-
)
|
|
332
|
-
mcp.run do
|
|
333
|
-
ctx = LLM::Context.new(llm, tools: mcp.tools)
|
|
334
|
-
end
|
|
252
|
+
llm = LLM.openai(key: ENV["KEY"])
|
|
253
|
+
agent = Agent.new(llm, stream: $stdout)
|
|
254
|
+
agent.talk "Read README.md and CHANGELOG.md and compare them."
|
|
335
255
|
```
|
|
336
256
|
|
|
337
|
-
####
|
|
257
|
+
#### Serialization
|
|
338
258
|
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
Go's context package. In fact, llm.rb is heavily inspired by Go but with a Ruby
|
|
344
|
-
twist.
|
|
259
|
+
The [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html)
|
|
260
|
+
object can be serialized to JSON, which makes it suitable for storing
|
|
261
|
+
in a file, a database column, or a Redis queue. The built-in
|
|
262
|
+
ActiveRecord and Sequel plugins are built on top of this feature:
|
|
345
263
|
|
|
346
264
|
```ruby
|
|
347
265
|
require "llm"
|
|
348
|
-
require "io/console"
|
|
349
266
|
|
|
350
267
|
llm = LLM.openai(key: ENV["KEY"])
|
|
351
|
-
ctx = LLM::Context.new(llm, stream: $stdout)
|
|
352
|
-
worker = Thread.new do
|
|
353
|
-
ctx.talk("Write a very long essay about network protocols.")
|
|
354
|
-
rescue LLM::Interrupt
|
|
355
|
-
puts "Request was interrupted!"
|
|
356
|
-
end
|
|
357
268
|
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
269
|
+
# Serialize a context
|
|
270
|
+
ctx1 = LLM::Context.new(llm)
|
|
271
|
+
ctx1.talk "Remember that my favorite language is Ruby"
|
|
272
|
+
string = ctx1.to_json
|
|
362
273
|
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
Put providers, tools, MCP servers, and application APIs behind one runtime
|
|
369
|
-
model instead of stitching them together by hand.
|
|
370
|
-
- **Contexts are central** <br>
|
|
371
|
-
Keep history, tools, schema, usage, persistence, and execution state in one
|
|
372
|
-
place instead of spreading them across your app.
|
|
373
|
-
- **Contexts can be serialized** <br>
|
|
374
|
-
Save and restore live state for jobs, databases, retries, or long-running
|
|
375
|
-
workflows.
|
|
376
|
-
|
|
377
|
-
### Runtime Behavior
|
|
378
|
-
|
|
379
|
-
- **Streaming and tool execution work together** <br>
|
|
380
|
-
Start tool work while output is still streaming so you can hide latency
|
|
381
|
-
instead of waiting for turns to finish.
|
|
382
|
-
- **Agents auto-manage tool execution** <br>
|
|
383
|
-
Use `LLM::Agent` when you want the same stateful runtime surface as
|
|
384
|
-
`LLM::Context`, but with tool loops executed automatically according to a
|
|
385
|
-
configured concurrency mode such as `:call`, `:thread`, `:task`, `:fiber`,
|
|
386
|
-
`:fork`, or experimental `:ractor` support for class-based tools. MCP tools
|
|
387
|
-
are not supported by the current `:ractor` mode, but mixed tool sets can
|
|
388
|
-
still route MCP tools and local tools through different strategies at
|
|
389
|
-
runtime. By default, the tool attempt budget is `25`. When an agent
|
|
390
|
-
exhausts that budget, it sends advisory tool errors back through the model
|
|
391
|
-
instead of raising out of the runtime. Set `tool_attempts: nil` to disable
|
|
392
|
-
that advisory behavior.
|
|
393
|
-
- **Tool calls have an explicit lifecycle** <br>
|
|
394
|
-
A tool call can be executed, cancelled through
|
|
395
|
-
[`LLM::Function#cancel`](https://0x1eef.github.io/x/llm.rb/LLM/Function.html#cancel-instance_method),
|
|
396
|
-
or left unresolved for manual handling, but the normal runtime contract is
|
|
397
|
-
still that a model-issued tool request is answered with a tool return.
|
|
398
|
-
- **Requests can be interrupted cleanly** <br>
|
|
399
|
-
Stop in-flight provider work through the same runtime instead of treating
|
|
400
|
-
cancellation as a separate concern.
|
|
401
|
-
[`LLM::Context#cancel!`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html#cancel-21-instance_method)
|
|
402
|
-
is inspired by Go's context cancellation model.
|
|
403
|
-
- **Concurrency is a first-class feature** <br>
|
|
404
|
-
Use async tasks, threads, fibers, forks, or experimental ractors without
|
|
405
|
-
rewriting your tool layer. Async tasks, threads, and fibers are the
|
|
406
|
-
I/O-bound options. Fork and ractor are the CPU-bound options. `:fork`
|
|
407
|
-
requires [`xchan.rb`](https://github.com/0x1eef/xchan.rb#readme) support.
|
|
408
|
-
The current `:ractor` mode is for class-based tools, and MCP tools are
|
|
409
|
-
not supported by ractor, but mixed workloads can branch on `tool.mcp?`
|
|
410
|
-
and choose a supported strategy per tool. Class-based `:ractor` tools
|
|
411
|
-
still emit normal tool tracer callbacks. `:fiber` uses `Fiber.schedule`,
|
|
412
|
-
so it requires `Fiber.scheduler`.
|
|
413
|
-
- **Advanced workloads are built in, not bolted on** <br>
|
|
414
|
-
Streaming, concurrent tool execution, persistence, tracing, and MCP support
|
|
415
|
-
all fit the same runtime model.
|
|
416
|
-
|
|
417
|
-
### Integration
|
|
418
|
-
|
|
419
|
-
- **MCP is built in** <br>
|
|
420
|
-
Connect to MCP servers over stdio or HTTP without bolting on a separate
|
|
421
|
-
integration stack.
|
|
422
|
-
- **ActiveRecord and Sequel persistence are built in** <br>
|
|
423
|
-
llm.rb includes built-in ActiveRecord support through `acts_as_llm` and
|
|
424
|
-
`acts_as_agent`, plus built-in Sequel support through `plugin :llm` and
|
|
425
|
-
`plugin :agent`.
|
|
426
|
-
Use `acts_as_llm` when you want to wrap `LLM::Context`, `acts_as_agent`
|
|
427
|
-
when you want to wrap `LLM::Agent`, `plugin :llm` when you want a
|
|
428
|
-
`LLM::Context` on a Sequel model, or `plugin :agent` when you want an
|
|
429
|
-
`LLM::Agent`. These integrations support `provider:` and `context:` hooks,
|
|
430
|
-
plus `format: :string` for text columns or `format: :jsonb` for native
|
|
431
|
-
PostgreSQL JSON storage when ORM JSON typecasting support is enabled.
|
|
432
|
-
- **ORM models can become persistent agents** <br>
|
|
433
|
-
Turn an ActiveRecord or Sequel model into an agent-capable model with
|
|
434
|
-
built-in persistence, stored on the same table, with `jsonb` support when
|
|
435
|
-
your ORM and database support native JSON columns.
|
|
436
|
-
- **Persistent HTTP pooling is shared process-wide** <br>
|
|
437
|
-
When enabled, separate
|
|
438
|
-
[`LLM::Provider`](https://0x1eef.github.io/x/llm.rb/LLM/Provider.html)
|
|
439
|
-
instances with the same endpoint settings can share one persistent
|
|
440
|
-
pool, and separate HTTP
|
|
441
|
-
[`LLM::MCP`](https://0x1eef.github.io/x/llm.rb/LLM/MCP.html)
|
|
442
|
-
instances can do the same, instead of each object creating its own
|
|
443
|
-
isolated per-instance transport.
|
|
444
|
-
- **OpenAI-compatible gateways are supported** <br>
|
|
445
|
-
Target OpenAI-compatible services such as DeepInfra and OpenRouter, as well
|
|
446
|
-
as proxies and self-hosted servers, with `host:` and `base_path:` when they
|
|
447
|
-
preserve OpenAI request shapes but change the API root path.
|
|
448
|
-
- **Provider support is broad** <br>
|
|
449
|
-
Work with OpenAI, OpenAI-compatible endpoints, Anthropic, Google, DeepSeek,
|
|
450
|
-
Z.ai, xAI, AWS Bedrock, llama.cpp, and Ollama through the same runtime.
|
|
451
|
-
- **Tools are explicit** <br>
|
|
452
|
-
Run local tools, provider-native tools, and MCP tools through the same path
|
|
453
|
-
with fewer special cases.
|
|
454
|
-
- **Skills become bounded runtime capabilities** <br>
|
|
455
|
-
Point llm.rb at directories with a `SKILL.md`, resolve named tools through
|
|
456
|
-
the registry, and adapt each skill into its own callable capability through
|
|
457
|
-
the normal runtime. Unlike a generic skill-discovery tool, each skill runs
|
|
458
|
-
with its own bounded tool subset and behaves like a task-scoped sub-agent.
|
|
459
|
-
- **Providers are normalized, not flattened** <br>
|
|
460
|
-
Share one API surface across providers without losing access to provider-
|
|
461
|
-
specific capabilities where they matter.
|
|
462
|
-
- **Responses keep a uniform shape** <br>
|
|
463
|
-
Provider calls return
|
|
464
|
-
[`LLM::Response`](https://0x1eef.github.io/x/llm.rb/LLM/Response.html)
|
|
465
|
-
objects as a common base shape, then extend them with endpoint- or
|
|
466
|
-
provider-specific behavior when needed.
|
|
467
|
-
- **Low-level access is still there** <br>
|
|
468
|
-
Normalized responses still keep the raw `Net::HTTPResponse` available when
|
|
469
|
-
you need headers, status, or other HTTP details.
|
|
470
|
-
- **Local model metadata is included** <br>
|
|
471
|
-
Model capabilities, pricing, and limits are available locally without extra
|
|
472
|
-
API calls.
|
|
473
|
-
|
|
474
|
-
### Design Philosophy
|
|
475
|
-
|
|
476
|
-
- **Runs on the stdlib** <br>
|
|
477
|
-
Start with Ruby's standard library and add extra dependencies only when you
|
|
478
|
-
need them.
|
|
479
|
-
- **It is highly pluggable** <br>
|
|
480
|
-
Add tools, swap providers, change JSON backends, plug in tracing, or layer
|
|
481
|
-
internal APIs and MCP servers into the same execution path.
|
|
482
|
-
- **It scales from scripts to long-lived systems** <br>
|
|
483
|
-
The same primitives work for one-off scripts, background jobs, and more
|
|
484
|
-
demanding application workloads with streaming, persistence, and tracing.
|
|
485
|
-
- **Thread boundaries are clear** <br>
|
|
486
|
-
Providers are shareable. Contexts are stateful and should stay thread-local.
|
|
487
|
-
|
|
488
|
-
## Capabilities
|
|
489
|
-
|
|
490
|
-
Execution:
|
|
491
|
-
- **Chat & Contexts** — stateless and stateful interactions with persistence
|
|
492
|
-
- **Context Serialization** — save and restore state across processes or time
|
|
493
|
-
- **Streaming** — visible output, reasoning output, tool-call events
|
|
494
|
-
- **Request Interruption** — stop in-flight provider work cleanly
|
|
495
|
-
- **Concurrent Execution** — threads, async tasks, and fibers
|
|
496
|
-
|
|
497
|
-
Runtime Building Blocks:
|
|
498
|
-
- **Tool Calling** — class-based tools and closure-based functions
|
|
499
|
-
- **Run Tools While Streaming** — overlap model output with tool latency
|
|
500
|
-
- **Agents** — reusable assistants with tool auto-execution
|
|
501
|
-
- **Skills** — directory-backed capabilities loaded from `SKILL.md`
|
|
502
|
-
- **MCP Support** — stdio and HTTP MCP clients with prompt and tool support
|
|
503
|
-
- **Context Compaction** — summarize older history in long-lived contexts
|
|
504
|
-
|
|
505
|
-
Data and Structure:
|
|
506
|
-
- **Structured Outputs** — JSON Schema-based responses
|
|
507
|
-
- **Responses API** — stateful response workflows where providers support them
|
|
508
|
-
- **Multimodal Inputs** — text, images, audio, documents, URLs
|
|
509
|
-
- **Audio** — speech generation, transcription, translation
|
|
510
|
-
- **Images** — generation and editing
|
|
511
|
-
- **Files API** — upload and reference files in prompts
|
|
512
|
-
- **Embeddings** — vector generation for search and RAG
|
|
513
|
-
- **Vector Stores** — retrieval workflows
|
|
514
|
-
|
|
515
|
-
Operations:
|
|
516
|
-
- **Cost Tracking** — local cost estimation without extra API calls
|
|
517
|
-
- **Observability** — tracing, logging, telemetry
|
|
518
|
-
- **Model Registry** — local metadata for capabilities, limits, pricing
|
|
519
|
-
- **Persistent HTTP** — optional connection pooling for providers and MCP
|
|
274
|
+
# Restore a context (from JSON)
|
|
275
|
+
ctx2 = LLM::Context.new(llm, stream: $stdout)
|
|
276
|
+
ctx2.restore(string:)
|
|
277
|
+
ctx2.talk "What is my favorite language?"
|
|
278
|
+
```
|
|
520
279
|
|
|
521
280
|
## Installation
|
|
522
281
|
|
|
@@ -528,7 +287,10 @@ gem install llm.rb
|
|
|
528
287
|
|
|
529
288
|
#### REPL
|
|
530
289
|
|
|
531
|
-
This example uses [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html)
|
|
290
|
+
This example uses [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html)
|
|
291
|
+
directly for an interactive REPL. <br> See the
|
|
292
|
+
[deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or
|
|
293
|
+
[deepdive (markdown)](resources/deepdive.md) for more examples.
|
|
532
294
|
|
|
533
295
|
```ruby
|
|
534
296
|
require "llm"
|
|
@@ -566,80 +328,6 @@ ctx = LLM::Context.new(llm)
|
|
|
566
328
|
ctx.talk ["Summarize this document.", ctx.local_file("README.md")]
|
|
567
329
|
```
|
|
568
330
|
|
|
569
|
-
#### Agent
|
|
570
|
-
|
|
571
|
-
This example uses [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html) directly and lets the agent manage tool execution. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
|
|
572
|
-
|
|
573
|
-
```ruby
|
|
574
|
-
require "llm"
|
|
575
|
-
|
|
576
|
-
class ShellAgent < LLM::Agent
|
|
577
|
-
model "gpt-5.4-mini"
|
|
578
|
-
instructions "You are a Linux system assistant."
|
|
579
|
-
tools Shell
|
|
580
|
-
concurrency :thread
|
|
581
|
-
end
|
|
582
|
-
|
|
583
|
-
llm = LLM.openai(key: ENV["KEY"])
|
|
584
|
-
agent = ShellAgent.new(llm)
|
|
585
|
-
puts agent.talk("What time is it on this system?").content
|
|
586
|
-
```
|
|
587
|
-
|
|
588
|
-
#### Skills
|
|
589
|
-
|
|
590
|
-
This example uses [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html) with directory-backed skills so `SKILL.md` capabilities run through the normal tool path. In llm.rb, a skill is exposed as a tool in the runtime. When that tool is called, it spawns a sub-agent with relevant context plus the instructions and tool subset declared in its own `SKILL.md`. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
|
|
591
|
-
|
|
592
|
-
Each skill runs only with the tools declared in its own frontmatter.
|
|
593
|
-
|
|
594
|
-
```ruby
|
|
595
|
-
require "llm"
|
|
596
|
-
|
|
597
|
-
class Agent < LLM::Agent
|
|
598
|
-
model "gpt-5.4-mini"
|
|
599
|
-
instructions "You are a concise release assistant."
|
|
600
|
-
skills "./skills/release", "./skills/review"
|
|
601
|
-
tracer { LLM::Tracer::Logger.new(llm, path: "logs/release-agent.log") }
|
|
602
|
-
end
|
|
603
|
-
|
|
604
|
-
llm = LLM.openai(key: ENV["KEY"])
|
|
605
|
-
puts Agent.new(llm).talk("Use the review skill.").content
|
|
606
|
-
```
|
|
607
|
-
|
|
608
|
-
#### Streaming
|
|
609
|
-
|
|
610
|
-
This example uses [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html) directly so visible output and tool execution can happen together. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
|
|
611
|
-
|
|
612
|
-
```ruby
|
|
613
|
-
require "llm"
|
|
614
|
-
|
|
615
|
-
class Stream < LLM::Stream
|
|
616
|
-
def on_content(content)
|
|
617
|
-
$stdout << content
|
|
618
|
-
end
|
|
619
|
-
|
|
620
|
-
def on_tool_call(tool, error)
|
|
621
|
-
return queue << error if error
|
|
622
|
-
$stdout << "\nRunning tool #{tool.name}...\n"
|
|
623
|
-
queue << ctx.spawn(tool, :thread)
|
|
624
|
-
end
|
|
625
|
-
|
|
626
|
-
def on_tool_return(tool, result)
|
|
627
|
-
if result.error?
|
|
628
|
-
$stdout << "Tool #{tool.name} failed\n"
|
|
629
|
-
else
|
|
630
|
-
$stdout << "Finished tool #{tool.name}\n"
|
|
631
|
-
end
|
|
632
|
-
end
|
|
633
|
-
end
|
|
634
|
-
|
|
635
|
-
llm = LLM.openai(key: ENV["KEY"])
|
|
636
|
-
stream = Stream.new
|
|
637
|
-
ctx = LLM::Context.new(llm, stream:, tools: [System])
|
|
638
|
-
|
|
639
|
-
ctx.talk("Run `date` and `uname -a`.")
|
|
640
|
-
ctx.talk(ctx.wait(:thread)) while ctx.functions.any?
|
|
641
|
-
```
|
|
642
|
-
|
|
643
331
|
#### Context Compaction
|
|
644
332
|
|
|
645
333
|
This example uses [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html),
|
|
@@ -652,7 +340,9 @@ compactor can also use its own `model:` if you want summarization to run on a
|
|
|
652
340
|
different model from the main context. `token_threshold:` accepts either a
|
|
653
341
|
fixed token count or a percentage string like `"90%"`, which resolves
|
|
654
342
|
against the active model context window and triggers compaction once total
|
|
655
|
-
token usage goes over that percentage.
|
|
343
|
+
token usage goes over that percentage. See the
|
|
344
|
+
[deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or
|
|
345
|
+
[deepdive (markdown)](resources/deepdive.md) for more examples.
|
|
656
346
|
|
|
657
347
|
```ruby
|
|
658
348
|
require "llm"
|
|
@@ -681,7 +371,15 @@ ctx = LLM::Context.new(
|
|
|
681
371
|
|
|
682
372
|
#### Reasoning
|
|
683
373
|
|
|
684
|
-
This example uses [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html)
|
|
374
|
+
This example uses [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html)
|
|
375
|
+
with the OpenAI Responses API so reasoning output is streamed separately from
|
|
376
|
+
visible assistant output. See the
|
|
377
|
+
[deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or
|
|
378
|
+
[deepdive (markdown)](resources/deepdive.md) for more examples.
|
|
379
|
+
|
|
380
|
+
To use the Responses API (OpenAI-specific), initialize a
|
|
381
|
+
context or agent with `mode: :responses` and keep using
|
|
382
|
+
`talk` for turns.
|
|
685
383
|
|
|
686
384
|
```ruby
|
|
687
385
|
require "llm"
|
|
@@ -709,7 +407,10 @@ ctx.talk("Solve 17 * 19 and show your work.")
|
|
|
709
407
|
|
|
710
408
|
#### Request Cancellation
|
|
711
409
|
|
|
712
|
-
Need to cancel a stream? llm.rb has you covered through
|
|
410
|
+
Need to cancel a stream? llm.rb has you covered through
|
|
411
|
+
[`LLM::Context#interrupt!`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html#interrupt-21-instance_method).
|
|
412
|
+
<br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html)
|
|
413
|
+
or [deepdive (markdown)](resources/deepdive.md) for more examples.
|
|
713
414
|
|
|
714
415
|
```ruby
|
|
715
416
|
require "llm"
|
|
@@ -730,7 +431,14 @@ worker.join
|
|
|
730
431
|
|
|
731
432
|
#### Sequel (ORM)
|
|
732
433
|
|
|
733
|
-
The `plugin :llm` integration wraps
|
|
434
|
+
The `plugin :llm` integration wraps
|
|
435
|
+
[`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html) on a
|
|
436
|
+
`Sequel::Model` and keeps tool execution explicit. Like the ActiveRecord
|
|
437
|
+
wrappers, its built-in persistence contract is the serialized `data` column,
|
|
438
|
+
while `provider:` resolves a real `LLM::Provider` instance and `context:`
|
|
439
|
+
injects defaults such as `model:`. <br> See the
|
|
440
|
+
[deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or
|
|
441
|
+
[deepdive (markdown)](resources/deepdive.md) for more examples.
|
|
734
442
|
|
|
735
443
|
```ruby
|
|
736
444
|
require "llm"
|
|
@@ -744,7 +452,7 @@ class Context < Sequel::Model
|
|
|
744
452
|
private
|
|
745
453
|
|
|
746
454
|
def set_provider
|
|
747
|
-
LLM.openai(key: ENV["OPENAI_SECRET"])
|
|
455
|
+
LLM.openai(key: ENV["OPENAI_SECRET"], persistent: true)
|
|
748
456
|
end
|
|
749
457
|
|
|
750
458
|
def set_context
|
|
@@ -765,7 +473,8 @@ one serialized `data` column. If your app has provider, model, or usage
|
|
|
765
473
|
columns, provide them to llm.rb through `provider:` and `context:` instead of
|
|
766
474
|
relying on reserved wrapper columns.
|
|
767
475
|
|
|
768
|
-
See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html)
|
|
476
|
+
See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html)
|
|
477
|
+
or [deepdive (markdown)](resources/deepdive.md) for more examples.
|
|
769
478
|
|
|
770
479
|
```ruby
|
|
771
480
|
require "llm"
|
|
@@ -821,7 +530,8 @@ manages tool execution for you. Like `acts_as_llm`, its built-in persistence
|
|
|
821
530
|
contract is one serialized `data` column. If your app has provider or model
|
|
822
531
|
columns, provide them to llm.rb through your hooks and agent DSL.
|
|
823
532
|
|
|
824
|
-
See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html)
|
|
533
|
+
See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html)
|
|
534
|
+
or [deepdive (markdown)](resources/deepdive.md) for more examples.
|
|
825
535
|
|
|
826
536
|
```ruby
|
|
827
537
|
require "llm"
|
|
@@ -874,13 +584,18 @@ end
|
|
|
874
584
|
|
|
875
585
|
#### MCP
|
|
876
586
|
|
|
877
|
-
This example uses [`LLM::MCP`](https://0x1eef.github.io/x/llm.rb/LLM/MCP.html)
|
|
587
|
+
This example uses [`LLM::MCP`](https://0x1eef.github.io/x/llm.rb/LLM/MCP.html)
|
|
588
|
+
over HTTP so remote GitHub MCP tools run through the same
|
|
589
|
+
`LLM::Context` tool path as local tools. It expects a GitHub token in
|
|
590
|
+
`ENV["GITHUB_PAT"]`. See the
|
|
591
|
+
[deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or
|
|
592
|
+
[deepdive (markdown)](resources/deepdive.md) for more examples.
|
|
878
593
|
|
|
879
594
|
```ruby
|
|
880
595
|
require "llm"
|
|
881
596
|
require "net/http/persistent"
|
|
882
597
|
|
|
883
|
-
llm = LLM.openai(key: ENV["KEY"])
|
|
598
|
+
llm = LLM.openai(key: ENV["KEY"], persistent: true)
|
|
884
599
|
mcp = LLM::MCP.http(
|
|
885
600
|
url: "https://api.githubcopilot.com/mcp/",
|
|
886
601
|
headers: {"Authorization" => "Bearer #{ENV["GITHUB_PAT"]}"},
|
|
@@ -890,7 +605,7 @@ mcp = LLM::MCP.http(
|
|
|
890
605
|
mcp.start
|
|
891
606
|
ctx = LLM::Context.new(llm, stream: $stdout, tools: mcp.tools)
|
|
892
607
|
ctx.talk("Pull information about my GitHub account.")
|
|
893
|
-
ctx.talk(ctx.
|
|
608
|
+
ctx.talk(ctx.wait(:call)) while ctx.functions?
|
|
894
609
|
mcp.stop
|
|
895
610
|
```
|
|
896
611
|
|
|
@@ -905,7 +620,7 @@ mcp = LLM::MCP.http(
|
|
|
905
620
|
mcp.run do
|
|
906
621
|
ctx = LLM::Context.new(llm, stream: $stdout, tools: mcp.tools)
|
|
907
622
|
ctx.talk("Pull information about my GitHub account.")
|
|
908
|
-
ctx.talk(ctx.
|
|
623
|
+
ctx.talk(ctx.wait(:call)) while ctx.functions?
|
|
909
624
|
end
|
|
910
625
|
```
|
|
911
626
|
|