llm.rb 4.20.2 → 4.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +70 -0
- data/README.md +286 -52
- data/data/anthropic.json +35 -2
- data/data/google.json +7 -2
- data/data/openai.json +0 -30
- data/lib/llm/active_record/acts_as_agent.rb +11 -64
- data/lib/llm/active_record/acts_as_llm.rb +81 -61
- data/lib/llm/agent.rb +28 -4
- data/lib/llm/context.rb +14 -0
- data/lib/llm/sequel/agent.rb +94 -0
- data/lib/llm/sequel/plugin.rb +82 -60
- data/lib/llm/skill.rb +131 -0
- data/lib/llm/version.rb +1 -1
- data/lib/llm.rb +1 -0
- data/lib/sequel/plugins/agent.rb +8 -0
- data/llm.gemspec +3 -0
- metadata +46 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 96698cb3af793b0bd83cae7635279cefbff24f86b11f59c9209edd76f76b757c
+  data.tar.gz: 389e4372ab3b4a2e90020e6e2e838b5a36516d5a5dd82a71243975dfe6f8f959
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 6bd4fa02802333bbb925db2e513913bd1669e8a4d7c85d8cb76b88399e9b0e84bfd5ddf922c7816a2afd0c0d76d6a9f8c873702c789665dfe3205ada01d34203
+  data.tar.gz: 0d579386ead2158a4e7ad4991ff0c025758ac51624947d07e5d112779d46cb36bcabdd492ac20bbabc981b3e75e25300d04ba8b86808e4825b5c66e2186e52ae
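
The SHA256 and SHA512 entries above are digests of the two archives inside the published `.gem` file (a `.gem` is a tar archive that holds `metadata.gz` and `data.tar.gz`). A minimal verification sketch using Ruby's bundled rubygems APIs, assuming the gem has been fetched into the current directory (e.g. with `gem fetch llm.rb --version 4.22.0`):

```ruby
# Recompute the SHA256 digests published above and compare by eye.
# Assumes `llm.rb-4.22.0.gem` is present locally.
require "digest"
require "rubygems/package"

File.open("llm.rb-4.22.0.gem", "rb") do |io|
  Gem::Package::TarReader.new(io) do |tar|
    tar.each do |entry|
      next unless %w[metadata.gz data.tar.gz].include?(entry.full_name)
      # Compare against the checksums.yaml values shown in this diff.
      puts "#{entry.full_name}: #{Digest::SHA256.hexdigest(entry.read)}"
    end
  end
end
```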
data/CHANGELOG.md
CHANGED
@@ -2,8 +2,78 @@
 
 ## Unreleased
 
+Changes since `v4.22.0`.
+
+## v4.22.0
+
+Changes since `v4.21.0`.
+
+This release deepens the runtime shape of llm.rb. It reduces helper-method
+surface on persisted ORM models, expands real ORM coverage, and makes skills
+behave more like bounded sub-agents with inherited recent context and proper
+instruction injection.
+
+### Change
+
+* **Reduce ActiveRecord wrapper model surface** <br>
+  Move helper methods such as option resolution, column mapping,
+  serialization, and persistence into `Utils` for the ActiveRecord
+  wrappers so wrapped models include fewer internal helper methods.
+
+* **Reduce Sequel wrapper model surface** <br>
+  Move helper methods such as option resolution, column mapping,
+  serialization, and persistence into `Utils` for the Sequel wrappers
+  so wrapped models include fewer internal helper methods.
+
+* **Expand ORM integration coverage** <br>
+  Add broader ActiveRecord and Sequel coverage for persisted context and
+  agent wrappers, including real SQLite-backed records and cassette-backed
+  OpenAI persistence paths.
+
+* **Make skills inherit recent parent context** <br>
+  Run `LLM::Skill` with a curated slice of recent parent user and assistant
+  messages, prefixed with `Recent context:`, so skills behave more like
+  task-scoped sub-agents instead of instruction-only helpers.
+
+### Fix
+
+* **Fix Sequel `plugin :agent` load order** <br>
+  Require the shared Sequel plugin support from `LLM::Sequel::Agent` so
+  `plugin :agent` can load independently without raising
+  `uninitialized constant LLM::Sequel::Plugin`.
+
+* **Make skill execution inherit parent context request settings** <br>
+  Run `LLM::Skill` through a parent `LLM::Context` instead of a bare
+  provider so nested skill agents inherit context-level settings such as
+  `mode: :responses`, `store: false`, streaming, and other request defaults,
+  while still keeping skill-local tools and avoiding parent schemas.
+
+* **Keep agent instructions when history is preseeded** <br>
+  Inject `LLM::Agent` instructions once unless a system message is already
+  present, so agents and nested skills still get their instructions when
+  they start with inherited non-system context.
+
+## v4.21.0
+
 Changes since `v4.20.2`.
 
+This release expands higher-level composition in llm.rb. It adds Sequel agent
+persistence through `plugin :agent` and introduces directory-backed skills
+that load from `SKILL.md`, resolve named tools, and plug directly into
+`LLM::Context` and `LLM::Agent`.
+
+### Change
+
+* **Add `plugin :agent` for Sequel models** <br>
+  Add Sequel support for `plugin :agent`, similar to ActiveRecord's
+  `acts_as_agent`, so models can wrap `LLM::Agent` with built-in
+  persistence.
+
+* **Load directory-backed skills through `LLM::Context` and `LLM::Agent`** <br>
+  Add `skills:` to `LLM::Context` and `skills ...` to `LLM::Agent` so
+  directories with `SKILL.md` can be loaded, resolved into tools, and run
+  through the normal llm.rb tool path.
+
 ## v4.20.2
 
 Changes since `v4.20.1`.
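
Given the `plugin :agent` entries above, a hedged sketch of what the Sequel side could look like, mirroring the ActiveRecord `acts_as_agent provider: :set_provider` example that appears later in this diff; the model name, table, and ENV key are illustrative assumptions, not documented API:

```ruby
# A hedged sketch of `plugin :agent` on a Sequel model. The `provider:` hook
# is named in the README diff; the Ticket model and OPENAI_SECRET key are
# assumptions for illustration.
require "llm"
require "sequel"

class Ticket < Sequel::Model
  plugin :agent, provider: :set_provider

  private

  # Mirrors the README's ActiveRecord hook: returns provider options.
  def set_provider
    {key: ENV["OPENAI_SECRET"], persistent: true}
  end
end
```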
data/README.md
CHANGED
@@ -4,25 +4,26 @@
 <p align="center">
   <a href="https://0x1eef.github.io/x/llm.rb?rebuild=1"><img src="https://img.shields.io/badge/docs-0x1eef.github.io-blue.svg" alt="RubyDoc"></a>
   <a href="https://opensource.org/license/0bsd"><img src="https://img.shields.io/badge/License-0BSD-orange.svg?" alt="License"></a>
-  <a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-4.
+  <a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-4.21.0-green.svg?" alt="Version"></a>
 </p>
 
 ## About
 
-llm.rb is
-
-It is not just an API wrapper. llm.rb gives you one runtime for providers,
-contexts, agents, tools, MCP servers, streaming, schemas, files, and persisted
-state, so real systems can be built out of one coherent execution model instead
-of a pile of adapters.
+llm.rb is the most capable runtime for building AI systems in Ruby.
+<br>
 
-
-
+llm.rb is designed for Ruby, and although it works great in Rails, it is not tightly
+coupled to it. It runs on the standard library by default (zero dependencies),
+loads optional pieces only when needed, includes built-in ActiveRecord support through
 `acts_as_llm` and `acts_as_agent`, includes built-in Sequel support through
-`plugin :llm`, and is designed for engineers who want control over
+`plugin :llm` and `plugin :agent`, and is designed for engineers who want control over
 long-lived, tool-capable, stateful AI workflows instead of just
 request/response helpers.
 
+It provides one runtime for providers, agents, tools, skills, MCP servers, streaming,
+schemas, files, and persisted state, so real systems can be built out of one coherent
+execution model instead of a pile of adapters.
+
 Want to see some code? Jump to [the examples](#examples) section. <br>
 Want a taste of what llm.rb can build? See [the screencast](#screencast).
 

@@ -47,6 +48,175 @@ It holds:
 Instead of switching abstractions for each feature, everything builds on the
 same context object.
 
+## Standout features
+
+The following list is **not exhaustive**, but it covers a lot of ground.
+
+#### Skills
+
+Skills are reusable, directory-backed capabilities loaded from `SKILL.md`.
+They run through the same runtime as tools, agents, and MCP. They do not
+require a second orchestration layer or a parallel abstraction. If you've
+used Claude or Codex, you know the general idea of skills, and llm.rb
+supports that same concept with the same execution model as the rest of the
+system.
+
+In llm.rb, a skill has frontmatter and instructions. The frontmatter can
+define `name`, `description`, and `tools`. The `tools` entries are tool names,
+and each name must resolve to a subclass of
+[`LLM::Tool`](https://0x1eef.github.io/x/llm.rb/LLM/Tool.html) that is already
+loaded in the runtime.
+
+If you want Claude/Codex-like skills that can drive scripts or shell
+commands, you would typically pair the skill with a tool that can execute
+system commands.
+
+```yaml
+---
+name: release
+description: Prepare a release
+tools:
+  - search_docs
+  - git
+---
+Review the release state, summarize what changed, and prepare the release.
+```
+
+```ruby
+class Agent < LLM::Agent
+  model "gpt-5.4-mini"
+  skills "./skills/release"
+end
+
+llm = LLM.openai(key: ENV["KEY"])
+Agent.new(llm, stream: $stdout).talk("Let's prepare the release!")
+```
+
+#### ORM
+
+Any ActiveRecord model or Sequel model can become an agent-capable model,
+including existing business and domain models, without forcing you into a
+separate agent table or a second persistence layer.
+
+`acts_as_agent` extends a model with agent capabilities: the same runtime
+surface as [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html),
+because it actually wraps an `LLM::Agent`, plus persistence through a text,
+JSON, or JSONB-backed column on the same table.
+
+
+```ruby
+class Ticket < ApplicationRecord
+  acts_as_agent provider: :set_provider
+  model "gpt-5.4-mini"
+  instructions "You are a support assistant."
+
+  private
+
+  def set_provider
+    { key: ENV["#{provider.upcase}_SECRET"], persistent: true }
+  end
+end
+```
+
+#### Agentic Patterns
+
+llm.rb is especially strong when you want to build agentic systems in a Ruby
+way. Agents can be ordinary application models with state, associations,
+tools, skills, and persistence, which makes it much easier to build systems
+where users have their own specialized agents instead of treating agents as
+something outside the app.
+
+That pattern works so well in llm.rb because
+[`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html),
+`acts_as_agent`, `plugin :agent`, skills, tools, and persisted runtime state
+all fit the same execution model. The runtime stays small enough that the
+main design work becomes application design, not orchestration glue.
+
+For a concrete example, see
+[How to build a platform of agents](https://0x1eef.github.io/posts/how-to-build-a-platform-of-agents).
+
+#### Persistence
+
+The same runtime can be serialized to disk, restored later, persisted in JSON
+or JSONB-backed ORM columns, resumed across process boundaries, or shared
+across long-lived workflows.
+
+```ruby
+ctx = LLM::Context.new(llm)
+ctx.talk("Remember that my favorite language is Ruby.")
+ctx.save(path: "context.json")
+```
+
+#### LLM::Stream
+
+`LLM::Stream` is not just for printing tokens. It supports `on_content`,
+`on_reasoning_content`, `on_tool_call`, and `on_tool_return`, which means
+visible output, reasoning output, and tool execution can all be driven through
+the same execution path.
+
+```ruby
+class Stream < LLM::Stream
+  def on_tool_call(tool, error)
+    queue << tool.spawn(:thread)
+  end
+
+  def on_tool_return(tool, result)
+    puts(result.value)
+  end
+end
+```
+
+#### Concurrency
+
+Tool execution can run sequentially with `:call` or concurrently through
+`:thread`, `:task`, `:fiber`, and experimental `:ractor`, without rewriting
+your tool layer.
+
+```ruby
+class Agent < LLM::Agent
+  model "gpt-5.4-mini"
+  tools FetchWeather, FetchNews, FetchStock
+  concurrency :thread
+end
+```
+
+#### MCP
+
+Remote MCP tools and prompts are not bolted on as a separate integration
+stack. They adapt into the same tool and prompt path used by local tools,
+skills, contexts, and agents.
+
+```ruby
+begin
+  mcp = LLM::MCP.http(url: "https://api.githubcopilot.com/mcp/").persistent
+  mcp.start
+  ctx = LLM::Context.new(llm, tools: mcp.tools)
+ensure
+  mcp.stop
+end
+```
+
+#### Cancellation
+
+Cancellation is one of the harder problems to get right, and while llm.rb
+makes it possible, it still requires careful engineering to use effectively.
+The point though is that it is possible to stop in-flight provider work cleanly
+through the same runtime, and the model used by llm.rb is directly inspired by
+Go's context package. In fact, llm.rb is heavily inspired by Go but with a Ruby
+twist.
+
+```ruby
+ctx = LLM::Context.new(llm, stream: $stdout)
+worker = Thread.new do
+  ctx.talk("Write a very long essay about network protocols.")
+rescue LLM::Interrupt
+  puts "Request was interrupted!"
+end
+STDIN.getch
+ctx.interrupt!
+worker.join
+```
+
 ## Differentiators
 
 ### Execution Model
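
The examples above reference a `Shell` tool without defining it, and the Skills prose notes that shell-driving skills are typically paired with a command-executing tool. A hedged sketch of such a tool: subclassing `LLM::Tool` comes from the README text, while the `call(command:)` entry point is an assumption, since the tool-declaration DSL is not shown in this diff:

```ruby
# A hedged sketch of the `Shell` tool referenced by the README examples.
# Only the execution half is illustrated; the actual LLM::Tool declaration
# DSL (name/description/parameters) is not shown in this diff.
require "open3"

class Shell < LLM::Tool
  # Hypothetical entry point: run a command and return its output.
  def call(command:)
    stdout, stderr, status = Open3.capture3(command)
    status.success? ? stdout : "#{stderr} (exit #{status.exitstatus})"
  end
end
```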
@@ -101,13 +271,18 @@ same context object.
   integration stack.
 - **ActiveRecord and Sequel persistence are built in** <br>
   llm.rb includes built-in ActiveRecord support through `acts_as_llm` and
-  `acts_as_agent`, plus built-in Sequel support through `plugin :llm
+  `acts_as_agent`, plus built-in Sequel support through `plugin :llm` and
+  `plugin :agent`.
   Use `acts_as_llm` when you want to wrap `LLM::Context`, `acts_as_agent`
-  when you want to wrap `LLM::Agent`,
-
-  integrations support `provider:` and `context:` hooks,
-  :string` for text columns or `format: :jsonb` for native
-  storage when ORM JSON typecasting support is enabled.
+  when you want to wrap `LLM::Agent`, `plugin :llm` when you want a
+  `LLM::Context` on a Sequel model, or `plugin :agent` when you want an
+  `LLM::Agent`. These integrations support `provider:` and `context:` hooks,
+  plus `format: :string` for text columns or `format: :jsonb` for native
+  PostgreSQL JSON storage when ORM JSON typecasting support is enabled.
+- **ORM models can become persistent agents** <br>
+  Turn an ActiveRecord or Sequel model into an agent-capable model with
+  built-in persistence, stored on the same table, with `jsonb` support when
+  your ORM and database support native JSON columns.
 - **Persistent HTTP pooling is shared process-wide** <br>
   When enabled, separate
   [`LLM::Provider`](https://0x1eef.github.io/x/llm.rb/LLM/Provider.html)

@@ -126,6 +301,11 @@ same context object.
 - **Tools are explicit** <br>
   Run local tools, provider-native tools, and MCP tools through the same path
   with fewer special cases.
+- **Skills become bounded runtime capabilities** <br>
+  Point llm.rb at directories with a `SKILL.md`, resolve named tools through
+  the registry, and adapt each skill into its own callable capability through
+  the normal runtime. Unlike a generic skill-discovery tool, each skill runs
+  with its own bounded tool subset and behaves like a task-scoped sub-agent.
 - **Providers are normalized, not flattened** <br>
   Share one API surface across providers without losing access to provider-
   specific capabilities where they matter.

@@ -157,23 +337,31 @@ same context object.
 
 ## Capabilities
 
+Execution:
 - **Chat & Contexts** — stateless and stateful interactions with persistence
 - **Context Serialization** — save and restore state across processes or time
 - **Streaming** — visible output, reasoning output, tool-call events
 - **Request Interruption** — stop in-flight provider work cleanly
+- **Concurrent Execution** — threads, async tasks, and fibers
+
+Runtime Building Blocks:
 - **Tool Calling** — class-based tools and closure-based functions
 - **Run Tools While Streaming** — overlap model output with tool latency
-- **Concurrent Execution** — threads, async tasks, and fibers
 - **Agents** — reusable assistants with tool auto-execution
+- **Skills** — directory-backed capabilities loaded from `SKILL.md`
+- **MCP Support** — stdio and HTTP MCP clients with prompt and tool support
+
+Data and Structure:
 - **Structured Outputs** — JSON Schema-based responses
 - **Responses API** — stateful response workflows where providers support them
-- **MCP Support** — stdio and HTTP MCP clients with prompt and tool support
 - **Multimodal Inputs** — text, images, audio, documents, URLs
 - **Audio** — speech generation, transcription, translation
 - **Images** — generation and editing
 - **Files API** — upload and reference files in prompts
 - **Embeddings** — vector generation for search and RAG
 - **Vector Stores** — retrieval workflows
+
+Operations:
 - **Cost Tracking** — local cost estimation without extra API calls
 - **Observability** — tracing, logging, telemetry
 - **Model Registry** — local metadata for capabilities, limits, pricing

@@ -189,7 +377,7 @@ gem install llm.rb
 
 #### REPL
 
-This example uses [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html) directly for an interactive REPL. <br> See the [deepdive](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) for more examples.
+This example uses [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html) directly for an interactive REPL. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
 
 ```ruby
 require "llm"

@@ -204,9 +392,47 @@ loop do
 end
 ```
 
+#### Agent
+
+This example uses [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html) directly and lets the agent manage tool execution. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
+
+```ruby
+require "llm"
+
+class ShellAgent < LLM::Agent
+  model "gpt-5.4-mini"
+  instructions "You are a Linux system assistant."
+  tools Shell
+  concurrency :thread
+end
+
+llm = LLM.openai(key: ENV["KEY"])
+agent = ShellAgent.new(llm)
+puts agent.talk("What time is it on this system?").content
+```
+
+#### Skills
+
+This example uses [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html) with directory-backed skills so `SKILL.md` capabilities run through the normal tool path. In llm.rb, a skill is exposed as a tool in the runtime. When that tool is called, it spawns a sub-agent with relevant context plus the instructions and tool subset declared in its own `SKILL.md`. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
+
+Each skill runs only with the tools declared in its own frontmatter.
+
+```ruby
+require "llm"
+
+class Agent < LLM::Agent
+  model "gpt-5.4-mini"
+  instructions "You are a concise release assistant."
+  skills "./skills/release", "./skills/review"
+end
+
+llm = LLM.openai(key: ENV["KEY"])
+puts Agent.new(llm).talk("Use the review skill.").content
+```
+
 #### Streaming
 
-This example uses [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html) directly so visible output and tool execution can happen together. <br> See the [deepdive](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) for more examples.
+This example uses [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html) directly so visible output and tool execution can happen together. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
 
 ```ruby
 require "llm"

@@ -238,9 +464,37 @@ ctx.talk("Run `date` and `uname -a`.")
 ctx.talk(ctx.wait(:thread)) while ctx.functions.any?
 ```
 
+#### Reasoning
+
+This example uses [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html) with the OpenAI Responses API so reasoning output is streamed separately from visible assistant output. See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
+
+```ruby
+require "llm"
+
+class Stream < LLM::Stream
+  def on_content(content)
+    $stdout << content
+  end
+
+  def on_reasoning_content(content)
+    $stderr << content
+  end
+end
+
+llm = LLM.openai(key: ENV["KEY"])
+ctx = LLM::Context.new(
+  llm,
+  model: "gpt-5.4-mini",
+  mode: :responses,
+  reasoning: {effort: "medium"},
+  stream: Stream.new
+)
+ctx.talk("Solve 17 * 19 and show your work.")
+```
+
 #### Request Cancellation
 
-Need to cancel a stream? llm.rb has you covered through [`LLM::Context#interrupt!`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html#interrupt-21-instance_method). <br> See the [deepdive](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) for more examples.
+Need to cancel a stream? llm.rb has you covered through [`LLM::Context#interrupt!`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html#interrupt-21-instance_method). <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
 
 ```ruby
 require "llm"

@@ -260,7 +514,7 @@ worker.join
 
 #### Sequel (ORM)
 
-The `plugin :llm` integration wraps [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html) on a `Sequel::Model` and keeps tool execution explicit. <br> See the [deepdive](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) for more examples.
+The `plugin :llm` integration wraps [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html) on a `Sequel::Model` and keeps tool execution explicit. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
 
 ```ruby
 require "llm"

@@ -280,7 +534,7 @@ puts ctx.talk("What is my favorite language?").content
 #### ActiveRecord (ORM): acts_as_llm
 
 The `acts_as_llm` method wraps [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html) and
-provides full control over tool execution. <br> See the [deepdive](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) for more examples.
+provides full control over tool execution. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
 
 ```ruby
 require "llm"

@@ -300,7 +554,7 @@ puts ctx.talk("What is my favorite language?").content
 #### ActiveRecord (ORM): acts_as_agent
 
 The `acts_as_agent` method wraps [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html) and
-manages tool execution for you. <br> See the [deepdive](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) for more examples.
+manages tool execution for you. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
 
 ```ruby
 require "llm"

@@ -309,12 +563,11 @@ require "active_record"
 require "llm/active_record"
 
 class Ticket < ApplicationRecord
-  acts_as_agent provider: :set_provider
-
-
-
-
-end
+  acts_as_agent provider: :set_provider
+  model "gpt-5.4-mini"
+  instructions "You are a concise support assistant."
+  tools SearchDocs, Escalate
+  concurrency :thread
 
   private
 

@@ -327,28 +580,9 @@ ticket = Ticket.create!(provider: "openai", model: "gpt-5.4-mini")
 puts ticket.talk("How do I rotate my API key?").content
 ```
 
-#### Agent
-
-This example uses [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html) directly and lets the agent manage tool execution. <br> See the [deepdive](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) for more examples.
-
-```ruby
-require "llm"
-
-class ShellAgent < LLM::Agent
-  model "gpt-5.4-mini"
-  instructions "You are a Linux system assistant."
-  tools Shell
-  concurrency :thread
-end
-
-llm = LLM.openai(key: ENV["KEY"])
-agent = ShellAgent.new(llm)
-puts agent.talk("What time is it on this system?").content
-```
-
 #### MCP
 
-This example uses [`LLM::MCP`](https://0x1eef.github.io/x/llm.rb/LLM/MCP.html) over HTTP so remote GitHub MCP tools run through the same `LLM::Context` tool path as local tools.
+This example uses [`LLM::MCP`](https://0x1eef.github.io/x/llm.rb/LLM/MCP.html) over HTTP so remote GitHub MCP tools run through the same `LLM::Context` tool path as local tools. See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
 
 ```ruby
 require "llm"

@@ -379,8 +613,8 @@ how capable the runtime can be in a real application:
 
 ## Resources
 
-- [deepdive](https://0x1eef.github.io/x/llm.rb/file.deepdive.html)
-  examples guide.
+- [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) and
+  [deepdive (markdown)](resources/deepdive.md) are the examples guide.
 - [relay](https://github.com/llmrb/relay) shows a real application built on
   top of llm.rb.
 - [doc site](https://0x1eef.github.io/x/llm.rb?rebuild=1) has the API docs.
data/data/anthropic.json
CHANGED
@@ -213,7 +213,7 @@
     "reasoning": true,
     "tool_call": true,
     "temperature": true,
-    "knowledge": "2025-08",
+    "knowledge": "2025-08-31",
     "release_date": "2026-02-17",
     "last_updated": "2026-03-13",
     "modalities": {

@@ -271,6 +271,39 @@
       "output": 32000
     }
   },
+  "claude-opus-4-7": {
+    "id": "claude-opus-4-7",
+    "name": "Claude Opus 4.7",
+    "family": "claude-opus",
+    "attachment": true,
+    "reasoning": true,
+    "tool_call": true,
+    "temperature": false,
+    "knowledge": "2026-01-31",
+    "release_date": "2026-04-16",
+    "last_updated": "2026-04-16",
+    "modalities": {
+      "input": [
+        "text",
+        "image",
+        "pdf"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "open_weights": false,
+    "cost": {
+      "input": 5,
+      "output": 25,
+      "cache_read": 0.5,
+      "cache_write": 6.25
+    },
+    "limit": {
+      "context": 1000000,
+      "output": 128000
+    }
+  },
   "claude-3-haiku-20240307": {
     "id": "claude-3-haiku-20240307",
     "name": "Claude Haiku 3",

@@ -609,7 +642,7 @@
     "reasoning": true,
     "tool_call": true,
     "temperature": true,
-    "knowledge": "2025-05",
+    "knowledge": "2025-05-31",
     "release_date": "2026-02-05",
     "last_updated": "2026-03-13",
     "modalities": {
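
The README lists local cost estimation among the capabilities; as a rough illustration against the `claude-opus-4-7` entry added above, a hedged sketch that assumes the `cost` fields are USD per million tokens and that the JSON file maps model ids at its top level (neither the unit nor the file layout is stated in this diff):

```ruby
# Hedged cost-estimation sketch using the registry entry added above.
# Assumptions: cost fields are USD per million tokens; model ids are
# top-level keys of the JSON file.
require "json"

entry = JSON.parse(File.read("data/anthropic.json")).fetch("claude-opus-4-7")
input_tokens, output_tokens = 12_000, 3_000
usd = (input_tokens * entry.dig("cost", "input") +
       output_tokens * entry.dig("cost", "output")) / 1_000_000.0
puts format("claude-opus-4-7: $%.4f", usd)
```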
data/data/google.json
CHANGED
@@ -594,7 +594,12 @@
     "cost": {
       "input": 1.25,
       "output": 10,
-      "cache_read": 0.
+      "cache_read": 0.125,
+      "context_over_200k": {
+        "input": 2.5,
+        "output": 15,
+        "cache_read": 0.25
+      }
     },
     "limit": {
       "context": 1048576,

@@ -824,7 +829,7 @@
     "cost": {
       "input": 0.3,
       "output": 2.5,
-      "cache_read": 0.
+      "cache_read": 0.03,
       "input_audio": 1
     },
     "limit": {
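
The Gemini entry above gains a `context_over_200k` pricing tier. A hedged sketch of tier selection, assuming the tiered rates replace the base rates once a request's context exceeds 200,000 tokens (field names come from the diff; the threshold semantics and units are assumptions):

```ruby
# Hedged tier-selection sketch for the `context_over_200k` pricing block
# added above. Assumes the tiered rates apply past 200,000 context tokens.
def rates_for(cost, context_tokens)
  tier = cost["context_over_200k"]
  tier && context_tokens > 200_000 ? cost.merge(tier) : cost
end

base = {"input" => 1.25, "output" => 10, "cache_read" => 0.125,
        "context_over_200k" => {"input" => 2.5, "output" => 15, "cache_read" => 0.25}}
rates_for(base, 300_000)  #=> input 2.5, output 15, cache_read 0.25
```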
data/data/openai.json
CHANGED
@@ -1066,36 +1066,6 @@
       "output": 100000
     }
   },
-  "codex-mini-latest": {
-    "id": "codex-mini-latest",
-    "name": "Codex Mini",
-    "family": "gpt-codex-mini",
-    "attachment": true,
-    "reasoning": true,
-    "tool_call": true,
-    "temperature": false,
-    "knowledge": "2024-04",
-    "release_date": "2025-05-16",
-    "last_updated": "2025-05-16",
-    "modalities": {
-      "input": [
-        "text"
-      ],
-      "output": [
-        "text"
-      ]
-    },
-    "open_weights": false,
-    "cost": {
-      "input": 1.5,
-      "output": 6,
-      "cache_read": 0.375
-    },
-    "limit": {
-      "context": 200000,
-      "output": 100000
-    }
-  },
   "gpt-4": {
     "id": "gpt-4",
     "name": "GPT-4",