llm.rb 4.21.0 → 4.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +78 -0
- data/README.md +290 -59
- data/data/anthropic.json +35 -2
- data/data/google.json +7 -2
- data/data/openai.json +0 -30
- data/lib/llm/active_record/acts_as_agent.rb +11 -64
- data/lib/llm/active_record/acts_as_llm.rb +81 -61
- data/lib/llm/agent.rb +15 -3
- data/lib/llm/buffer.rb +10 -0
- data/lib/llm/compactor.rb +128 -0
- data/lib/llm/context.rb +31 -2
- data/lib/llm/function.rb +2 -1
- data/lib/llm/sequel/agent.rb +4 -17
- data/lib/llm/sequel/plugin.rb +82 -60
- data/lib/llm/skill.rb +29 -14
- data/lib/llm/stream.rb +20 -1
- data/lib/llm/tool.rb +14 -0
- data/lib/llm/version.rb +1 -1
- data/llm.gemspec +3 -0
- metadata +44 -1
checksums.yaml
CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 49ed8077a6283802d4141dcb9ec037c7fc46920ebd3273b30c55624b575f3156
+  data.tar.gz: e2289baf740ba9603ed1c308414e632ddda296356659c8714bf3a1744c216104
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b6b0d72baa785a6bf25cbfd3f2581d7f6a5850a0fa61dea29668596e19eb8a1142330f8acfea7f04a1bc76461c02c0af681588332d955aae2b5c6808f2fc0610
+  data.tar.gz: 836fc45489b9d86c7bde3ed2b94d2813be5bdaea1ebf7697f7e7eca5962f5374343e371188e40ced180ed50e053cd74ec8fcec8dea08c164291ee8577301f195
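The `+` entries above are the published digests for the tar members inside the 4.23.0 package. To check a downloaded copy against the published SHA256 value, a minimal sketch using only Ruby's standard library (the local `llm-4.23.0.gem` filename is an assumption):

```ruby
require "digest"
require "rubygems/package"

# Published SHA256 for data.tar.gz, copied from the checksums.yaml diff above.
EXPECTED = "e2289baf740ba9603ed1c308414e632ddda296356659c8714bf3a1744c216104"

# A .gem file is a plain tar archive whose members include metadata.gz,
# data.tar.gz, and checksums.yaml.gz; hash the member we care about.
File.open("llm-4.23.0.gem", "rb") do |io|
  Gem::Package::TarReader.new(io).each do |entry|
    next unless entry.full_name == "data.tar.gz"
    digest = Digest::SHA256.hexdigest(entry.read)
    puts(digest == EXPECTED ? "data.tar.gz: OK" : "data.tar.gz: mismatch (#{digest})")
  end
end
```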
data/CHANGELOG.md
CHANGED

@@ -2,8 +2,86 @@
 
 ## Unreleased
 
+Changes since `v4.23.0`.
+
+## v4.23.0
+
+Changes since `v4.22.0`.
+
+This release expands llm.rb's runtime surface for long-lived contexts and
+stateful tools. It adds built-in context compaction through `LLM::Compactor`,
+lets explicit `tools:` arrays accept bound `LLM::Tool` instances, and fixes
+OpenAI-compatible no-arg tool schemas for stricter providers such as xAI.
+
+### Change
+
+* **Add `LLM::Compactor` for long-lived contexts** <br>
+  Add built-in context compaction through `LLM::Compactor`, so older history
+  can be summarized, retained windows can stay bounded, compaction can run on
+  its own `model:`, and `LLM::Stream` can observe the lifecycle through
+  `on_compaction` and `on_compaction_finish`.
+
+* **Allow bound tool instances in explicit tool lists** <br>
+  Let explicit `tools:` arrays accept `LLM::Tool` instances such as
+  `MyTool.new(foo: 1)`, so tools can carry bound state without changing the
+  global tool registry model.
+
+### Fix
+
+* **Fix xAI/OpenAI-compatible no-arg tool schemas** <br>
+  Send an empty object schema for tools without declared parameters instead
+  of `null`, so stricter providers such as xAI accept mixed tool sets that
+  include no-arg tools.
+
+## v4.22.0
+
 Changes since `v4.21.0`.
 
+This release deepens the runtime shape of llm.rb. It reduces helper-method
+surface on persisted ORM models, expands real ORM coverage, and makes skills
+behave more like bounded sub-agents with inherited recent context and proper
+instruction injection.
+
+### Change
+
+* **Reduce ActiveRecord wrapper model surface** <br>
+  Move helper methods such as option resolution, column mapping,
+  serialization, and persistence into `Utils` for the ActiveRecord
+  wrappers so wrapped models include fewer internal helper methods.
+
+* **Reduce Sequel wrapper model surface** <br>
+  Move helper methods such as option resolution, column mapping,
+  serialization, and persistence into `Utils` for the Sequel wrappers
+  so wrapped models include fewer internal helper methods.
+
+* **Expand ORM integration coverage** <br>
+  Add broader ActiveRecord and Sequel coverage for persisted context and
+  agent wrappers, including real SQLite-backed records and cassette-backed
+  OpenAI persistence paths.
+
+* **Make skills inherit recent parent context** <br>
+  Run `LLM::Skill` with a curated slice of recent parent user and assistant
+  messages, prefixed with `Recent context:`, so skills behave more like
+  task-scoped sub-agents instead of instruction-only helpers.
+
+### Fix
+
+* **Fix Sequel `plugin :agent` load order** <br>
+  Require the shared Sequel plugin support from `LLM::Sequel::Agent` so
+  `plugin :agent` can load independently without raising
+  `uninitialized constant LLM::Sequel::Plugin`.
+
+* **Make skill execution inherit parent context request settings** <br>
+  Run `LLM::Skill` through a parent `LLM::Context` instead of a bare
+  provider so nested skill agents inherit context-level settings such as
+  `mode: :responses`, `store: false`, streaming, and other request defaults,
+  while still keeping skill-local tools and avoiding parent schemas.
+
+* **Keep agent instructions when history is preseeded** <br>
+  Inject `LLM::Agent` instructions once unless a system message is already
+  present, so agents and nested skills still get their instructions when
+  they start with inherited non-system context.
+
 ## v4.21.0
 
 Changes since `v4.20.2`.
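The two tool-related v4.23.0 entries are easiest to see in code. A minimal sketch, assuming hypothetical `MyTool` and `Ping` subclasses of `LLM::Tool`; the class bodies are illustrative, and only the `tools:` behavior and the empty-object schema come from the changelog:

```ruby
require "llm"

# Hypothetical tool carrying bound state: configuration passed to the
# constructor travels with this instance rather than living in the global
# tool registry. (A real subclass may also need to call super.)
class MyTool < LLM::Tool
  def initialize(foo:)
    @foo = foo
  end
end

# Hypothetical tool with no declared parameters. As of v4.23.0 its schema is
# sent as an empty object schema instead of null, which stricter
# OpenAI-compatible providers such as xAI require.
class Ping < LLM::Tool
end

llm = LLM.openai(key: ENV["KEY"])

# v4.23.0: an explicit tools: array may mix classes and bound instances.
ctx = LLM::Context.new(llm, tools: [MyTool.new(foo: 1), Ping])
```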
data/README.md
CHANGED

@@ -4,23 +4,14 @@
 <p align="center">
 <a href="https://0x1eef.github.io/x/llm.rb?rebuild=1"><img src="https://img.shields.io/badge/docs-0x1eef.github.io-blue.svg" alt="RubyDoc"></a>
 <a href="https://opensource.org/license/0bsd"><img src="https://img.shields.io/badge/License-0BSD-orange.svg?" alt="License"></a>
-<a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-4.
+<a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-4.23.0-green.svg?" alt="Version"></a>
 </p>
 
 ## About
 
-llm.rb is
+llm.rb is the most capable runtime for building AI systems in Ruby.
 <br>
 
-It is also the most capable AI Ruby runtime that exists _today_, and that claim is
-backed up by research. Maybe it won't always be true, and that would be good news too -
-because it would mean the Ruby ecosystem is getting stronger.
-
-llm.rb is not just an API wrapper: it gives you one runtime for providers,
-contexts, agents, tools, skills, MCP servers, streaming, schemas, files, and
-persisted state, so real systems can be built out of one coherent execution
-model instead of a pile of adapters.
-
 llm.rb is designed for Ruby, and although it works great in Rails, it is not tightly
 coupled to it. It runs on the standard library by default (zero dependencies),
 loads optional pieces only when needed, includes built-in ActiveRecord support through
@@ -29,6 +20,10 @@ loads optional pieces only when needed, includes built-in ActiveRecord support t
 long-lived, tool-capable, stateful AI workflows instead of just
 request/response helpers.
 
+It provides one runtime for providers, agents, tools, skills, MCP servers, streaming,
+schemas, files, and persisted state, so real systems can be built out of one coherent
+execution model instead of a pile of adapters.
+
 Want to see some code? Jump to [the examples](#examples) section. <br>
 Want a taste of what llm.rb can build? See [the screencast](#screencast).
 
@@ -53,6 +48,197 @@ It holds:
 Instead of switching abstractions for each feature, everything builds on the
 same context object.
 
+## Standout features
+
+The following list is **not exhaustive**, but it covers a lot of ground.
+
+#### Skills
+
+Skills are reusable, directory-backed capabilities loaded from `SKILL.md`.
+They run through the same runtime as tools, agents, and MCP. They do not
+require a second orchestration layer or a parallel abstraction. If you've
+used Claude or Codex, you know the general idea of skills, and llm.rb
+supports that same concept with the same execution model as the rest of the
+system.
+
+In llm.rb, a skill has frontmatter and instructions. The frontmatter can
+define `name`, `description`, and `tools`. The `tools` entries are tool names,
+and each name must resolve to a subclass of
+[`LLM::Tool`](https://0x1eef.github.io/x/llm.rb/LLM/Tool.html) that is already
+loaded in the runtime.
+
+If you want Claude/Codex-like skills that can drive scripts or shell
+commands, you would typically pair the skill with a tool that can execute
+system commands.
+
+```yaml
+---
+name: release
+description: Prepare a release
+tools:
+  - search_docs
+  - git
+---
+Review the release state, summarize what changed, and prepare the release.
+```
+
+```ruby
+class Agent < LLM::Agent
+  model "gpt-5.4-mini"
+  skills "./skills/release"
+end
+
+llm = LLM.openai(key: ENV["KEY"])
+Agent.new(llm, stream: $stdout).talk("Let's prepare the release!")
+```
+
+#### ORM
+
+Any ActiveRecord model or Sequel model can become an agent-capable model,
+including existing business and domain models, without forcing you into a
+separate agent table or a second persistence layer.
+
+`acts_as_agent` extends a model with agent capabilities: the same runtime
+surface as [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html),
+because it actually wraps an `LLM::Agent`, plus persistence through a text,
+JSON, or JSONB-backed column on the same table.
+
+
+```ruby
+class Ticket < ApplicationRecord
+  acts_as_agent provider: :set_provider
+  model "gpt-5.4-mini"
+  instructions "You are a support assistant."
+
+  private
+
+  def set_provider
+    { key: ENV["#{provider.upcase}_SECRET"], persistent: true }
+  end
+end
+```
+
+#### Agentic Patterns
+
+llm.rb is especially strong when you want to build agentic systems in a Ruby
+way. Agents can be ordinary application models with state, associations,
+tools, skills, and persistence, which makes it much easier to build systems
+where users have their own specialized agents instead of treating agents as
+something outside the app.
+
+That pattern works so well in llm.rb because
+[`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html),
+`acts_as_agent`, `plugin :agent`, skills, tools, and persisted runtime state
+all fit the same execution model. The runtime stays small enough that the
+main design work becomes application design, not orchestration glue.
+
+For a concrete example, see
+[How to build a platform of agents](https://0x1eef.github.io/posts/how-to-build-a-platform-of-agents).
+
+#### Persistence
+
+The same runtime can be serialized to disk, restored later, persisted in JSON
+or JSONB-backed ORM columns, resumed across process boundaries, or shared
+across long-lived workflows.
+
+```ruby
+ctx = LLM::Context.new(llm)
+ctx.talk("Remember that my favorite language is Ruby.")
+ctx.save(path: "context.json")
+```
+
+#### Context Compaction
+
+Long-lived contexts can compact older history into a summary instead of
+growing forever. Compaction is built into [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html)
+through [`LLM::Compactor`](https://0x1eef.github.io/x/llm.rb/LLM/Compactor.html),
+and when a stream is present it emits `on_compaction` and
+`on_compaction_finish` through [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html).
+The compactor can also use a different model from the main context, which is
+useful when you want summarization to run on a cheaper or faster model.
+
+```ruby
+ctx = LLM::Context.new(
+  llm,
+  compactor: {
+    message_threshold: 200,
+    retention_window: 8,
+    model: "gpt-5.4-mini"
+  }
+)
+```
+
+#### LLM::Stream
+
+`LLM::Stream` is not just for printing tokens. It supports `on_content`,
+`on_reasoning_content`, `on_tool_call`, `on_tool_return`, `on_compaction`,
+and `on_compaction_finish`, which means visible output, reasoning output, tool
+execution, and context compaction can all be driven through the same
+execution path.
+
+```ruby
+class Stream < LLM::Stream
+  def on_tool_call(tool, error)
+    queue << tool.spawn(:thread)
+  end
+
+  def on_tool_return(tool, result)
+    puts(result.value)
+  end
+end
+```
+
+#### Concurrency
+
+Tool execution can run sequentially with `:call` or concurrently through
+`:thread`, `:task`, `:fiber`, and experimental `:ractor`, without rewriting
+your tool layer.
+
+```ruby
+class Agent < LLM::Agent
+  model "gpt-5.4-mini"
+  tools FetchWeather, FetchNews, FetchStock
+  concurrency :thread
+end
+```
+
+#### MCP
+
+Remote MCP tools and prompts are not bolted on as a separate integration
+stack. They adapt into the same tool and prompt path used by local tools,
+skills, contexts, and agents.
+
+```ruby
+begin
+  mcp = LLM::MCP.http(url: "https://api.githubcopilot.com/mcp/").persistent
+  mcp.start
+  ctx = LLM::Context.new(llm, tools: mcp.tools)
+ensure
+  mcp.stop
+end
+```
+
+#### Cancellation
+
+Cancellation is one of the harder problems to get right, and while llm.rb
+makes it possible, it still requires careful engineering to use effectively.
+The point though is that it is possible to stop in-flight provider work cleanly
+through the same runtime, and the model used by llm.rb is directly inspired by
+Go's context package. In fact, llm.rb is heavily inspired by Go but with a Ruby
+twist.
+
+```ruby
+ctx = LLM::Context.new(llm, stream: $stdout)
+worker = Thread.new do
+  ctx.talk("Write a very long essay about network protocols.")
+rescue LLM::Interrupt
+  puts "Request was interrupted!"
+end
+STDIN.getch
+ctx.interrupt!
+worker.join
+```
+
 ## Differentiators
 
 ### Execution Model
@@ -137,11 +323,11 @@ same context object.
 - **Tools are explicit** <br>
   Run local tools, provider-native tools, and MCP tools through the same path
   with fewer special cases.
-- **Skills
+- **Skills become bounded runtime capabilities** <br>
   Point llm.rb at directories with a `SKILL.md`, resolve named tools through
-  the registry, and
-
-
+  the registry, and adapt each skill into its own callable capability through
+  the normal runtime. Unlike a generic skill-discovery tool, each skill runs
+  with its own bounded tool subset and behaves like a task-scoped sub-agent.
 - **Providers are normalized, not flattened** <br>
   Share one API surface across providers without losing access to provider-
   specific capabilities where they matter.
@@ -173,24 +359,32 @@ same context object.
 
 ## Capabilities
 
+Execution:
 - **Chat & Contexts** — stateless and stateful interactions with persistence
 - **Context Serialization** — save and restore state across processes or time
 - **Streaming** — visible output, reasoning output, tool-call events
 - **Request Interruption** — stop in-flight provider work cleanly
+- **Concurrent Execution** — threads, async tasks, and fibers
+
+Runtime Building Blocks:
 - **Tool Calling** — class-based tools and closure-based functions
 - **Run Tools While Streaming** — overlap model output with tool latency
-- **Concurrent Execution** — threads, async tasks, and fibers
 - **Agents** — reusable assistants with tool auto-execution
 - **Skills** — directory-backed capabilities loaded from `SKILL.md`
+- **MCP Support** — stdio and HTTP MCP clients with prompt and tool support
+- **Context Compaction** — summarize older history in long-lived contexts
+
+Data and Structure:
 - **Structured Outputs** — JSON Schema-based responses
 - **Responses API** — stateful response workflows where providers support them
-- **MCP Support** — stdio and HTTP MCP clients with prompt and tool support
 - **Multimodal Inputs** — text, images, audio, documents, URLs
 - **Audio** — speech generation, transcription, translation
 - **Images** — generation and editing
 - **Files API** — upload and reference files in prompts
 - **Embeddings** — vector generation for search and RAG
 - **Vector Stores** — retrieval workflows
+
+Operations:
 - **Cost Tracking** — local cost estimation without extra API calls
 - **Observability** — tracing, logging, telemetry
 - **Model Registry** — local metadata for capabilities, limits, pricing
@@ -221,6 +415,44 @@ loop do
 end
 ```
 
+#### Agent
+
+This example uses [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html) directly and lets the agent manage tool execution. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
+
+```ruby
+require "llm"
+
+class ShellAgent < LLM::Agent
+  model "gpt-5.4-mini"
+  instructions "You are a Linux system assistant."
+  tools Shell
+  concurrency :thread
+end
+
+llm = LLM.openai(key: ENV["KEY"])
+agent = ShellAgent.new(llm)
+puts agent.talk("What time is it on this system?").content
+```
+
+#### Skills
+
+This example uses [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html) with directory-backed skills so `SKILL.md` capabilities run through the normal tool path. In llm.rb, a skill is exposed as a tool in the runtime. When that tool is called, it spawns a sub-agent with relevant context plus the instructions and tool subset declared in its own `SKILL.md`. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
+
+Each skill runs only with the tools declared in its own frontmatter.
+
+```ruby
+require "llm"
+
+class Agent < LLM::Agent
+  model "gpt-5.4-mini"
+  instructions "You are a concise release assistant."
+  skills "./skills/release", "./skills/review"
+end
+
+llm = LLM.openai(key: ENV["KEY"])
+puts Agent.new(llm).talk("Use the review skill.").content
+```
+
 #### Streaming
 
 This example uses [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html) directly so visible output and tool execution can happen together. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
@@ -255,6 +487,42 @@ ctx.talk("Run `date` and `uname -a`.")
 ctx.talk(ctx.wait(:thread)) while ctx.functions.any?
 ```
 
+#### Context Compaction
+
+This example uses [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html),
+[`LLM::Compactor`](https://0x1eef.github.io/x/llm.rb/LLM/Compactor.html), and
+[`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html) together so
+long-lived contexts can summarize older history and expose the lifecycle
+through stream hooks. This approach is inspired by General Intelligence
+Systems' [Brute](https://github.com/general-intelligence-systems/brute). The
+compactor can also use its own `model:` if you want summarization to run on a
+different model from the main context. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
+
+```ruby
+require "llm"
+
+class Stream < LLM::Stream
+  def on_compaction(ctx, compactor)
+    puts "Compacting #{ctx.messages.size} messages..."
+  end
+
+  def on_compaction_finish(ctx, compactor)
+    puts "Compacted to #{ctx.messages.size} messages."
+  end
+end
+
+llm = LLM.openai(key: ENV["KEY"])
+ctx = LLM::Context.new(
+  llm,
+  stream: Stream.new,
+  compactor: {
+    message_threshold: 200,
+    retention_window: 8,
+    model: "gpt-5.4-mini"
+  }
+)
+```
+
 #### Reasoning
 
 This example uses [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html) with the OpenAI Responses API so reasoning output is streamed separately from visible assistant output. See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
@@ -354,12 +622,11 @@ require "active_record"
 require "llm/active_record"
 
 class Ticket < ApplicationRecord
-  acts_as_agent provider: :set_provider
-
-
-
-
-end
+  acts_as_agent provider: :set_provider
+  model "gpt-5.4-mini"
+  instructions "You are a concise support assistant."
+  tools SearchDocs, Escalate
+  concurrency :thread
 
   private
 
@@ -372,42 +639,6 @@
 puts ticket.talk("How do I rotate my API key?").content
 ```
 
-#### Agent
-
-This example uses [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html) directly and lets the agent manage tool execution. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
-
-```ruby
-require "llm"
-
-class ShellAgent < LLM::Agent
-  model "gpt-5.4-mini"
-  instructions "You are a Linux system assistant."
-  tools Shell
-  concurrency :thread
-end
-
-llm = LLM.openai(key: ENV["KEY"])
-agent = ShellAgent.new(llm)
-puts agent.talk("What time is it on this system?").content
-```
-
-#### Skills
-
-This example uses [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html) with directory-backed skills so `SKILL.md` capabilities run through the normal tool path. If you have used skills in Claude or Codex, this is the same kind of building block. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
-
-```ruby
-require "llm"
-
-class Agent < LLM::Agent
-  model "gpt-5.4-mini"
-  instructions "You are a concise release assistant."
-  skills "./skills/release", "./skills/review"
-end
-
-llm = LLM.openai(key: ENV["KEY"])
-puts Agent.new(llm).talk("Use the review skill.").content
-```
-
 #### MCP
 
 This example uses [`LLM::MCP`](https://0x1eef.github.io/x/llm.rb/LLM/MCP.html) over HTTP so remote GitHub MCP tools run through the same `LLM::Context` tool path as local tools. See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
data/data/anthropic.json
CHANGED

@@ -213,7 +213,7 @@
     "reasoning": true,
     "tool_call": true,
     "temperature": true,
-    "knowledge": "2025-08",
+    "knowledge": "2025-08-31",
     "release_date": "2026-02-17",
     "last_updated": "2026-03-13",
     "modalities": {
@@ -271,6 +271,39 @@
       "output": 32000
     }
   },
+  "claude-opus-4-7": {
+    "id": "claude-opus-4-7",
+    "name": "Claude Opus 4.7",
+    "family": "claude-opus",
+    "attachment": true,
+    "reasoning": true,
+    "tool_call": true,
+    "temperature": false,
+    "knowledge": "2026-01-31",
+    "release_date": "2026-04-16",
+    "last_updated": "2026-04-16",
+    "modalities": {
+      "input": [
+        "text",
+        "image",
+        "pdf"
+      ],
+      "output": [
+        "text"
+      ]
+    },
+    "open_weights": false,
+    "cost": {
+      "input": 5,
+      "output": 25,
+      "cache_read": 0.5,
+      "cache_write": 6.25
+    },
+    "limit": {
+      "context": 1000000,
+      "output": 128000
+    }
+  },
   "claude-3-haiku-20240307": {
     "id": "claude-3-haiku-20240307",
     "name": "Claude Haiku 3",
@@ -609,7 +642,7 @@
     "reasoning": true,
     "tool_call": true,
     "temperature": true,
-    "knowledge": "2025-05",
+    "knowledge": "2025-05-31",
     "release_date": "2026-02-05",
     "last_updated": "2026-03-13",
     "modalities": {
data/data/google.json
CHANGED

@@ -594,7 +594,12 @@
     "cost": {
       "input": 1.25,
       "output": 10,
-      "cache_read": 0.
+      "cache_read": 0.125,
+      "context_over_200k": {
+        "input": 2.5,
+        "output": 15,
+        "cache_read": 0.25
+      }
     },
     "limit": {
       "context": 1048576,
@@ -824,7 +829,7 @@
     "cost": {
       "input": 0.3,
       "output": 2.5,
-      "cache_read": 0.
+      "cache_read": 0.03,
       "input_audio": 1
     },
     "limit": {
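The new `context_over_200k` block gives long prompts their own rates. A minimal sketch of the tier selection, under two stated assumptions: rates are USD per million tokens, and the higher tier applies to the whole prompt rather than only the marginal tokens (llm.rb's own cost tracking is the authoritative reader of these fields):

```ruby
# Rates copied from the google.json diff above.
BASE = {input: 1.25, output: 10, cache_read: 0.125}
OVER_200K = {input: 2.5, output: 15, cache_read: 0.25} # "context_over_200k"

# Assumption: USD per million tokens, whole-prompt tiering.
def input_cost(tokens)
  rates = tokens > 200_000 ? OVER_200K : BASE
  rates[:input] * tokens / 1_000_000.0
end

input_cost(100_000) # => 0.125 (base tier)
input_cost(300_000) # => 0.75  (over-200k tier)
```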
data/data/openai.json
CHANGED

@@ -1066,36 +1066,6 @@
       "output": 100000
     }
   },
-  "codex-mini-latest": {
-    "id": "codex-mini-latest",
-    "name": "Codex Mini",
-    "family": "gpt-codex-mini",
-    "attachment": true,
-    "reasoning": true,
-    "tool_call": true,
-    "temperature": false,
-    "knowledge": "2024-04",
-    "release_date": "2025-05-16",
-    "last_updated": "2025-05-16",
-    "modalities": {
-      "input": [
-        "text"
-      ],
-      "output": [
-        "text"
-      ]
-    },
-    "open_weights": false,
-    "cost": {
-      "input": 1.5,
-      "output": 6,
-      "cache_read": 0.375
-    },
-    "limit": {
-      "context": 200000,
-      "output": 100000
-    }
-  },
   "gpt-4": {
     "id": "gpt-4",
     "name": "GPT-4",