llm.rb 4.20.2 → 4.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: a182d595ad65c1cb2f1a796b83e48cba4f1038031ec140709e902734051a8b46
-  data.tar.gz: b8cdb2e051bc620f111a97236bd64fe7940ff9f3d5b44c9f07b115641d74abcd
+  metadata.gz: 96698cb3af793b0bd83cae7635279cefbff24f86b11f59c9209edd76f76b757c
+  data.tar.gz: 389e4372ab3b4a2e90020e6e2e838b5a36516d5a5dd82a71243975dfe6f8f959
 SHA512:
-  metadata.gz: a6fd61aaa9479ec34af93a1e732acf553a055e36a4f5e822a2c643ef2bf537923a7d0a968b40c6a8cfa9a09af8186ba31467fe627462da49389f1c6594d7ee41
-  data.tar.gz: df56a4624eca8f7007ea2054d79812df553df69d867297230c9b38368c87e67c06187dbf03195b5fcaae1b1701b82a79cd7be10ed86364a49802573367910d10
+  metadata.gz: 6bd4fa02802333bbb925db2e513913bd1669e8a4d7c85d8cb76b88399e9b0e84bfd5ddf922c7816a2afd0c0d76d6a9f8c873702c789665dfe3205ada01d34203
+  data.tar.gz: 0d579386ead2158a4e7ad4991ff0c025758ac51624947d07e5d112779d46cb36bcabdd492ac20bbabc981b3e75e25300d04ba8b86808e4825b5c66e2186e52ae
data/CHANGELOG.md CHANGED
@@ -2,8 +2,78 @@
 
 ## Unreleased
 
+Changes since `v4.22.0`.
+
+## v4.22.0
+
+Changes since `v4.21.0`.
+
+This release deepens the runtime shape of llm.rb. It reduces helper-method
+surface on persisted ORM models, expands real ORM coverage, and makes skills
+behave more like bounded sub-agents with inherited recent context and proper
+instruction injection.
+
+### Change
+
+* **Reduce ActiveRecord wrapper model surface** <br>
+  Move helper methods such as option resolution, column mapping,
+  serialization, and persistence into `Utils` for the ActiveRecord
+  wrappers so wrapped models include fewer internal helper methods.
+
+* **Reduce Sequel wrapper model surface** <br>
+  Move helper methods such as option resolution, column mapping,
+  serialization, and persistence into `Utils` for the Sequel wrappers
+  so wrapped models include fewer internal helper methods.
+
+* **Expand ORM integration coverage** <br>
+  Add broader ActiveRecord and Sequel coverage for persisted context and
+  agent wrappers, including real SQLite-backed records and cassette-backed
+  OpenAI persistence paths.
+
+* **Make skills inherit recent parent context** <br>
+  Run `LLM::Skill` with a curated slice of recent parent user and assistant
+  messages, prefixed with `Recent context:`, so skills behave more like
+  task-scoped sub-agents instead of instruction-only helpers.
+
+### Fix
+
+* **Fix Sequel `plugin :agent` load order** <br>
+  Require the shared Sequel plugin support from `LLM::Sequel::Agent` so
+  `plugin :agent` can load independently without raising
+  `uninitialized constant LLM::Sequel::Plugin`.
+
+* **Make skill execution inherit parent context request settings** <br>
+  Run `LLM::Skill` through a parent `LLM::Context` instead of a bare
+  provider so nested skill agents inherit context-level settings such as
+  `mode: :responses`, `store: false`, streaming, and other request defaults,
+  while still keeping skill-local tools and avoiding parent schemas.
+
+* **Keep agent instructions when history is preseeded** <br>
+  Inject `LLM::Agent` instructions once unless a system message is already
+  present, so agents and nested skills still get their instructions when
+  they start with inherited non-system context.
+
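Taken together, the skill fixes above change what a nested skill agent inherits at run time. A minimal sketch of the resulting behavior, based on the `LLM::Context` options shown in this gem's README; the skill path is illustrative, and the exact shape `skills:` accepts (one path vs. a list) is an assumption here:

```ruby
require "llm"

llm = LLM.openai(key: ENV["KEY"])
ctx = LLM::Context.new(
  llm,
  model: "gpt-5.4-mini",
  mode: :responses,  # inherited by the skill's nested agent after this fix
  store: false,      # likewise inherited, per the note above
  skills: ["./skills/release"]
)
ctx.talk("Use the release skill.")
```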
+## v4.21.0
+
 Changes since `v4.20.2`.
 
+This release expands higher-level composition in llm.rb. It adds Sequel agent
+persistence through `plugin :agent` and introduces directory-backed skills
+that load from `SKILL.md`, resolve named tools, and plug directly into
+`LLM::Context` and `LLM::Agent`.
+
+### Change
+
+* **Add `plugin :agent` for Sequel models** <br>
+  Add Sequel support for `plugin :agent`, similar to ActiveRecord's
+  `acts_as_agent`, so models can wrap `LLM::Agent` with built-in
+  persistence.
+
+* **Load directory-backed skills through `LLM::Context` and `LLM::Agent`** <br>
+  Add `skills:` to `LLM::Context` and `skills ...` to `LLM::Agent` so
+  directories with `SKILL.md` can be loaded, resolved into tools, and run
+  through the normal llm.rb tool path.
+
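A sketch of what the `plugin :agent` change above enables on a Sequel model, mirroring the `acts_as_agent` example in this gem's README. The `llm/sequel` require path, the persistence column, and the availability of the class-level `model`/`instructions` DSL on the Sequel side are assumptions here, not confirmed API:

```ruby
require "llm"
require "sequel"
require "llm/sequel" # assumed entry point, mirroring "llm/active_record"

DB = Sequel.sqlite
DB.create_table(:tickets) do
  primary_key :id
  String :provider
  String :context, text: true # illustrative persistence column
end

class Ticket < Sequel::Model
  # Per the fix above, plugin :agent now loads without plugin :llm first.
  plugin :agent, provider: :set_provider
  model "gpt-5.4-mini"
  instructions "You are a support assistant."

  private

  def set_provider
    { key: ENV["#{provider.upcase}_SECRET"], persistent: true }
  end
end

puts Ticket.create(provider: "openai").talk("How do I rotate my API key?").content
```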
 ## v4.20.2
 
 Changes since `v4.20.1`.
data/README.md CHANGED
@@ -4,25 +4,26 @@
 <p align="center">
 <a href="https://0x1eef.github.io/x/llm.rb?rebuild=1"><img src="https://img.shields.io/badge/docs-0x1eef.github.io-blue.svg" alt="RubyDoc"></a>
 <a href="https://opensource.org/license/0bsd"><img src="https://img.shields.io/badge/License-0BSD-orange.svg?" alt="License"></a>
-<a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-4.20.2-green.svg?" alt="Version"></a>
+<a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-4.21.0-green.svg?" alt="Version"></a>
 </p>
 
 ## About
 
-llm.rb is a lightweight runtime for building capable AI systems in Ruby.
-
-It is not just an API wrapper. llm.rb gives you one runtime for providers,
-contexts, agents, tools, MCP servers, streaming, schemas, files, and persisted
-state, so real systems can be built out of one coherent execution model instead
-of a pile of adapters.
+llm.rb is the most capable runtime for building AI systems in Ruby.
+<br>
 
-It stays close to Ruby, runs on the standard library by default, loads optional
-pieces only when needed, includes built-in ActiveRecord support through
+llm.rb is designed for Ruby, and although it works great in Rails, it is not tightly
+coupled to it. It runs on the standard library by default (zero dependencies),
+loads optional pieces only when needed, includes built-in ActiveRecord support through
 `acts_as_llm` and `acts_as_agent`, includes built-in Sequel support through
-`plugin :llm`, and is designed for engineers who want control over
+`plugin :llm` and `plugin :agent`, and is designed for engineers who want control over
 long-lived, tool-capable, stateful AI workflows instead of just
 request/response helpers.
 
+It provides one runtime for providers, agents, tools, skills, MCP servers, streaming,
+schemas, files, and persisted state, so real systems can be built out of one coherent
+execution model instead of a pile of adapters.
+
 Want to see some code? Jump to [the examples](#examples) section. <br>
 Want a taste of what llm.rb can build? See [the screencast](#screencast).
 
@@ -47,6 +48,175 @@ It holds:
 Instead of switching abstractions for each feature, everything builds on the
 same context object.
 
+## Standout features
+
+The following list is **not exhaustive**, but it covers a lot of ground.
+
+#### Skills
+
+Skills are reusable, directory-backed capabilities loaded from `SKILL.md`.
+They run through the same runtime as tools, agents, and MCP. They do not
+require a second orchestration layer or a parallel abstraction. If you've
+used Claude or Codex, you know the general idea of skills, and llm.rb
+supports that same concept with the same execution model as the rest of the
+system.
+
+In llm.rb, a skill has frontmatter and instructions. The frontmatter can
+define `name`, `description`, and `tools`. The `tools` entries are tool names,
+and each name must resolve to a subclass of
+[`LLM::Tool`](https://0x1eef.github.io/x/llm.rb/LLM/Tool.html) that is already
+loaded in the runtime.
+
+If you want Claude/Codex-like skills that can drive scripts or shell
+commands, you would typically pair the skill with a tool that can execute
+system commands.
+
+```yaml
+---
+name: release
+description: Prepare a release
+tools:
+  - search_docs
+  - git
+---
+Review the release state, summarize what changed, and prepare the release.
+```
+
+```ruby
+class Agent < LLM::Agent
+  model "gpt-5.4-mini"
+  skills "./skills/release"
+end
+
+llm = LLM.openai(key: ENV["KEY"])
+Agent.new(llm, stream: $stdout).talk("Let's prepare the release!")
+```
+
+#### ORM
+
+Any ActiveRecord model or Sequel model can become an agent-capable model,
+including existing business and domain models, without forcing you into a
+separate agent table or a second persistence layer.
+
+`acts_as_agent` extends a model with agent capabilities: the same runtime
+surface as [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html),
+because it actually wraps an `LLM::Agent`, plus persistence through a text,
+JSON, or JSONB-backed column on the same table.
+
+```ruby
+class Ticket < ApplicationRecord
+  acts_as_agent provider: :set_provider
+  model "gpt-5.4-mini"
+  instructions "You are a support assistant."
+
+  private
+
+  def set_provider
+    { key: ENV["#{provider.upcase}_SECRET"], persistent: true }
+  end
+end
+```
+
+#### Agentic Patterns
+
+llm.rb is especially strong when you want to build agentic systems in a Ruby
+way. Agents can be ordinary application models with state, associations,
+tools, skills, and persistence, which makes it much easier to build systems
+where users have their own specialized agents instead of treating agents as
+something outside the app.
+
+That pattern works so well in llm.rb because
+[`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html),
+`acts_as_agent`, `plugin :agent`, skills, tools, and persisted runtime state
+all fit the same execution model. The runtime stays small enough that the
+main design work becomes application design, not orchestration glue.
+
+For a concrete example, see
+[How to build a platform of agents](https://0x1eef.github.io/posts/how-to-build-a-platform-of-agents).
+
+#### Persistence
+
+The same runtime can be serialized to disk, restored later, persisted in JSON
+or JSONB-backed ORM columns, resumed across process boundaries, or shared
+across long-lived workflows.
+
+```ruby
+ctx = LLM::Context.new(llm)
+ctx.talk("Remember that my favorite language is Ruby.")
+ctx.save(path: "context.json")
+```
+
+#### LLM::Stream
+
+`LLM::Stream` is not just for printing tokens. It supports `on_content`,
+`on_reasoning_content`, `on_tool_call`, and `on_tool_return`, which means
+visible output, reasoning output, and tool execution can all be driven through
+the same execution path.
+
+```ruby
+class Stream < LLM::Stream
+  def on_tool_call(tool, error)
+    queue << tool.spawn(:thread)
+  end
+
+  def on_tool_return(tool, result)
+    puts(result.value)
+  end
+end
+```
+
+#### Concurrency
+
+Tool execution can run sequentially with `:call` or concurrently through
+`:thread`, `:task`, `:fiber`, and experimental `:ractor`, without rewriting
+your tool layer.
+
+```ruby
+class Agent < LLM::Agent
+  model "gpt-5.4-mini"
+  tools FetchWeather, FetchNews, FetchStock
+  concurrency :thread
+end
+```
+
+#### MCP
+
+Remote MCP tools and prompts are not bolted on as a separate integration
+stack. They adapt into the same tool and prompt path used by local tools,
+skills, contexts, and agents.
+
+```ruby
+begin
+  mcp = LLM::MCP.http(url: "https://api.githubcopilot.com/mcp/").persistent
+  mcp.start
+  ctx = LLM::Context.new(llm, tools: mcp.tools)
+ensure
+  mcp.stop
+end
+```
+
+#### Cancellation
+
+Cancellation is one of the harder problems to get right, and while llm.rb
+makes it possible, it still requires careful engineering to use effectively.
+The point, though, is that it is possible to stop in-flight provider work
+cleanly through the same runtime, and the model used by llm.rb is directly
+inspired by Go's context package. In fact, llm.rb is heavily inspired by Go,
+but with a Ruby twist.
+
+```ruby
+ctx = LLM::Context.new(llm, stream: $stdout)
+worker = Thread.new do
+  ctx.talk("Write a very long essay about network protocols.")
+rescue LLM::Interrupt
+  puts "Request was interrupted!"
+end
+STDIN.getch
+ctx.interrupt!
+worker.join
+```
+
 ## Differentiators
 
 ### Execution Model
@@ -101,13 +271,18 @@ same context object.
   integration stack.
 - **ActiveRecord and Sequel persistence are built in** <br>
   llm.rb includes built-in ActiveRecord support through `acts_as_llm` and
-  `acts_as_agent`, plus built-in Sequel support through `plugin :llm`.
+  `acts_as_agent`, plus built-in Sequel support through `plugin :llm` and
+  `plugin :agent`.
   Use `acts_as_llm` when you want to wrap `LLM::Context`, `acts_as_agent`
-  when you want to wrap `LLM::Agent`, or `plugin :llm` on Sequel models to
-  persist `LLM::Context` state with sensible default columns. These
-  integrations support `provider:` and `context:` hooks, plus `format:
-  :string` for text columns or `format: :jsonb` for native PostgreSQL JSON
-  storage when ORM JSON typecasting support is enabled.
+  when you want to wrap `LLM::Agent`, `plugin :llm` when you want a
+  `LLM::Context` on a Sequel model, or `plugin :agent` when you want an
+  `LLM::Agent`. These integrations support `provider:` and `context:` hooks,
+  plus `format: :string` for text columns or `format: :jsonb` for native
+  PostgreSQL JSON storage when ORM JSON typecasting support is enabled.
+- **ORM models can become persistent agents** <br>
+  Turn an ActiveRecord or Sequel model into an agent-capable model with
+  built-in persistence, stored on the same table, with `jsonb` support when
+  your ORM and database support native JSON columns.
 - **Persistent HTTP pooling is shared process-wide** <br>
   When enabled, separate
   [`LLM::Provider`](https://0x1eef.github.io/x/llm.rb/LLM/Provider.html)
@@ -126,6 +301,11 @@ same context object.
 - **Tools are explicit** <br>
   Run local tools, provider-native tools, and MCP tools through the same path
   with fewer special cases.
+- **Skills become bounded runtime capabilities** <br>
+  Point llm.rb at directories with a `SKILL.md`, resolve named tools through
+  the registry, and adapt each skill into its own callable capability through
+  the normal runtime. Unlike a generic skill-discovery tool, each skill runs
+  with its own bounded tool subset and behaves like a task-scoped sub-agent.
 - **Providers are normalized, not flattened** <br>
   Share one API surface across providers without losing access to provider-
   specific capabilities where they matter.
@@ -157,23 +337,31 @@ same context object.
 
 ## Capabilities
 
+Execution:
 - **Chat & Contexts** — stateless and stateful interactions with persistence
 - **Context Serialization** — save and restore state across processes or time
 - **Streaming** — visible output, reasoning output, tool-call events
 - **Request Interruption** — stop in-flight provider work cleanly
+- **Concurrent Execution** — threads, async tasks, and fibers
+
+Runtime Building Blocks:
 - **Tool Calling** — class-based tools and closure-based functions
 - **Run Tools While Streaming** — overlap model output with tool latency
-- **Concurrent Execution** — threads, async tasks, and fibers
 - **Agents** — reusable assistants with tool auto-execution
+- **Skills** — directory-backed capabilities loaded from `SKILL.md`
+- **MCP Support** — stdio and HTTP MCP clients with prompt and tool support
+
+Data and Structure:
 - **Structured Outputs** — JSON Schema-based responses
 - **Responses API** — stateful response workflows where providers support them
-- **MCP Support** — stdio and HTTP MCP clients with prompt and tool support
 - **Multimodal Inputs** — text, images, audio, documents, URLs
 - **Audio** — speech generation, transcription, translation
 - **Images** — generation and editing
 - **Files API** — upload and reference files in prompts
 - **Embeddings** — vector generation for search and RAG
 - **Vector Stores** — retrieval workflows
+
+Operations:
 - **Cost Tracking** — local cost estimation without extra API calls
 - **Observability** — tracing, logging, telemetry
 - **Model Registry** — local metadata for capabilities, limits, pricing
@@ -189,7 +377,7 @@ gem install llm.rb
 
 #### REPL
 
-This example uses [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html) directly for an interactive REPL. <br> See the [deepdive](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) for more examples.
+This example uses [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html) directly for an interactive REPL. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
 
 ```ruby
 require "llm"
@@ -204,9 +392,47 @@ loop do
 end
 ```
 
+#### Agent
+
+This example uses [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html) directly and lets the agent manage tool execution. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
+
+```ruby
+require "llm"
+
+class ShellAgent < LLM::Agent
+  model "gpt-5.4-mini"
+  instructions "You are a Linux system assistant."
+  tools Shell
+  concurrency :thread
+end
+
+llm = LLM.openai(key: ENV["KEY"])
+agent = ShellAgent.new(llm)
+puts agent.talk("What time is it on this system?").content
+```
+
+#### Skills
+
+This example uses [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html) with directory-backed skills so `SKILL.md` capabilities run through the normal tool path. In llm.rb, a skill is exposed as a tool in the runtime. When that tool is called, it spawns a sub-agent with relevant context plus the instructions and tool subset declared in its own `SKILL.md`. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
+
+Each skill runs only with the tools declared in its own frontmatter.
+
+```ruby
+require "llm"
+
+class Agent < LLM::Agent
+  model "gpt-5.4-mini"
+  instructions "You are a concise release assistant."
+  skills "./skills/release", "./skills/review"
+end
+
+llm = LLM.openai(key: ENV["KEY"])
+puts Agent.new(llm).talk("Use the review skill.").content
+```
+
 #### Streaming
 
-This example uses [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html) directly so visible output and tool execution can happen together. <br> See the [deepdive](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) for more examples.
+This example uses [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html) directly so visible output and tool execution can happen together. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
 
 ```ruby
 require "llm"
@@ -238,9 +464,37 @@ ctx.talk("Run `date` and `uname -a`.")
 ctx.talk(ctx.wait(:thread)) while ctx.functions.any?
 ```
 
+#### Reasoning
+
+This example uses [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html) with the OpenAI Responses API so reasoning output is streamed separately from visible assistant output. See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
+
+```ruby
+require "llm"
+
+class Stream < LLM::Stream
+  def on_content(content)
+    $stdout << content
+  end
+
+  def on_reasoning_content(content)
+    $stderr << content
+  end
+end
+
+llm = LLM.openai(key: ENV["KEY"])
+ctx = LLM::Context.new(
+  llm,
+  model: "gpt-5.4-mini",
+  mode: :responses,
+  reasoning: {effort: "medium"},
+  stream: Stream.new
+)
+ctx.talk("Solve 17 * 19 and show your work.")
+```
+
 #### Request Cancellation
 
-Need to cancel a stream? llm.rb has you covered through [`LLM::Context#interrupt!`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html#interrupt-21-instance_method). <br> See the [deepdive](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) for more examples.
+Need to cancel a stream? llm.rb has you covered through [`LLM::Context#interrupt!`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html#interrupt-21-instance_method). <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
 
 ```ruby
 require "llm"
@@ -260,7 +514,7 @@ worker.join
 
 #### Sequel (ORM)
 
-The `plugin :llm` integration wraps [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html) on a `Sequel::Model` and keeps tool execution explicit. <br> See the [deepdive](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) for more examples.
+The `plugin :llm` integration wraps [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html) on a `Sequel::Model` and keeps tool execution explicit. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
 
 ```ruby
 require "llm"
@@ -280,7 +534,7 @@ puts ctx.talk("What is my favorite language?").content
 #### ActiveRecord (ORM): acts_as_llm
 
 The `acts_as_llm` method wraps [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html) and
-provides full control over tool execution. <br> See the [deepdive](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) for more examples.
+provides full control over tool execution. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
 
 ```ruby
 require "llm"
@@ -300,7 +554,7 @@ puts ctx.talk("What is my favorite language?").content
 #### ActiveRecord (ORM): acts_as_agent
 
 The `acts_as_agent` method wraps [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html) and
-manages tool execution for you. <br> See the [deepdive](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) for more examples.
+manages tool execution for you. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
 
 ```ruby
 require "llm"
@@ -309,12 +563,11 @@ require "active_record"
 require "llm/active_record"
 
 class Ticket < ApplicationRecord
-  acts_as_agent provider: :set_provider do
-    model "gpt-5.4-mini"
-    instructions "You are a concise support assistant."
-    tools SearchDocs, Escalate
-    concurrency :thread
-  end
+  acts_as_agent provider: :set_provider
+  model "gpt-5.4-mini"
+  instructions "You are a concise support assistant."
+  tools SearchDocs, Escalate
+  concurrency :thread
 
   private
 
@@ -327,28 +580,9 @@ ticket = Ticket.create!(provider: "openai", model: "gpt-5.4-mini")
 puts ticket.talk("How do I rotate my API key?").content
 ```
 
-#### Agent
-
-This example uses [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html) directly and lets the agent manage tool execution. <br> See the [deepdive](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) for more examples.
-
-```ruby
-require "llm"
-
-class ShellAgent < LLM::Agent
-  model "gpt-5.4-mini"
-  instructions "You are a Linux system assistant."
-  tools Shell
-  concurrency :thread
-end
-
-llm = LLM.openai(key: ENV["KEY"])
-agent = ShellAgent.new(llm)
-puts agent.talk("What time is it on this system?").content
-```
-
 #### MCP
 
-This example uses [`LLM::MCP`](https://0x1eef.github.io/x/llm.rb/LLM/MCP.html) over HTTP so remote GitHub MCP tools run through the same `LLM::Context` tool path as local tools. <br> See the [deepdive](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) for more examples.
+This example uses [`LLM::MCP`](https://0x1eef.github.io/x/llm.rb/LLM/MCP.html) over HTTP so remote GitHub MCP tools run through the same `LLM::Context` tool path as local tools. See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
 
 ```ruby
 require "llm"
@@ -379,8 +613,8 @@ how capable the runtime can be in a real application:
 
 ## Resources
 
-- [deepdive](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) is the
-  examples guide.
+- [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) and
+  [deepdive (markdown)](resources/deepdive.md) are the examples guide.
 - [relay](https://github.com/llmrb/relay) shows a real application built on
   top of llm.rb.
 - [doc site](https://0x1eef.github.io/x/llm.rb?rebuild=1) has the API docs.
data/data/anthropic.json CHANGED
@@ -213,7 +213,7 @@
       "reasoning": true,
       "tool_call": true,
       "temperature": true,
-      "knowledge": "2025-08",
+      "knowledge": "2025-08-31",
       "release_date": "2026-02-17",
       "last_updated": "2026-03-13",
       "modalities": {
@@ -271,6 +271,39 @@
         "output": 32000
       }
     },
+    "claude-opus-4-7": {
+      "id": "claude-opus-4-7",
+      "name": "Claude Opus 4.7",
+      "family": "claude-opus",
+      "attachment": true,
+      "reasoning": true,
+      "tool_call": true,
+      "temperature": false,
+      "knowledge": "2026-01-31",
+      "release_date": "2026-04-16",
+      "last_updated": "2026-04-16",
+      "modalities": {
+        "input": [
+          "text",
+          "image",
+          "pdf"
+        ],
+        "output": [
+          "text"
+        ]
+      },
+      "open_weights": false,
+      "cost": {
+        "input": 5,
+        "output": 25,
+        "cache_read": 0.5,
+        "cache_write": 6.25
+      },
+      "limit": {
+        "context": 1000000,
+        "output": 128000
+      }
+    },
     "claude-3-haiku-20240307": {
       "id": "claude-3-haiku-20240307",
       "name": "Claude Haiku 3",
@@ -609,7 +642,7 @@
       "reasoning": true,
       "tool_call": true,
       "temperature": true,
-      "knowledge": "2025-05",
+      "knowledge": "2025-05-31",
       "release_date": "2026-02-05",
       "last_updated": "2026-03-13",
       "modalities": {
data/data/google.json CHANGED
@@ -594,7 +594,12 @@
       "cost": {
         "input": 1.25,
         "output": 10,
-        "cache_read": 0.31
+        "cache_read": 0.125,
+        "context_over_200k": {
+          "input": 2.5,
+          "output": 15,
+          "cache_read": 0.25
+        }
       },
       "limit": {
         "context": 1048576,
@@ -824,7 +829,7 @@
       "cost": {
         "input": 0.3,
         "output": 2.5,
-        "cache_read": 0.075,
+        "cache_read": 0.03,
        "input_audio": 1
       },
       "limit": {
data/data/openai.json CHANGED
@@ -1066,36 +1066,6 @@
         "output": 100000
       }
     },
-    "codex-mini-latest": {
-      "id": "codex-mini-latest",
-      "name": "Codex Mini",
-      "family": "gpt-codex-mini",
-      "attachment": true,
-      "reasoning": true,
-      "tool_call": true,
-      "temperature": false,
-      "knowledge": "2024-04",
-      "release_date": "2025-05-16",
-      "last_updated": "2025-05-16",
-      "modalities": {
-        "input": [
-          "text"
-        ],
-        "output": [
-          "text"
-        ]
-      },
-      "open_weights": false,
-      "cost": {
-        "input": 1.5,
-        "output": 6,
-        "cache_read": 0.375
-      },
-      "limit": {
-        "context": 200000,
-        "output": 100000
-      }
-    },
     "gpt-4": {
       "id": "gpt-4",
       "name": "GPT-4",