llm.rb 4.21.0 → 4.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: f0bca66b2bd8873cf39abb3be19dc99ca20d558e40ef3e9f475bf1f33faef6b6
-  data.tar.gz: c73a2c5093e7e09557242919feb5a377f25b0fa8a11249a9f346673ad7d3a921
+  metadata.gz: 49ed8077a6283802d4141dcb9ec037c7fc46920ebd3273b30c55624b575f3156
+  data.tar.gz: e2289baf740ba9603ed1c308414e632ddda296356659c8714bf3a1744c216104
 SHA512:
-  metadata.gz: 2a00191aaab47702a794f9fa86d782f21832be2a7ef309bd558aa482100d7c66ddbdf3320e89c80af2942c6e33295f10d387702130162fbac7cc98fd9b24c9a8
-  data.tar.gz: a6709f6fd265af673da771f635f34c68e28e490405700c1a59b18253391dbbcae09ce677a4251994d898a851ec08dc598c5ff858e516e25b1206948f509abf67
+  metadata.gz: b6b0d72baa785a6bf25cbfd3f2581d7f6a5850a0fa61dea29668596e19eb8a1142330f8acfea7f04a1bc76461c02c0af681588332d955aae2b5c6808f2fc0610
+  data.tar.gz: 836fc45489b9d86c7bde3ed2b94d2813be5bdaea1ebf7697f7e7eca5962f5374343e371188e40ced180ed50e053cd74ec8fcec8dea08c164291ee8577301f195
data/CHANGELOG.md CHANGED
@@ -2,8 +2,86 @@
 
 ## Unreleased
 
+ Changes since `v4.23.0`.
+
+ ## v4.23.0
+
+ Changes since `v4.22.0`.
+
+ This release expands llm.rb's runtime surface for long-lived contexts and
+ stateful tools. It adds built-in context compaction through `LLM::Compactor`,
+ lets explicit `tools:` arrays accept bound `LLM::Tool` instances, and fixes
+ OpenAI-compatible no-arg tool schemas for stricter providers such as xAI.
+
+ ### Change
+
+ * **Add `LLM::Compactor` for long-lived contexts** <br>
+   Add built-in context compaction through `LLM::Compactor`, so older history
+   can be summarized, the retention window stays bounded, compaction can run
+   on its own `model:`, and `LLM::Stream` can observe the lifecycle through
+   `on_compaction` and `on_compaction_finish`.
+
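+   For example (a minimal sketch; it mirrors the compactor options documented
+   in the README, and `llm` is any provider object):
+
+   ```ruby
+   ctx = LLM::Context.new(
+     llm,
+     compactor: {
+       message_threshold: 200,  # threshold before compaction kicks in
+       retention_window: 8,     # recent messages retained verbatim
+       model: "gpt-5.4-mini"    # optional: separate model for summarization
+     }
+   )
+   ```
+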
+ * **Allow bound tool instances in explicit tool lists** <br>
+   Let explicit `tools:` arrays accept `LLM::Tool` instances such as
+   `MyTool.new(foo: 1)`, so tools can carry bound state without changing the
+   global tool registry model.
+
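+   A minimal sketch (`MyTool` stands in for any `LLM::Tool` subclass you have
+   defined):
+
+   ```ruby
+   # An instance with bound state can now sit in an explicit tools: array,
+   # alongside tool classes.
+   ctx = LLM::Context.new(llm, tools: [MyTool.new(foo: 1)])
+   ```
+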
+ ### Fix
+
+ * **Fix xAI/OpenAI-compatible no-arg tool schemas** <br>
+   Send an empty object schema for tools without declared parameters instead
+   of `null`, so stricter providers such as xAI accept mixed tool sets that
+   include no-arg tools.
+
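+   In wire terms, roughly the following (a sketch; the tool name is
+   hypothetical, and the payload shape is the conventional OpenAI-compatible
+   function schema):
+
+   ```ruby
+   # "parameters" was previously serialized as null for no-arg tools.
+   payload = {name: "ping", parameters: {type: "object", properties: {}}}
+   ```
+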
+ ## v4.22.0
+
 Changes since `v4.21.0`.
 
+ This release refines llm.rb's runtime. It reduces helper-method surface on
+ persisted ORM models, expands real ORM coverage, and makes skills behave
+ more like bounded sub-agents with inherited recent context and proper
+ instruction injection.
+
+ ### Change
+
+ * **Reduce ActiveRecord wrapper model surface** <br>
+   Move helper methods such as option resolution, column mapping,
+   serialization, and persistence into `Utils` for the ActiveRecord
+   wrappers so wrapped models include fewer internal helper methods.
+
+ * **Reduce Sequel wrapper model surface** <br>
+   Move helper methods such as option resolution, column mapping,
+   serialization, and persistence into `Utils` for the Sequel wrappers
+   so wrapped models include fewer internal helper methods.
+
+ * **Expand ORM integration coverage** <br>
+   Add broader ActiveRecord and Sequel coverage for persisted context and
+   agent wrappers, including real SQLite-backed records and cassette-backed
+   OpenAI persistence paths.
+
+ * **Make skills inherit recent parent context** <br>
+   Run `LLM::Skill` with a curated slice of recent parent user and assistant
+   messages, prefixed with `Recent context:`, so skills behave more like
+   task-scoped sub-agents instead of instruction-only helpers.
+
+ ### Fix
+
+ * **Fix Sequel `plugin :agent` load order** <br>
+   Require the shared Sequel plugin support from `LLM::Sequel::Agent` so
+   `plugin :agent` can load independently without raising
+   `uninitialized constant LLM::Sequel::Plugin`.
+
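+   A minimal sketch (the `require` path is an assumption, mirroring the
+   documented `require "llm/active_record"` entry point):
+
+   ```ruby
+   require "sequel"
+   require "llm/sequel" # assumed Sequel entry point
+
+   class Ticket < Sequel::Model
+     plugin :agent # previously raised: uninitialized constant LLM::Sequel::Plugin
+   end
+   ```
+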
+ * **Make skill execution inherit parent context request settings** <br>
+   Run `LLM::Skill` through a parent `LLM::Context` instead of a bare
+   provider so nested skill agents inherit context-level settings such as
+   `mode: :responses`, `store: false`, streaming, and other request defaults,
+   while still keeping skill-local tools and avoiding parent schemas.
+
+ * **Keep agent instructions when history is preseeded** <br>
+   Inject `LLM::Agent` instructions once unless a system message is already
+   present, so agents and nested skills still get their instructions when
+   they start with inherited non-system context.
+
 ## v4.21.0
 
 Changes since `v4.20.2`.
data/README.md CHANGED
@@ -4,23 +4,14 @@
 <p align="center">
 <a href="https://0x1eef.github.io/x/llm.rb?rebuild=1"><img src="https://img.shields.io/badge/docs-0x1eef.github.io-blue.svg" alt="RubyDoc"></a>
 <a href="https://opensource.org/license/0bsd"><img src="https://img.shields.io/badge/License-0BSD-orange.svg?" alt="License"></a>
- <a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-4.21.0-green.svg?" alt="Version"></a>
+ <a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-4.23.0-green.svg?" alt="Version"></a>
 </p>
 
 ## About
 
- llm.rb is a lightweight runtime for building capable AI systems in Ruby.
+ llm.rb is the most capable runtime for building AI systems in Ruby.
 <br>
 
- It is also the most capable AI Ruby runtime that exists _today_, and that claim is
- backed up by research. Maybe it won't always be true, and that would be good news too -
- because it would mean the Ruby ecosystem is getting stronger.
-
- llm.rb is not just an API wrapper: it gives you one runtime for providers,
- contexts, agents, tools, skills, MCP servers, streaming, schemas, files, and
- persisted state, so real systems can be built out of one coherent execution
- model instead of a pile of adapters.
-
 llm.rb is designed for Ruby, and although it works great in Rails, it is not tightly
 coupled to it. It runs on the standard library by default (zero dependencies),
 loads optional pieces only when needed, includes built-in ActiveRecord support through
@@ -29,6 +20,10 @@ loads optional pieces only when needed, includes built-in ActiveRecord support t
 long-lived, tool-capable, stateful AI workflows instead of just
 request/response helpers.
 
+ It provides one runtime for providers, agents, tools, skills, MCP servers, streaming,
+ schemas, files, and persisted state, so real systems can be built out of one coherent
+ execution model instead of a pile of adapters.
+
 Want to see some code? Jump to [the examples](#examples) section. <br>
 Want a taste of what llm.rb can build? See [the screencast](#screencast).
 
@@ -53,6 +48,197 @@ It holds:
 Instead of switching abstractions for each feature, everything builds on the
 same context object.
 
+ ## Standout features
+
+ The following list is **not exhaustive**, but it covers a lot of ground.
+
+ #### Skills
+
+ Skills are reusable, directory-backed capabilities loaded from `SKILL.md`.
+ They run through the same runtime as tools, agents, and MCP, without a
+ second orchestration layer or a parallel abstraction. If you've used Claude
+ or Codex, you know the general idea of skills; llm.rb supports the same
+ concept with the same execution model as the rest of the system.
+
+ In llm.rb, a skill has frontmatter and instructions. The frontmatter can
+ define `name`, `description`, and `tools`. The `tools` entries are tool names,
+ and each name must resolve to a subclass of
+ [`LLM::Tool`](https://0x1eef.github.io/x/llm.rb/LLM/Tool.html) that is already
+ loaded in the runtime.
+
+ If you want Claude/Codex-like skills that can drive scripts or shell
+ commands, you would typically pair the skill with a tool that can execute
+ system commands.
+
+ ```yaml
+ ---
+ name: release
+ description: Prepare a release
+ tools:
+   - search_docs
+   - git
+ ---
+ Review the release state, summarize what changed, and prepare the release.
+ ```
+
+ ```ruby
+ class Agent < LLM::Agent
+   model "gpt-5.4-mini"
+   skills "./skills/release"
+ end
+
+ llm = LLM.openai(key: ENV["KEY"])
+ Agent.new(llm, stream: $stdout).talk("Let's prepare the release!")
+ ```
+
+ #### ORM
+
+ Any ActiveRecord model or Sequel model can become an agent-capable model,
+ including existing business and domain models, without forcing you into a
+ separate agent table or a second persistence layer.
+
+ `acts_as_agent` extends a model with agent capabilities: the same runtime
+ surface as [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html),
+ because it actually wraps an `LLM::Agent`, plus persistence through a text,
+ JSON, or JSONB-backed column on the same table.
+
+ ```ruby
+ class Ticket < ApplicationRecord
+   acts_as_agent provider: :set_provider
+   model "gpt-5.4-mini"
+   instructions "You are a support assistant."
+
+   private
+
+   def set_provider
+     { key: ENV["#{provider.upcase}_SECRET"], persistent: true }
+   end
+ end
+ ```
+
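+ The backing column is an ordinary column on the model's table. A sketch of a
+ matching migration (the column name `agent_state` is hypothetical; any text,
+ JSON, or JSONB column works):
+
+ ```ruby
+ class AddAgentStateToTickets < ActiveRecord::Migration[7.1]
+   def change
+     add_column :tickets, :agent_state, :jsonb # or :json / :text
+   end
+ end
+ ```
+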
+ #### Agentic Patterns
+
+ llm.rb is especially strong when you want to build agentic systems in a Ruby
+ way. Agents can be ordinary application models with state, associations,
+ tools, skills, and persistence, which makes it much easier to build systems
+ where users have their own specialized agents instead of treating agents as
+ something outside the app.
+
+ That pattern works so well in llm.rb because
+ [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html),
+ `acts_as_agent`, `plugin :agent`, skills, tools, and persisted runtime state
+ all fit the same execution model. The runtime stays small enough that the
+ main design work becomes application design, not orchestration glue.
+
+ For a concrete example, see
+ [How to build a platform of agents](https://0x1eef.github.io/posts/how-to-build-a-platform-of-agents).
+
+ #### Persistence
+
+ The same runtime can be serialized to disk, restored later, persisted in JSON
+ or JSONB-backed ORM columns, resumed across process boundaries, or shared
+ across long-lived workflows.
+
+ ```ruby
+ ctx = LLM::Context.new(llm)
+ ctx.talk("Remember that my favorite language is Ruby.")
+ ctx.save(path: "context.json")
+ ```
+
+ #### Context Compaction
+
+ Long-lived contexts can compact older history into a summary instead of
+ growing forever. Compaction is built into [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html)
+ through [`LLM::Compactor`](https://0x1eef.github.io/x/llm.rb/LLM/Compactor.html),
+ and when a stream is present it emits `on_compaction` and
+ `on_compaction_finish` through [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html).
+ The compactor can also use a different model from the main context, which is
+ useful when you want summarization to run on a cheaper or faster model.
+
+ ```ruby
+ ctx = LLM::Context.new(
+   llm,
+   compactor: {
+     message_threshold: 200,
+     retention_window: 8,
+     model: "gpt-5.4-mini"
+   }
+ )
+ ```
+
+ #### LLM::Stream
+
+ `LLM::Stream` is not just for printing tokens. It supports `on_content`,
+ `on_reasoning_content`, `on_tool_call`, `on_tool_return`, `on_compaction`,
+ and `on_compaction_finish`, which means visible output, reasoning output, tool
+ execution, and context compaction can all be driven through the same
+ execution path.
+
+ ```ruby
+ class Stream < LLM::Stream
+   def on_tool_call(tool, error)
+     queue << tool.spawn(:thread)
+   end
+
+   def on_tool_return(tool, result)
+     puts(result.value)
+   end
+ end
+ ```
+
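+ Content hooks can be combined with the tool hooks above. A minimal sketch
+ (it assumes each content hook receives the streamed text fragment as its
+ only argument):
+
+ ```ruby
+ class VerboseStream < LLM::Stream
+   def on_content(text)
+     print(text) # visible assistant output
+   end
+
+   def on_reasoning_content(text)
+     print(text) # reasoning output, streamed separately
+   end
+ end
+ ```
+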
+ #### Concurrency
+
+ Tool execution can run sequentially with `:call` or concurrently through
+ `:thread`, `:task`, `:fiber`, and experimental `:ractor`, without rewriting
+ your tool layer.
+
+ ```ruby
+ class Agent < LLM::Agent
+   model "gpt-5.4-mini"
+   tools FetchWeather, FetchNews, FetchStock
+   concurrency :thread
+ end
+ ```
+
+ #### MCP
+
+ Remote MCP tools and prompts are not bolted on as a separate integration
+ stack. They adapt into the same tool and prompt path used by local tools,
+ skills, contexts, and agents.
+
+ ```ruby
+ begin
+   mcp = LLM::MCP.http(url: "https://api.githubcopilot.com/mcp/").persistent
+   mcp.start
+   ctx = LLM::Context.new(llm, tools: mcp.tools)
+ ensure
+   mcp&.stop # guard: mcp is nil if the client failed to build
+ end
+ ```
+
+ #### Cancellation
+
+ Cancellation is one of the harder problems to get right, and while llm.rb
+ makes it possible, it still requires careful engineering to use effectively.
+ In-flight provider work can be stopped cleanly through the same runtime, and
+ the cancellation model is directly inspired by Go's context package, with a
+ Ruby twist.
+
+ ```ruby
+ require "io/console" # for STDIN.getch
+
+ ctx = LLM::Context.new(llm, stream: $stdout)
+ worker = Thread.new do
+   ctx.talk("Write a very long essay about network protocols.")
+ rescue LLM::Interrupt
+   puts "Request was interrupted!"
+ end
+ STDIN.getch
+ ctx.interrupt!
+ worker.join
+ ```
+
 ## Differentiators
 
 ### Execution Model
@@ -137,11 +323,11 @@ same context object.
 - **Tools are explicit** <br>
   Run local tools, provider-native tools, and MCP tools through the same path
   with fewer special cases.
- - **Skills are just tools loaded from directories** <br>
+ - **Skills become bounded runtime capabilities** <br>
   Point llm.rb at directories with a `SKILL.md`, resolve named tools through
-   the registry, and run those skills through `LLM::Context` or `LLM::Agent`
-   without creating a second execution model. If you are familiar with skills
-   in Claude or Codex, llm.rb supports the same general idea.
+   the registry, and adapt each skill into its own callable capability through
+   the normal runtime. Unlike a generic skill-discovery tool, each skill runs
+   with its own bounded tool subset and behaves like a task-scoped sub-agent.
 - **Providers are normalized, not flattened** <br>
   Share one API surface across providers without losing access to provider-
   specific capabilities where they matter.
@@ -173,24 +359,32 @@ same context object.
 
 ## Capabilities
 
+ Execution:
 - **Chat & Contexts** — stateless and stateful interactions with persistence
 - **Context Serialization** — save and restore state across processes or time
 - **Streaming** — visible output, reasoning output, tool-call events
 - **Request Interruption** — stop in-flight provider work cleanly
+ - **Concurrent Execution** — threads, async tasks, and fibers
+
+ Runtime Building Blocks:
 - **Tool Calling** — class-based tools and closure-based functions
 - **Run Tools While Streaming** — overlap model output with tool latency
- - **Concurrent Execution** — threads, async tasks, and fibers
 - **Agents** — reusable assistants with tool auto-execution
 - **Skills** — directory-backed capabilities loaded from `SKILL.md`
+ - **MCP Support** — stdio and HTTP MCP clients with prompt and tool support
+ - **Context Compaction** — summarize older history in long-lived contexts
+
+ Data and Structure:
 - **Structured Outputs** — JSON Schema-based responses
 - **Responses API** — stateful response workflows where providers support them
- - **MCP Support** — stdio and HTTP MCP clients with prompt and tool support
 - **Multimodal Inputs** — text, images, audio, documents, URLs
 - **Audio** — speech generation, transcription, translation
 - **Images** — generation and editing
 - **Files API** — upload and reference files in prompts
 - **Embeddings** — vector generation for search and RAG
 - **Vector Stores** — retrieval workflows
+
+ Operations:
 - **Cost Tracking** — local cost estimation without extra API calls
 - **Observability** — tracing, logging, telemetry
 - **Model Registry** — local metadata for capabilities, limits, pricing
@@ -221,6 +415,44 @@ loop do
 end
 ```
 
+ #### Agent
+
+ This example uses [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html) directly and lets the agent manage tool execution. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
+
+ ```ruby
+ require "llm"
+
+ class ShellAgent < LLM::Agent
+   model "gpt-5.4-mini"
+   instructions "You are a Linux system assistant."
+   tools Shell
+   concurrency :thread
+ end
+
+ llm = LLM.openai(key: ENV["KEY"])
+ agent = ShellAgent.new(llm)
+ puts agent.talk("What time is it on this system?").content
+ ```
+
+ #### Skills
+
+ This example uses [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html) with directory-backed skills so `SKILL.md` capabilities run through the normal tool path. In llm.rb, a skill is exposed as a tool in the runtime. When that tool is called, it spawns a sub-agent with relevant context plus the instructions and tool subset declared in its own `SKILL.md`. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
+
+ Each skill runs only with the tools declared in its own frontmatter.
+
+ ```ruby
+ require "llm"
+
+ class Agent < LLM::Agent
+   model "gpt-5.4-mini"
+   instructions "You are a concise release assistant."
+   skills "./skills/release", "./skills/review"
+ end
+
+ llm = LLM.openai(key: ENV["KEY"])
+ puts Agent.new(llm).talk("Use the review skill.").content
+ ```
+
 #### Streaming
 
 This example uses [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html) directly so visible output and tool execution can happen together. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
@@ -255,6 +487,42 @@ ctx.talk("Run `date` and `uname -a`.")
 ctx.talk(ctx.wait(:thread)) while ctx.functions.any?
 ```
 
+ #### Context Compaction
+
+ This example uses [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html),
+ [`LLM::Compactor`](https://0x1eef.github.io/x/llm.rb/LLM/Compactor.html), and
+ [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html) together so
+ long-lived contexts can summarize older history and expose the lifecycle
+ through stream hooks. This approach is inspired by General Intelligence
+ Systems' [Brute](https://github.com/general-intelligence-systems/brute). The
+ compactor can also use its own `model:` if you want summarization to run on a
+ different model from the main context. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
+
+ ```ruby
+ require "llm"
+
+ class Stream < LLM::Stream
+   def on_compaction(ctx, compactor)
+     puts "Compacting #{ctx.messages.size} messages..."
+   end
+
+   def on_compaction_finish(ctx, compactor)
+     puts "Compacted to #{ctx.messages.size} messages."
+   end
+ end
+
+ llm = LLM.openai(key: ENV["KEY"])
+ ctx = LLM::Context.new(
+   llm,
+   stream: Stream.new,
+   compactor: {
+     message_threshold: 200,
+     retention_window: 8,
+     model: "gpt-5.4-mini"
+   }
+ )
+ ```
+
 #### Reasoning
 
 This example uses [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html) with the OpenAI Responses API so reasoning output is streamed separately from visible assistant output. See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
@@ -354,12 +622,11 @@ require "active_record"
 require "llm/active_record"
 
 class Ticket < ApplicationRecord
-   acts_as_agent provider: :set_provider do
-     model "gpt-5.4-mini"
-     instructions "You are a concise support assistant."
-     tools SearchDocs, Escalate
-     concurrency :thread
-   end
+   acts_as_agent provider: :set_provider
+   model "gpt-5.4-mini"
+   instructions "You are a concise support assistant."
+   tools SearchDocs, Escalate
+   concurrency :thread
 
   private
 
@@ -372,42 +639,6 @@ ticket = Ticket.create!(provider: "openai", model: "gpt-5.4-mini")
 puts ticket.talk("How do I rotate my API key?").content
 ```
 
- #### Agent
-
- This example uses [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html) directly and lets the agent manage tool execution. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
-
- ```ruby
- require "llm"
-
- class ShellAgent < LLM::Agent
-   model "gpt-5.4-mini"
-   instructions "You are a Linux system assistant."
-   tools Shell
-   concurrency :thread
- end
-
- llm = LLM.openai(key: ENV["KEY"])
- agent = ShellAgent.new(llm)
- puts agent.talk("What time is it on this system?").content
- ```
-
- #### Skills
-
- This example uses [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html) with directory-backed skills so `SKILL.md` capabilities run through the normal tool path. If you have used skills in Claude or Codex, this is the same kind of building block. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
-
- ```ruby
- require "llm"
-
- class Agent < LLM::Agent
-   model "gpt-5.4-mini"
-   instructions "You are a concise release assistant."
-   skills "./skills/release", "./skills/review"
- end
-
- llm = LLM.openai(key: ENV["KEY"])
- puts Agent.new(llm).talk("Use the review skill.").content
- ```
-
 #### MCP
 
 This example uses [`LLM::MCP`](https://0x1eef.github.io/x/llm.rb/LLM/MCP.html) over HTTP so remote GitHub MCP tools run through the same `LLM::Context` tool path as local tools. See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
data/data/anthropic.json CHANGED
@@ -213,7 +213,7 @@
 "reasoning": true,
 "tool_call": true,
 "temperature": true,
- "knowledge": "2025-08",
+ "knowledge": "2025-08-31",
 "release_date": "2026-02-17",
 "last_updated": "2026-03-13",
 "modalities": {
@@ -271,6 +271,39 @@
 "output": 32000
 }
 },
+ "claude-opus-4-7": {
+   "id": "claude-opus-4-7",
+   "name": "Claude Opus 4.7",
+   "family": "claude-opus",
+   "attachment": true,
+   "reasoning": true,
+   "tool_call": true,
+   "temperature": false,
+   "knowledge": "2026-01-31",
+   "release_date": "2026-04-16",
+   "last_updated": "2026-04-16",
+   "modalities": {
+     "input": [
+       "text",
+       "image",
+       "pdf"
+     ],
+     "output": [
+       "text"
+     ]
+   },
+   "open_weights": false,
+   "cost": {
+     "input": 5,
+     "output": 25,
+     "cache_read": 0.5,
+     "cache_write": 6.25
+   },
+   "limit": {
+     "context": 1000000,
+     "output": 128000
+   }
+ },
 "claude-3-haiku-20240307": {
   "id": "claude-3-haiku-20240307",
   "name": "Claude Haiku 3",
@@ -609,7 +642,7 @@
 "reasoning": true,
 "tool_call": true,
 "temperature": true,
- "knowledge": "2025-05",
+ "knowledge": "2025-05-31",
 "release_date": "2026-02-05",
 "last_updated": "2026-03-13",
 "modalities": {
data/data/google.json CHANGED
@@ -594,7 +594,12 @@
 "cost": {
   "input": 1.25,
   "output": 10,
-   "cache_read": 0.31
+   "cache_read": 0.125,
+   "context_over_200k": {
+     "input": 2.5,
+     "output": 15,
+     "cache_read": 0.25
+   }
 },
 "limit": {
   "context": 1048576,
@@ -824,7 +829,7 @@
 "cost": {
   "input": 0.3,
   "output": 2.5,
-   "cache_read": 0.075,
+   "cache_read": 0.03,
   "input_audio": 1
 },
 "limit": {
data/data/openai.json CHANGED
@@ -1066,36 +1066,6 @@
 "output": 100000
 }
 },
- "codex-mini-latest": {
-   "id": "codex-mini-latest",
-   "name": "Codex Mini",
-   "family": "gpt-codex-mini",
-   "attachment": true,
-   "reasoning": true,
-   "tool_call": true,
-   "temperature": false,
-   "knowledge": "2024-04",
-   "release_date": "2025-05-16",
-   "last_updated": "2025-05-16",
-   "modalities": {
-     "input": [
-       "text"
-     ],
-     "output": [
-       "text"
-     ]
-   },
-   "open_weights": false,
-   "cost": {
-     "input": 1.5,
-     "output": 6,
-     "cache_read": 0.375
-   },
-   "limit": {
-     "context": 200000,
-     "output": 100000
-   }
- },
 "gpt-4": {
   "id": "gpt-4",
   "name": "GPT-4",