llm.rb 11.2.0 → 11.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +57 -11
- data/README.md +78 -66
- data/lib/llm/agent.rb +25 -4
- data/lib/llm/error.rb +4 -0
- data/lib/llm/providers/anthropic/error_handler.rb +2 -0
- data/lib/llm/providers/bedrock/error_handler.rb +1 -1
- data/lib/llm/providers/google/error_handler.rb +2 -0
- data/lib/llm/providers/ollama/error_handler.rb +2 -0
- data/lib/llm/providers/openai/error_handler.rb +2 -0
- data/lib/llm/version.rb +1 -1
- data/llm.gemspec +3 -2
- metadata +18 -5
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 314712380b36e57b1492cef3850f5c4c2397522b74d3cc913fc0d09a796d8973
|
|
4
|
+
data.tar.gz: aefda31d90067a0a49ada778c6658243595b6698cc11ecf342a11e26f69ad93b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 3a998015696027d232e0865c60ff840d11155206b705443035f6af7dcbb18f52d0e82b019cc82379a7ca919b60e3e50bf4156c8c4388beb8ba47a5d57775354a
|
|
7
|
+
data.tar.gz: 83faf786980a3307a760aec9698e29129dd34f8a838fa8f596caa60498f029bf3b5b4ac9400dd662c70a67a8c1eb748266d89b777773c8185025c8b8c86754bd
|
data/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,52 @@
|
|
|
2
2
|
|
|
3
3
|
## Unreleased
|
|
4
4
|
|
|
5
|
+
## v11.3.0
|
|
6
|
+
|
|
7
|
+
Changes since `v11.2.0`.
|
|
8
|
+
|
|
9
|
+
This release promotes `LLM::Agent` as the default high-level runtime,
|
|
10
|
+
raises `LLM::NotFoundError` for provider 404 responses, and adds
|
|
11
|
+
Symbol resolution to `LLM::Agent.confirm` and `LLM::Agent.skills` for
|
|
12
|
+
dynamic tool confirmation and skill lists.
|
|
13
|
+
|
|
14
|
+
### Add
|
|
15
|
+
|
|
16
|
+
* **Raise `LLM::NotFoundError` for provider 404 responses** <br>
|
|
17
|
+
Raise `LLM::NotFoundError` when a provider returns HTTP 404. One
|
|
18
|
+
example is calling the embeddings API on DeepSeek
|
|
19
|
+
(`LLM.deepseek(...).embed(["foobar"])`), which returns 404 because
|
|
20
|
+
DeepSeek does not implement that endpoint.
|
|
21
|
+
|
|
22
|
+
* **Add Symbol resolution to `LLM::Agent.confirm`** <br>
|
|
23
|
+
When `confirm` receives a single Symbol argument, it stores it
|
|
24
|
+
as-is instead of converting it to a string array. At initialization
|
|
25
|
+
time, `resolve_option` resolves the Symbol by calling the method
|
|
26
|
+
with that name on the agent instance, and the result is converted
|
|
27
|
+
to strings. This allows dynamic tool confirmation lists:
|
|
28
|
+
|
|
29
|
+
class MyAgent < LLM::Agent
|
|
30
|
+
confirm :tools_that_need_confirmation
|
|
31
|
+
|
|
32
|
+
def tools_that_need_confirmation
|
|
33
|
+
some_condition ? %w[delete destroy] : %w[delete]
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
Ported from llmrb/mruby-llm@89a232e3 and @2dd04e2d.
|
|
38
|
+
|
|
39
|
+
Extend the same pattern to `LLM::Agent.skills` so the skills DSL
|
|
40
|
+
accepts a Symbol that resolves through the agent instance at
|
|
41
|
+
initialization time.
|
|
42
|
+
|
|
43
|
+
### Change
|
|
44
|
+
|
|
45
|
+
* **Clarify `LLM::Agent` as the default high-level runtime** <br>
|
|
46
|
+
Document that `LLM::Context` remains at the heart of llm.rb, but
|
|
47
|
+
`LLM::Agent` is the better default unless an application needs advanced
|
|
48
|
+
manual tool loops. `LLM::Agent` manages the tool loop for callers and
|
|
49
|
+
enables guards against runaway or repeated tool-call loops.
|
|
50
|
+
|
|
5
51
|
## v11.2.0
|
|
6
52
|
|
|
7
53
|
Changes since `v11.1.0`.
|
|
@@ -222,7 +268,7 @@ requests outside `#session`, `LLM::Function#def` as a short alias for
|
|
|
222
268
|
Fix block-form `model { ... }`, `tools { ... }`, and
|
|
223
269
|
`schema { ... }` declarations in the ActiveRecord and Sequel agent
|
|
224
270
|
wrappers so persisted agent models configure the internal agent class
|
|
225
|
-
the same way
|
|
271
|
+
the same way `LLM::Agent` does.
|
|
226
272
|
|
|
227
273
|
* **Fix missing `skills` in ORM agent wrappers** <br>
|
|
228
274
|
Fix the ActiveRecord and Sequel agent wrappers to expose `skills`, so
|
|
@@ -465,7 +511,7 @@ DSML tool-marker filtering in streamed text.
|
|
|
465
511
|
blocks that Bedrock rejects.
|
|
466
512
|
|
|
467
513
|
* **Suppress Bedrock DSML tool markers in streamed text** <br>
|
|
468
|
-
Filter
|
|
514
|
+
Filter `"\u003c\u003cDSML\u003efunction_calls\u003e\u003e"` markers out of streamed Bedrock
|
|
469
515
|
assistant text so tool-call sentinels do not leak into user-visible
|
|
470
516
|
output.
|
|
471
517
|
|
|
@@ -475,7 +521,7 @@ Changes since `v7.0.0`.
|
|
|
475
521
|
|
|
476
522
|
This release adds Unix-fork concurrency for process-isolated tool
|
|
477
523
|
execution, extends `LLM::Object` with `#merge` and `#delete`, and drops
|
|
478
|
-
Ruby 3.2 support due to
|
|
524
|
+
Ruby 3.2 support due to a segfault observed with the `:fork` path. It
|
|
479
525
|
promotes `LLM::Pipe` to the top-level namespace and adds
|
|
480
526
|
`persistent: true` on `LLM::MCP.http` for direct persistent transport
|
|
481
527
|
configuration. `LLM::Function#runner` is exposed as public API, agent
|
|
@@ -616,7 +662,7 @@ provider usage has been recorded yet.
|
|
|
616
662
|
buffer API.
|
|
617
663
|
|
|
618
664
|
* **Support percentage compaction token thresholds** <br>
|
|
619
|
-
Let `LLM::Compactor` accept `token_threshold:` values like
|
|
665
|
+
Let `LLM::Compactor` accept `token_threshold:` values like `"90%"` so
|
|
620
666
|
compaction can trigger at a percentage of the active model context
|
|
621
667
|
window.
|
|
622
668
|
|
|
@@ -775,7 +821,7 @@ interruption use the active per-call stream correctly.
|
|
|
775
821
|
|
|
776
822
|
* **Refresh provider model metadata** <br>
|
|
777
823
|
Add current DeepSeek and OpenAI model metadata to `data/` and update the
|
|
778
|
-
Google
|
|
824
|
+
Google Gemini model entry to match the current provider naming.
|
|
779
825
|
|
|
780
826
|
### Fix
|
|
781
827
|
|
|
@@ -1216,12 +1262,12 @@ Changes since `v4.14.0`.
|
|
|
1216
1262
|
storage when Sequel JSON typecasting is enabled.
|
|
1217
1263
|
|
|
1218
1264
|
* **Improve streaming parser performance** <br>
|
|
1219
|
-
In the local replay-based `stream_parser` benchmark versus
|
|
1220
|
-
|
|
1265
|
+
In the local replay-based `stream_parser` benchmark versus `v4.14.0`
|
|
1266
|
+
(median of 20 samples, 5000 iterations), plain Ruby is a
|
|
1221
1267
|
small overall win: the generic eventstream path is about 0.4%
|
|
1222
1268
|
faster, the OpenAI stream parser is about 0.5% faster, and the
|
|
1223
1269
|
OpenAI Responses parser is about 1.6% faster, with unchanged
|
|
1224
|
-
allocations. Under YJIT on the same benchmark, the generic
|
|
1270
|
+
allocations. Under YJIT on the same benchmark harness, the generic
|
|
1225
1271
|
eventstream path is about 0.9% faster and the OpenAI stream parser
|
|
1226
1272
|
is about 0.4% faster, while the OpenAI Responses parser is about
|
|
1227
1273
|
0.7% slower, also with unchanged allocations.
|
|
@@ -1263,7 +1309,7 @@ parallel tool calls can safely share one connection.
|
|
|
1263
1309
|
* **Reduce provider streaming allocations** <br>
|
|
1264
1310
|
Decode streamed provider payloads directly in
|
|
1265
1311
|
`LLM::Provider::Transport::HTTP` before handing them to provider
|
|
1266
|
-
parsers, which cuts allocation churn and gives a
|
|
1312
|
+
parsers, which cuts allocation churn and gives a small streaming
|
|
1267
1313
|
speed bump.
|
|
1268
1314
|
|
|
1269
1315
|
* **Reduce generic SSE parser allocations** <br>
|
|
@@ -1399,7 +1445,7 @@ Changes since `v4.9.0`.
|
|
|
1399
1445
|
|
|
1400
1446
|
- Add HTTP transport for MCP with `LLM::MCP::Transport::HTTP` for remote servers
|
|
1401
1447
|
- Add JSON Schema union types (`any_of`, `all_of`, `one_of`) with parser integration
|
|
1402
|
-
- Add JSON Schema type array union support (e.g.,
|
|
1448
|
+
- Add JSON Schema type array union support (e.g., `"type": ["object", "null"]`)
|
|
1403
1449
|
- Add JSON Schema type inference from `const`, `enum`, or `default` fields
|
|
1404
1450
|
|
|
1405
1451
|
### Change
|
|
@@ -1500,7 +1546,7 @@ Notable merged work in this range includes:
|
|
|
1500
1546
|
- `Add rack + websocket example (#130)`
|
|
1501
1547
|
- `feat(gemspec): add changelog URI (#136)`
|
|
1502
1548
|
- `feat(function): alias ThreadGroup#wait as ThreadGroup#value (#62)`
|
|
1503
|
-
- README and screencast refresh across `#66`, `#68`, `#71`, and
|
|
1549
|
+
- README and screencast refresh across `#66`, `#68`, `#71`, and
|
|
1504
1550
|
`#72`
|
|
1505
1551
|
- `chore(bot): update deprecation warning from v5.0 to v6.0`
|
|
1506
1552
|
- `fix(deepseek): tolerate malformed tool arguments`
|
data/README.md
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
<img src="https://img.shields.io/badge/License-0BSD-orange.svg?" alt="License">
|
|
12
12
|
</a>
|
|
13
13
|
<a href="https://github.com/llmrb/llm.rb/tags">
|
|
14
|
-
<img src="https://img.shields.io/badge/version-11.
|
|
14
|
+
<img src="https://img.shields.io/badge/version-11.3.0-green.svg?" alt="Version">
|
|
15
15
|
</a>
|
|
16
16
|
</p>
|
|
17
17
|
|
|
@@ -30,10 +30,27 @@ also includes built-in ActiveRecord and Sequel support, plus concurrent
|
|
|
30
30
|
tool execution through threads, tasks (via async gem), fibers, ractors,
|
|
31
31
|
and fork (via xchan.rb gem).
|
|
32
32
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
33
|
+
## Services
|
|
34
|
+
|
|
35
|
+
The llm.rb runtime and its forks
|
|
36
|
+
([mruby-llm](https://github.com/llmrb/mruby-llm),
|
|
37
|
+
[wasm-llm](https://github.com/llmrb/wasm-llm))
|
|
38
|
+
power a growing family of AI applications and
|
|
39
|
+
services. The following applications are publicly
|
|
40
|
+
accessible over SSH and are free to try. No account
|
|
41
|
+
required. Nothing to install.
|
|
42
|
+
|
|
43
|
+
#### matz - the mruby expert
|
|
44
|
+
|
|
45
|
+
> ssh matz@r.uby.dev
|
|
46
|
+
|
|
47
|
+
See [https://r.uby.dev/matz](https://r.uby.dev/matz) for more information.
|
|
48
|
+
|
|
49
|
+
#### robert - the freebsd expert
|
|
50
|
+
|
|
51
|
+
> ssh robert@4.4bsd.dev
|
|
52
|
+
|
|
53
|
+
See [https://4.4bsd.dev/robert](https://4.4bsd.dev/robert) for more information.
|
|
37
54
|
|
|
38
55
|
## Quick start
|
|
39
56
|
|
|
@@ -138,10 +155,10 @@ to either
|
|
|
138
155
|
or
|
|
139
156
|
[LLM::Agent](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html).
|
|
140
157
|
In this example, the MCP server runs over stdio and
|
|
141
|
-
[LLM::
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
158
|
+
[LLM::Agent](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html)
|
|
159
|
+
manages the tool loop. For **stdio**, `mcp.session` is the preferred
|
|
160
|
+
pattern because it keeps one MCP session alive across discovery and
|
|
161
|
+
tool calls:
|
|
145
162
|
|
|
146
163
|
```ruby
|
|
147
164
|
require "llm"
|
|
@@ -150,9 +167,8 @@ llm = LLM.openai(key: ENV["KEY"])
|
|
|
150
167
|
mcp = LLM::MCP.stdio(argv: ["ruby", "server.rb"])
|
|
151
168
|
|
|
152
169
|
mcp.session do
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
ctx.talk(ctx.wait(:call)) while ctx.functions?
|
|
170
|
+
agent = LLM::Agent.new(llm, stream: $stdout, tools: mcp.tools)
|
|
171
|
+
agent.talk "Use the available tools to inspect the environment."
|
|
156
172
|
end
|
|
157
173
|
```
|
|
158
174
|
|
|
@@ -167,9 +183,8 @@ require "llm"
|
|
|
167
183
|
llm = LLM.openai(key: ENV["KEY"])
|
|
168
184
|
mcp = LLM::MCP.stdio(argv: ["ruby", "server.rb"])
|
|
169
185
|
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
ctx.talk(ctx.wait(:call)) while ctx.functions?
|
|
186
|
+
agent = LLM::Agent.new(llm, tools: mcp.tools)
|
|
187
|
+
agent.talk("Use the available tools to inspect the environment.")
|
|
173
188
|
```
|
|
174
189
|
|
|
175
190
|
The HTTP transport can be used with or without the `session` method,
|
|
@@ -188,9 +203,8 @@ mcp = LLM::MCP.http(
|
|
|
188
203
|
transport: :net_http_persistent
|
|
189
204
|
)
|
|
190
205
|
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
ctx.talk(ctx.wait(:call)) while ctx.functions?
|
|
206
|
+
agent = LLM::Agent.new(llm, tools: mcp.tools)
|
|
207
|
+
agent.talk("Use the available tools to inspect the environment.")
|
|
194
208
|
```
|
|
195
209
|
|
|
196
210
|
#### A2A (Agent 2 Agent)
|
|
@@ -214,9 +228,8 @@ a2a = LLM::A2A.rest(
|
|
|
214
228
|
headers: {"Authorization" => "Bearer token"}
|
|
215
229
|
)
|
|
216
230
|
llm = LLM.openai(key: ENV["KEY"])
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
ctx.talk(ctx.wait(:call)) while ctx.functions?
|
|
231
|
+
agent = LLM::Agent.new(llm, tools: a2a.skills)
|
|
232
|
+
agent.talk "Analyze this CSV and summarize the trends."
|
|
220
233
|
```
|
|
221
234
|
|
|
222
235
|
Use persistent HTTP connections:
|
|
@@ -317,8 +330,8 @@ class Stream < LLM::Stream
|
|
|
317
330
|
end
|
|
318
331
|
|
|
319
332
|
llm = LLM.openai(key: ENV["KEY"])
|
|
320
|
-
|
|
321
|
-
|
|
333
|
+
agent = LLM::Agent.new(llm, stream: Stream.new)
|
|
334
|
+
agent.talk "Write a haiku about Ruby."
|
|
322
335
|
```
|
|
323
336
|
|
|
324
337
|
#### LLM::Stream (advanced)
|
|
@@ -375,30 +388,31 @@ agent.talk "Read README.md and CHANGELOG.md and compare them."
|
|
|
375
388
|
|
|
376
389
|
#### Serialization
|
|
377
390
|
|
|
378
|
-
The [`LLM::
|
|
391
|
+
The [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html)
|
|
379
392
|
object can be serialized to JSON, which makes it suitable for storing
|
|
380
393
|
in a file, a database column, or a Redis queue. The built-in
|
|
381
|
-
ActiveRecord and Sequel plugins are built on top of
|
|
394
|
+
ActiveRecord and Sequel plugins are built on top of the same underlying
|
|
395
|
+
serialization feature:
|
|
382
396
|
|
|
383
397
|
```ruby
|
|
384
398
|
require "llm"
|
|
385
399
|
|
|
386
400
|
llm = LLM.openai(key: ENV["KEY"])
|
|
387
401
|
|
|
388
|
-
# Serialize
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
string =
|
|
402
|
+
# Serialize an agent
|
|
403
|
+
agent1 = LLM::Agent.new(llm)
|
|
404
|
+
agent1.talk "Remember that my favorite language is Ruby"
|
|
405
|
+
string = agent1.to_json
|
|
392
406
|
|
|
393
|
-
# Restore
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
407
|
+
# Restore an agent (from JSON)
|
|
408
|
+
agent2 = LLM::Agent.new(llm, stream: $stdout)
|
|
409
|
+
agent2.restore(string:)
|
|
410
|
+
agent2.talk "What is my favorite language?"
|
|
397
411
|
```
|
|
398
412
|
|
|
399
413
|
#### ask
|
|
400
414
|
|
|
401
|
-
[`LLM::
|
|
415
|
+
[`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html)
|
|
402
416
|
also provides `ask`, a convenience interface that is compatible with
|
|
403
417
|
RubyLLM's `ask` method. It accepts a prompt, an optional `with:`
|
|
404
418
|
attachment path or paths, an optional `stream:` target, and an optional
|
|
@@ -410,11 +424,11 @@ so use `.content` when you want the text directly:
|
|
|
410
424
|
require "llm"
|
|
411
425
|
|
|
412
426
|
llm = LLM.openai(key: ENV["KEY"])
|
|
413
|
-
|
|
427
|
+
agent = LLM::Agent.new(llm)
|
|
414
428
|
|
|
415
|
-
puts
|
|
416
|
-
puts
|
|
417
|
-
|
|
429
|
+
puts agent.ask("Hello world").content
|
|
430
|
+
puts agent.ask("Summarize this document.", with: "README.md").content
|
|
431
|
+
agent.ask("Stream this reply.") { $stdout << _1 }
|
|
418
432
|
```
|
|
419
433
|
|
|
420
434
|
## Installation
|
|
@@ -427,8 +441,8 @@ gem install llm.rb
|
|
|
427
441
|
|
|
428
442
|
#### REPL
|
|
429
443
|
|
|
430
|
-
This example uses [`LLM::
|
|
431
|
-
|
|
444
|
+
This example uses [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html)
|
|
445
|
+
for an interactive REPL. <br> See the
|
|
432
446
|
[deepdive (web)](https://llmrb.github.io/llm.rb/) or
|
|
433
447
|
[deepdive (markdown)](resources/deepdive.md) for more examples.
|
|
434
448
|
|
|
@@ -436,11 +450,11 @@ directly for an interactive REPL. <br> See the
|
|
|
436
450
|
require "llm"
|
|
437
451
|
|
|
438
452
|
llm = LLM.openai(key: ENV["KEY"])
|
|
439
|
-
|
|
453
|
+
agent = LLM::Agent.new(llm, stream: $stdout)
|
|
440
454
|
|
|
441
455
|
loop do
|
|
442
456
|
print "> "
|
|
443
|
-
|
|
457
|
+
agent.talk(STDIN.gets || break)
|
|
444
458
|
puts
|
|
445
459
|
end
|
|
446
460
|
```
|
|
@@ -449,36 +463,36 @@ end
|
|
|
449
463
|
|
|
450
464
|
In llm.rb, a prompt can be a string, an [`LLM::Prompt`](https://0x1eef.github.io/x/llm.rb/LLM/Prompt.html), or an array.
|
|
451
465
|
When you use an array, each element can be plain text or a tagged object such as
|
|
452
|
-
[`
|
|
453
|
-
[`
|
|
454
|
-
or [`
|
|
466
|
+
[`agent.image_url(...)`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html#image_url-instance_method),
|
|
467
|
+
[`agent.local_file(...)`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html#local_file-instance_method),
|
|
468
|
+
or [`agent.remote_file(...)`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html#remote_file-instance_method).
|
|
455
469
|
Those tagged objects carry the metadata the provider adapter needs to turn one
|
|
456
470
|
Ruby prompt into the provider-specific multimodal request schema.
|
|
457
471
|
|
|
458
472
|
If the model understands that file type, you can attach a local file directly
|
|
459
|
-
with `
|
|
473
|
+
with `agent.ask(..., with: path)` instead of uploading it first through a
|
|
460
474
|
provider Files API. Under the hood, llm.rb tags the path as a
|
|
461
|
-
[`
|
|
475
|
+
[`agent.local_file(...)`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html#local_file-instance_method)
|
|
462
476
|
object:
|
|
463
477
|
|
|
464
478
|
```ruby
|
|
465
479
|
require "llm"
|
|
466
480
|
|
|
467
481
|
llm = LLM.openai(key: ENV["KEY"])
|
|
468
|
-
|
|
469
|
-
puts
|
|
482
|
+
agent = LLM::Agent.new(llm)
|
|
483
|
+
puts agent.ask("Summarize this document.", with: "README.md").content
|
|
470
484
|
```
|
|
471
485
|
|
|
472
486
|
#### Context Compaction
|
|
473
487
|
|
|
474
|
-
This example uses [`LLM::
|
|
488
|
+
This example uses [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html),
|
|
475
489
|
[`LLM::Compactor`](https://0x1eef.github.io/x/llm.rb/LLM/Compactor.html), and
|
|
476
490
|
[`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html) together so
|
|
477
|
-
long-lived
|
|
491
|
+
long-lived conversations can summarize older history and expose the lifecycle
|
|
478
492
|
through stream hooks. This approach is inspired by General Intelligence
|
|
479
493
|
Systems. The
|
|
480
494
|
compactor can also use its own `model:` if you want summarization to run on a
|
|
481
|
-
different model from the main
|
|
495
|
+
different model from the main conversation. `token_threshold:` accepts either a
|
|
482
496
|
fixed token count or a percentage string like `"90%"`, which resolves
|
|
483
497
|
against the active model context window and triggers compaction once total
|
|
484
498
|
token usage goes over that percentage. See the
|
|
@@ -499,7 +513,7 @@ class Stream < LLM::Stream
|
|
|
499
513
|
end
|
|
500
514
|
|
|
501
515
|
llm = LLM.openai(key: ENV["KEY"])
|
|
502
|
-
|
|
516
|
+
agent = LLM::Agent.new(
|
|
503
517
|
llm,
|
|
504
518
|
stream: Stream.new,
|
|
505
519
|
compactor: {
|
|
@@ -518,9 +532,8 @@ visible assistant output. See the
|
|
|
518
532
|
[deepdive (web)](https://llmrb.github.io/llm.rb/) or
|
|
519
533
|
[deepdive (markdown)](resources/deepdive.md) for more examples.
|
|
520
534
|
|
|
521
|
-
To use the Responses API (OpenAI-specific), initialize
|
|
522
|
-
|
|
523
|
-
`talk` for turns.
|
|
535
|
+
To use the Responses API (OpenAI-specific), initialize an agent with
|
|
536
|
+
`mode: :responses` and keep using `talk` for turns.
|
|
524
537
|
|
|
525
538
|
```ruby
|
|
526
539
|
require "llm"
|
|
@@ -536,20 +549,20 @@ class Stream < LLM::Stream
|
|
|
536
549
|
end
|
|
537
550
|
|
|
538
551
|
llm = LLM.openai(key: ENV["KEY"])
|
|
539
|
-
|
|
552
|
+
agent = LLM::Agent.new(
|
|
540
553
|
llm,
|
|
541
554
|
model: "gpt-5.4-mini",
|
|
542
555
|
mode: :responses,
|
|
543
556
|
reasoning: {effort: "medium"},
|
|
544
557
|
stream: Stream.new
|
|
545
558
|
)
|
|
546
|
-
|
|
559
|
+
agent.talk("Solve 17 * 19 and show your work.")
|
|
547
560
|
```
|
|
548
561
|
|
|
549
562
|
#### Request Cancellation
|
|
550
563
|
|
|
551
564
|
Need to cancel a stream? llm.rb has you covered through
|
|
552
|
-
[`LLM::
|
|
565
|
+
[`LLM::Agent#interrupt!`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html#interrupt-21-instance_method).
|
|
553
566
|
<br> See the [deepdive (web)](https://llmrb.github.io/llm.rb/)
|
|
554
567
|
or [deepdive (markdown)](resources/deepdive.md) for more examples.
|
|
555
568
|
|
|
@@ -558,15 +571,15 @@ require "llm"
|
|
|
558
571
|
require "io/console"
|
|
559
572
|
|
|
560
573
|
llm = LLM.openai(key: ENV["KEY"])
|
|
561
|
-
|
|
574
|
+
agent = LLM::Agent.new(llm, stream: $stdout)
|
|
562
575
|
worker = Thread.new do
|
|
563
|
-
|
|
576
|
+
agent.talk("Write a very long essay about network protocols.")
|
|
564
577
|
rescue LLM::Interrupt
|
|
565
578
|
puts "Request was interrupted!"
|
|
566
579
|
end
|
|
567
580
|
|
|
568
581
|
STDIN.getch
|
|
569
|
-
|
|
582
|
+
agent.interrupt!
|
|
570
583
|
worker.join
|
|
571
584
|
```
|
|
572
585
|
|
|
@@ -727,7 +740,7 @@ end
|
|
|
727
740
|
|
|
728
741
|
This example uses [`LLM::MCP`](https://0x1eef.github.io/x/llm.rb/LLM/MCP.html)
|
|
729
742
|
over HTTP so remote GitHub MCP tools run through the same
|
|
730
|
-
`LLM::
|
|
743
|
+
`LLM::Agent` tool path as local tools. It expects a GitHub token in
|
|
731
744
|
`ENV["GITHUB_PAT"]`. See the
|
|
732
745
|
[deepdive (web)](https://llmrb.github.io/llm.rb/) or
|
|
733
746
|
[deepdive (markdown)](resources/deepdive.md) for more examples.
|
|
@@ -743,9 +756,8 @@ mcp = LLM::MCP.http(
|
|
|
743
756
|
persistent: true
|
|
744
757
|
)
|
|
745
758
|
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
ctx.talk(ctx.wait(:call)) while ctx.functions?
|
|
759
|
+
agent = LLM::Agent.new(llm, stream: $stdout, tools: mcp.tools)
|
|
760
|
+
agent.talk("Pull information about my GitHub account.")
|
|
749
761
|
```
|
|
750
762
|
|
|
751
763
|
## Resources
|
data/lib/llm/agent.rb
CHANGED
|
@@ -72,7 +72,11 @@ module LLM
|
|
|
72
72
|
# Returns the current skills when no argument is provided
|
|
73
73
|
def self.skills(*skills, &block)
|
|
74
74
|
return @skills if skills.empty? && !block
|
|
75
|
-
|
|
75
|
+
if skills.size == 1 and skills.grep(Symbol).any?
|
|
76
|
+
@skills = skills.first
|
|
77
|
+
else
|
|
78
|
+
@skills = block || skills.flatten
|
|
79
|
+
end
|
|
76
80
|
end
|
|
77
81
|
|
|
78
82
|
##
|
|
@@ -160,14 +164,31 @@ module LLM
|
|
|
160
164
|
##
|
|
161
165
|
# Set or get the tool names that require confirmation before they can run.
|
|
162
166
|
#
|
|
167
|
+
# When a single Symbol is given, it is stored as-is and resolved at
|
|
168
|
+
# initialization time by calling the method with that name on the agent
|
|
169
|
+
# instance. This allows dynamic tool confirmation lists.
|
|
170
|
+
#
|
|
171
|
+
# @example
|
|
172
|
+
# class MyAgent < LLM::Agent
|
|
173
|
+
# confirm :tools_that_need_confirmation
|
|
174
|
+
#
|
|
175
|
+
# def tools_that_need_confirmation
|
|
176
|
+
# some_condition ? %w[delete destroy] : %w[delete]
|
|
177
|
+
# end
|
|
178
|
+
# end
|
|
179
|
+
#
|
|
163
180
|
# @param [String, Symbol, Array<String, Symbol>, Proc] tool_names
|
|
164
181
|
# One or more tool names.
|
|
165
182
|
# @param [Proc] block
|
|
166
183
|
# An optional, lazy-evaluated Proc
|
|
167
|
-
# @return [Array<String>, Proc, nil]
|
|
184
|
+
# @return [Array<String>, Proc, Symbol, nil]
|
|
168
185
|
def self.confirm(*tool_names, &block)
|
|
169
186
|
return @confirm if tool_names.empty? && !block
|
|
170
|
-
|
|
187
|
+
if tool_names.size == 1 && tool_names.grep(Symbol).any?
|
|
188
|
+
@confirm = tool_names.first
|
|
189
|
+
else
|
|
190
|
+
@confirm = block || tool_names.flatten.map(&:to_s)
|
|
191
|
+
end
|
|
171
192
|
end
|
|
172
193
|
|
|
173
194
|
##
|
|
@@ -190,7 +211,7 @@ module LLM
|
|
|
190
211
|
fields_ivar = %i[tracer concurrency instructions confirm]
|
|
191
212
|
fields.each do |field|
|
|
192
213
|
resolvable = params.key?(field) ? params.delete(field) : self.class.public_send(field)
|
|
193
|
-
resolve_symbol = !%i[concurrency
|
|
214
|
+
resolve_symbol = !%i[concurrency].include?(field)
|
|
194
215
|
resolved = resolvable != nil ? resolve_option(self, resolvable, resolve_symbol:) : resolvable
|
|
195
216
|
resolved = [*resolved].map(&:to_s) if field == :confirm && resolved
|
|
196
217
|
if field == :model
|
data/lib/llm/error.rb
CHANGED
|
@@ -49,6 +49,8 @@ class LLM::Anthropic
|
|
|
49
49
|
LLM::UnauthorizedError.new("Authentication error").tap { _1.response = res }
|
|
50
50
|
elsif res.rate_limited?
|
|
51
51
|
LLM::RateLimitError.new("Too many requests").tap { _1.response = res }
|
|
52
|
+
elsif res.not_found?
|
|
53
|
+
LLM::NotFoundError.new("Server response: not found (404)").tap { _1.response = res }
|
|
52
54
|
else
|
|
53
55
|
LLM::Error.new("Unexpected response").tap { _1.response = res }
|
|
54
56
|
end
|
|
@@ -53,7 +53,7 @@ class LLM::Bedrock
|
|
|
53
53
|
elsif res.rate_limited?
|
|
54
54
|
LLM::RateLimitError.new(message).tap { _1.response = res }
|
|
55
55
|
elsif res.not_found?
|
|
56
|
-
LLM::
|
|
56
|
+
LLM::NotFoundError.new("Server response: not found (404)").tap { _1.response = res }
|
|
57
57
|
else
|
|
58
58
|
LLM::Error.new(message).tap { _1.response = res }
|
|
59
59
|
end
|
|
@@ -60,6 +60,8 @@ class LLM::Google
|
|
|
60
60
|
end
|
|
61
61
|
elsif res.rate_limited?
|
|
62
62
|
LLM::RateLimitError.new("Too many requests").tap { _1.response = res }
|
|
63
|
+
elsif res.not_found?
|
|
64
|
+
LLM::NotFoundError.new("Server response: not found (404)").tap { _1.response = res }
|
|
63
65
|
else
|
|
64
66
|
LLM::Error.new("Unexpected response").tap { _1.response = res }
|
|
65
67
|
end
|
|
@@ -49,6 +49,8 @@ class LLM::Ollama
|
|
|
49
49
|
LLM::UnauthorizedError.new("Authentication error").tap { _1.response = res }
|
|
50
50
|
elsif res.rate_limited?
|
|
51
51
|
LLM::RateLimitError.new("Too many requests").tap { _1.response = res }
|
|
52
|
+
elsif res.not_found?
|
|
53
|
+
LLM::NotFoundError.new("Server response: not found (404)").tap { _1.response = res }
|
|
52
54
|
else
|
|
53
55
|
LLM::Error.new("Unexpected response").tap { _1.response = res }
|
|
54
56
|
end
|
|
@@ -55,6 +55,8 @@ class LLM::OpenAI
|
|
|
55
55
|
LLM::UnauthorizedError.new("Authentication error").tap { _1.response = res }
|
|
56
56
|
elsif res.rate_limited?
|
|
57
57
|
LLM::RateLimitError.new("Too many requests").tap { _1.response = res }
|
|
58
|
+
elsif res.not_found?
|
|
59
|
+
LLM::NotFoundError.new("Server response: not found (404)").tap { _1.response = res }
|
|
58
60
|
else
|
|
59
61
|
error = body["error"] || {}
|
|
60
62
|
case error["type"]
|
data/lib/llm/version.rb
CHANGED
data/llm.gemspec
CHANGED
|
@@ -5,8 +5,8 @@ require_relative "lib/llm/version"
|
|
|
5
5
|
Gem::Specification.new do |spec|
|
|
6
6
|
spec.name = "llm.rb"
|
|
7
7
|
spec.version = LLM::VERSION
|
|
8
|
-
spec.authors = ["
|
|
9
|
-
spec.email = ["
|
|
8
|
+
spec.authors = ["0x1eef (Robert)", "Antar Azri", "Rodrigo Serrano", "Christos Maris"]
|
|
9
|
+
spec.email = ["robert@4.4bsd.dev"]
|
|
10
10
|
|
|
11
11
|
spec.summary = "Ruby's most capable AI runtime"
|
|
12
12
|
spec.description = <<~DESC
|
|
@@ -60,4 +60,5 @@ Gem::Specification.new do |spec|
|
|
|
60
60
|
spec.add_development_dependency "sqlite3", "~> 2.0"
|
|
61
61
|
spec.add_development_dependency "xchan.rb", "~> 0.20"
|
|
62
62
|
spec.add_development_dependency "pg", "~> 1.5"
|
|
63
|
+
spec.add_development_dependency "irb", "~> 1.18"
|
|
63
64
|
end
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: llm.rb
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 11.
|
|
4
|
+
version: 11.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
|
+
- 0x1eef (Robert)
|
|
7
8
|
- Antar Azri
|
|
8
|
-
- '0x1eef'
|
|
9
|
-
- Christos Maris
|
|
10
9
|
- Rodrigo Serrano
|
|
10
|
+
- Christos Maris
|
|
11
11
|
bindir: bin
|
|
12
12
|
cert_chain: []
|
|
13
13
|
date: 1980-01-02 00:00:00.000000000 Z
|
|
@@ -264,6 +264,20 @@ dependencies:
|
|
|
264
264
|
- - "~>"
|
|
265
265
|
- !ruby/object:Gem::Version
|
|
266
266
|
version: '1.5'
|
|
267
|
+
- !ruby/object:Gem::Dependency
|
|
268
|
+
name: irb
|
|
269
|
+
requirement: !ruby/object:Gem::Requirement
|
|
270
|
+
requirements:
|
|
271
|
+
- - "~>"
|
|
272
|
+
- !ruby/object:Gem::Version
|
|
273
|
+
version: '1.18'
|
|
274
|
+
type: :development
|
|
275
|
+
prerelease: false
|
|
276
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
277
|
+
requirements:
|
|
278
|
+
- - "~>"
|
|
279
|
+
- !ruby/object:Gem::Version
|
|
280
|
+
version: '1.18'
|
|
267
281
|
description: |
|
|
268
282
|
llm.rb is Ruby's most capable AI runtime.
|
|
269
283
|
|
|
@@ -279,8 +293,7 @@ description: |
|
|
|
279
293
|
tool execution through threads, tasks (via async gem), fibers, ractors,
|
|
280
294
|
and fork (via xchan.rb gem).
|
|
281
295
|
email:
|
|
282
|
-
-
|
|
283
|
-
- 0x1eef@hardenedbsd.org
|
|
296
|
+
- robert@4.4bsd.dev
|
|
284
297
|
executables: []
|
|
285
298
|
extensions: []
|
|
286
299
|
extra_rdoc_files: []
|