llm.rb 11.1.0 → 11.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +141 -12
- data/README.md +104 -69
- data/lib/llm/a2a/transport/http.rb +9 -8
- data/lib/llm/a2a.rb +14 -7
- data/lib/llm/agent.rb +31 -7
- data/lib/llm/context.rb +20 -6
- data/lib/llm/error.rb +4 -0
- data/lib/llm/function/array.rb +6 -0
- data/lib/llm/function.rb +26 -0
- data/lib/llm/json_adapter.rb +8 -2
- data/lib/llm/mcp/transport/http.rb +7 -5
- data/lib/llm/mcp.rb +6 -7
- data/lib/llm/provider.rb +1 -18
- data/lib/llm/providers/anthropic/error_handler.rb +2 -0
- data/lib/llm/providers/anthropic/files.rb +6 -6
- data/lib/llm/providers/anthropic/models.rb +1 -1
- data/lib/llm/providers/anthropic.rb +1 -1
- data/lib/llm/providers/bedrock/error_handler.rb +1 -1
- data/lib/llm/providers/bedrock/models.rb +4 -4
- data/lib/llm/providers/bedrock/signature.rb +3 -3
- data/lib/llm/providers/bedrock.rb +1 -1
- data/lib/llm/providers/google/error_handler.rb +2 -0
- data/lib/llm/providers/google/files.rb +5 -5
- data/lib/llm/providers/google/images.rb +1 -1
- data/lib/llm/providers/google/models.rb +1 -1
- data/lib/llm/providers/google.rb +2 -2
- data/lib/llm/providers/ollama/error_handler.rb +2 -0
- data/lib/llm/providers/ollama/models.rb +1 -1
- data/lib/llm/providers/ollama.rb +2 -2
- data/lib/llm/providers/openai/audio.rb +3 -3
- data/lib/llm/providers/openai/error_handler.rb +2 -0
- data/lib/llm/providers/openai/files.rb +5 -5
- data/lib/llm/providers/openai/images.rb +3 -3
- data/lib/llm/providers/openai/models.rb +1 -1
- data/lib/llm/providers/openai/moderations.rb +1 -1
- data/lib/llm/providers/openai/responses.rb +3 -3
- data/lib/llm/providers/openai/vector_stores.rb +11 -11
- data/lib/llm/providers/openai.rb +2 -2
- data/lib/llm/skill.rb +1 -1
- data/lib/llm/tool.rb +21 -0
- data/lib/llm/transport/curb.rb +246 -0
- data/lib/llm/transport/execution.rb +1 -1
- data/lib/llm/transport/http.rb +9 -4
- data/lib/llm/transport/net_http_adapter.rb +61 -0
- data/lib/llm/transport/persistent_http.rb +10 -5
- data/lib/llm/transport/request.rb +121 -0
- data/lib/llm/transport/response/curb.rb +112 -0
- data/lib/llm/transport/response.rb +1 -0
- data/lib/llm/transport/utils.rb +42 -17
- data/lib/llm/transport.rb +17 -45
- data/lib/llm/version.rb +1 -1
- data/llm.gemspec +6 -5
- metadata +25 -8
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 314712380b36e57b1492cef3850f5c4c2397522b74d3cc913fc0d09a796d8973
|
|
4
|
+
data.tar.gz: aefda31d90067a0a49ada778c6658243595b6698cc11ecf342a11e26f69ad93b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 3a998015696027d232e0865c60ff840d11155206b705443035f6af7dcbb18f52d0e82b019cc82379a7ca919b60e3e50bf4156c8c4388beb8ba47a5d57775354a
|
|
7
|
+
data.tar.gz: 83faf786980a3307a760aec9698e29129dd34f8a838fa8f596caa60498f029bf3b5b4ac9400dd662c70a67a8c1eb748266d89b777773c8185025c8b8c86754bd
|
data/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,135 @@
|
|
|
2
2
|
|
|
3
3
|
## Unreleased
|
|
4
4
|
|
|
5
|
+
## v11.3.0
|
|
6
|
+
|
|
7
|
+
Changes since `v11.2.0`.
|
|
8
|
+
|
|
9
|
+
This release promotes `LLM::Agent` as the default high-level runtime,
|
|
10
|
+
raises `LLM::NotFoundError` for provider 404 responses, and adds
|
|
11
|
+
Symbol resolution to `LLM::Agent.confirm` and `LLM::Agent.skills` for
|
|
12
|
+
dynamic tool confirmation and skill lists.
|
|
13
|
+
|
|
14
|
+
### Add
|
|
15
|
+
|
|
16
|
+
* **Raise `LLM::NotFoundError` for provider 404 responses** <br>
|
|
17
|
+
Raise `LLM::NotFoundError` when a provider returns HTTP 404. One
|
|
18
|
+
example is calling the embeddings API on DeepSeek
|
|
19
|
+
(`LLM.deepseek(...).embed(["foobar"])`), which returns 404 because
|
|
20
|
+
DeepSeek does not implement that endpoint.
|
|
21
|
+
|
|
22
|
+
* **Add Symbol resolution to `LLM::Agent.confirm`** <br>
|
|
23
|
+
When `confirm` receives a single Symbol argument, it stores it
|
|
24
|
+
as-is instead of converting it to a string array. At initialization
|
|
25
|
+
time, `resolve_option` resolves the Symbol by calling the method
|
|
26
|
+
with that name on the agent instance, and the result is converted
|
|
27
|
+
to strings. This allows dynamic tool confirmation lists:
|
|
28
|
+
|
|
29
|
+
class MyAgent < LLM::Agent
|
|
30
|
+
confirm :tools_that_need_confirmation
|
|
31
|
+
|
|
32
|
+
def tools_that_need_confirmation
|
|
33
|
+
some_condition ? %w[delete destroy] : %w[delete]
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
Ported from llmrb/mruby-llm@89a232e3 and @2dd04e2d.
|
|
38
|
+
|
|
39
|
+
Extend the same pattern to `LLM::Agent.skills` so the skills DSL
|
|
40
|
+
accepts a Symbol that resolves through the agent instance at
|
|
41
|
+
initialization time.
|
|
42
|
+
|
|
43
|
+
### Change
|
|
44
|
+
|
|
45
|
+
* **Clarify `LLM::Agent` as the default high-level runtime** <br>
|
|
46
|
+
Document that `LLM::Context` remains at the heart of llm.rb, but
|
|
47
|
+
`LLM::Agent` is the better default unless an application needs advanced
|
|
48
|
+
manual tool loops. `LLM::Agent` manages the tool loop for callers and
|
|
49
|
+
enables guards against runaway or repeated tool-call loops.
|
|
50
|
+
|
|
51
|
+
## v11.2.0
|
|
52
|
+
|
|
53
|
+
Changes since `v11.1.0`.
|
|
54
|
+
|
|
55
|
+
This release adds `LLM::Function#skill?` and `LLM::Tool#skill?` so
|
|
56
|
+
callers can inspect whether a function or tool is backed by a skill.
|
|
57
|
+
|
|
58
|
+
It introduces `LLM::Transport::Request` as a transport-agnostic request
|
|
59
|
+
object so providers no longer depend directly on `Net::HTTP` request
|
|
60
|
+
classes, and adds an optional Curb (libcurl) backend alongside symbolic
|
|
61
|
+
transport shortcuts such as `transport: :curb`.
|
|
62
|
+
|
|
63
|
+
MCP and A2A clients now accept `persistent: true`, matching provider configuration.
|
|
64
|
+
Several fixes land for tool return callback emission, function comparison by
|
|
65
|
+
tool call ID, function array filtering, skill tool inheritance, and JSON generator
|
|
66
|
+
state compatibility on Ruby 4.
|
|
67
|
+
|
|
68
|
+
### Add
|
|
69
|
+
|
|
70
|
+
* **Add `LLM::Function#skill?`** <br>
|
|
71
|
+
Add `skill?` to `LLM::Function` so callers can check whether a
|
|
72
|
+
function is backed by a skill tool.
|
|
73
|
+
|
|
74
|
+
* **Add `LLM::Tool.skill?` and `LLM::Tool#skill?`** <br>
|
|
75
|
+
Add class-level `skill?` and instance-level `skill?` to
|
|
76
|
+
`LLM::Tool`, matching the existing `mcp?` and `a2a?` pattern.
|
|
77
|
+
|
|
78
|
+
* **Add `LLM::Transport::Request`** <br>
|
|
79
|
+
Add `LLM::Transport::Request` as a transport-agnostic request object
|
|
80
|
+
and update providers to build requests without depending directly on
|
|
81
|
+
Net::HTTP request classes. The built-in Net::HTTP transports still
|
|
82
|
+
accept existing Net::HTTP request objects through a compatibility
|
|
83
|
+
bridge, while alternative transports can handle the generic request
|
|
84
|
+
shape directly.
|
|
85
|
+
|
|
86
|
+
* **Add optional Curb transport support** <br>
|
|
87
|
+
Add `LLM::Transport::Curb`, an optional libcurl-backed transport
|
|
88
|
+
that can be selected with `transport: :curb`. Providers already
|
|
89
|
+
emit `LLM::Transport::Request` objects, so the Curb backend can
|
|
90
|
+
execute requests without routing through Net::HTTP.
|
|
91
|
+
|
|
92
|
+
* **Add symbolic transport shortcuts** <br>
|
|
93
|
+
Allow providers, MCP HTTP clients, and A2A HTTP clients to accept
|
|
94
|
+
transport shortcuts such as `transport: :curb` and
|
|
95
|
+
`transport: :net_http_persistent`.
|
|
96
|
+
|
|
97
|
+
* **Add persistent HTTP selection to MCP and A2A clients** <br>
|
|
98
|
+
Allow MCP and A2A HTTP clients to accept `persistent: true`, matching
|
|
99
|
+
provider configuration and selecting the persistent Net::HTTP
|
|
100
|
+
transport by default.
|
|
101
|
+
|
|
102
|
+
### Fix
|
|
103
|
+
|
|
104
|
+
* **Support JSON generation state on Ruby 4** <br>
|
|
105
|
+
Handle JSON generator state objects in the standard JSON adapter so
|
|
106
|
+
schema objects serialize correctly when Ruby 4 calls custom `to_json`
|
|
107
|
+
methods during provider request generation.
|
|
108
|
+
|
|
109
|
+
* **Emit tool return callbacks for direct context waits** <br>
|
|
110
|
+
Emit `LLM::Stream#on_tool_return` when `LLM::Context#wait` executes
|
|
111
|
+
pending tool work directly instead of draining `LLM::Stream::Queue`.
|
|
112
|
+
|
|
113
|
+
* **Emit confirmed tool return callbacks once** <br>
|
|
114
|
+
Emit `LLM::Stream#on_tool_return` for confirmed and cancelled tool
|
|
115
|
+
calls, and exclude confirmed functions from later waits so mixed
|
|
116
|
+
confirmed and unconfirmed tool batches do not execute confirmed tools
|
|
117
|
+
twice.
|
|
118
|
+
|
|
119
|
+
* **Compare functions by tool call ID** <br>
|
|
120
|
+
Add `LLM::Function#==`, `#eql?`, and `#hash` so pending function
|
|
121
|
+
collections can compare tool calls by provider-assigned ID instead of
|
|
122
|
+
object identity.
|
|
123
|
+
|
|
124
|
+
* **Preserve function array behavior after filtering** <br>
|
|
125
|
+
Preserve `LLM::Function::Array` behavior when subtracting function
|
|
126
|
+
arrays so filtered tool batches can still spawn through the normal
|
|
127
|
+
function array API.
|
|
128
|
+
|
|
129
|
+
* **Prevent skills from inheriting skill-backed tools** <br>
|
|
130
|
+
Exclude skill-backed tools when a skill sub-agent uses `tools:
|
|
131
|
+
inherit`, preventing skills loaded through a parent context from
|
|
132
|
+
being recursively exposed to nested skill agents.
|
|
133
|
+
|
|
5
134
|
## v11.1.0
|
|
6
135
|
|
|
7
136
|
Changes since `v11.0.0`.
|
|
@@ -133,13 +262,13 @@ requests outside `#session`, `LLM::Function#def` as a short alias for
|
|
|
133
262
|
|
|
134
263
|
* **Fix context and agent JSON serialization through `LLM.json`** <br>
|
|
135
264
|
Fix `LLM::Context#to_json` and `LLM::Agent#to_json` to serialize
|
|
136
|
-
through `LLM.json.dump(...)` instead of plain `to_json`.
|
|
265
|
+
through `LLM.json.dump(...)` instead of plain `to_json`.
|
|
137
266
|
|
|
138
267
|
* **Fix block-form ORM agent DSL forwarding** <br>
|
|
139
268
|
Fix block-form `model { ... }`, `tools { ... }`, and
|
|
140
269
|
`schema { ... }` declarations in the ActiveRecord and Sequel agent
|
|
141
270
|
wrappers so persisted agent models configure the internal agent class
|
|
142
|
-
the same way
|
|
271
|
+
the same way `LLM::Agent` does.
|
|
143
272
|
|
|
144
273
|
* **Fix missing `skills` in ORM agent wrappers** <br>
|
|
145
274
|
Fix the ActiveRecord and Sequel agent wrappers to expose `skills`, so
|
|
@@ -382,7 +511,7 @@ DSML tool-marker filtering in streamed text.
|
|
|
382
511
|
blocks that Bedrock rejects.
|
|
383
512
|
|
|
384
513
|
* **Suppress Bedrock DSML tool markers in streamed text** <br>
|
|
385
|
-
Filter
|
|
514
|
+
Filter `"\u003c\u003cDSML\u003efunction_calls\u003e\u003e"` markers out of streamed Bedrock
|
|
386
515
|
assistant text so tool-call sentinels do not leak into user-visible
|
|
387
516
|
output.
|
|
388
517
|
|
|
@@ -392,7 +521,7 @@ Changes since `v7.0.0`.
|
|
|
392
521
|
|
|
393
522
|
This release adds Unix-fork concurrency for process-isolated tool
|
|
394
523
|
execution, extends `LLM::Object` with `#merge` and `#delete`, and drops
|
|
395
|
-
Ruby 3.2 support due to
|
|
524
|
+
Ruby 3.2 support due to a segfault observed with the `:fork` path. It
|
|
396
525
|
promotes `LLM::Pipe` to the top-level namespace and adds
|
|
397
526
|
`persistent: true` on `LLM::MCP.http` for direct persistent transport
|
|
398
527
|
configuration. `LLM::Function#runner` is exposed as public API, agent
|
|
@@ -533,7 +662,7 @@ provider usage has been recorded yet.
|
|
|
533
662
|
buffer API.
|
|
534
663
|
|
|
535
664
|
* **Support percentage compaction token thresholds** <br>
|
|
536
|
-
Let `LLM::Compactor` accept `token_threshold:` values like
|
|
665
|
+
Let `LLM::Compactor` accept `token_threshold:` values like `"90%"` so
|
|
537
666
|
compaction can trigger at a percentage of the active model context
|
|
538
667
|
window.
|
|
539
668
|
|
|
@@ -692,7 +821,7 @@ interruption use the active per-call stream correctly.
|
|
|
692
821
|
|
|
693
822
|
* **Refresh provider model metadata** <br>
|
|
694
823
|
Add current DeepSeek and OpenAI model metadata to `data/` and update the
|
|
695
|
-
Google
|
|
824
|
+
Google Gemini model entry to match the current provider naming.
|
|
696
825
|
|
|
697
826
|
### Fix
|
|
698
827
|
|
|
@@ -1133,12 +1262,12 @@ Changes since `v4.14.0`.
|
|
|
1133
1262
|
storage when Sequel JSON typecasting is enabled.
|
|
1134
1263
|
|
|
1135
1264
|
* **Improve streaming parser performance** <br>
|
|
1136
|
-
In the local replay-based `stream_parser` benchmark versus
|
|
1137
|
-
|
|
1265
|
+
In the local replay-based `stream_parser` benchmark versus `v4.14.0`
|
|
1266
|
+
(median of 20 samples, 5000 iterations), plain Ruby is a
|
|
1138
1267
|
small overall win: the generic eventstream path is about 0.4%
|
|
1139
1268
|
faster, the OpenAI stream parser is about 0.5% faster, and the
|
|
1140
1269
|
OpenAI Responses parser is about 1.6% faster, with unchanged
|
|
1141
|
-
allocations. Under YJIT on the same benchmark, the generic
|
|
1270
|
+
allocations. Under YJIT on the same benchmark harness, the generic
|
|
1142
1271
|
eventstream path is about 0.9% faster and the OpenAI stream parser
|
|
1143
1272
|
is about 0.4% faster, while the OpenAI Responses parser is about
|
|
1144
1273
|
0.7% slower, also with unchanged allocations.
|
|
@@ -1180,7 +1309,7 @@ parallel tool calls can safely share one connection.
|
|
|
1180
1309
|
* **Reduce provider streaming allocations** <br>
|
|
1181
1310
|
Decode streamed provider payloads directly in
|
|
1182
1311
|
`LLM::Provider::Transport::HTTP` before handing them to provider
|
|
1183
|
-
parsers, which cuts allocation churn and gives a
|
|
1312
|
+
parsers, which cuts allocation churn and gives a small streaming
|
|
1184
1313
|
speed bump.
|
|
1185
1314
|
|
|
1186
1315
|
* **Reduce generic SSE parser allocations** <br>
|
|
@@ -1316,7 +1445,7 @@ Changes since `v4.9.0`.
|
|
|
1316
1445
|
|
|
1317
1446
|
- Add HTTP transport for MCP with `LLM::MCP::Transport::HTTP` for remote servers
|
|
1318
1447
|
- Add JSON Schema union types (`any_of`, `all_of`, `one_of`) with parser integration
|
|
1319
|
-
- Add JSON Schema type array union support (e.g.,
|
|
1448
|
+
- Add JSON Schema type array union support (e.g., `"type": ["object", "null"]`)
|
|
1320
1449
|
- Add JSON Schema type inference from `const`, `enum`, or `default` fields
|
|
1321
1450
|
|
|
1322
1451
|
### Change
|
|
@@ -1417,7 +1546,7 @@ Notable merged work in this range includes:
|
|
|
1417
1546
|
- `Add rack + websocket example (#130)`
|
|
1418
1547
|
- `feat(gemspec): add changelog URI (#136)`
|
|
1419
1548
|
- `feat(function): alias ThreadGroup#wait as ThreadGroup#value (#62)`
|
|
1420
|
-
- README and screencast refresh across `#66`, `#68`, `#71`, and
|
|
1549
|
+
- `README and screencast refresh across `#66`, `#68`, `#71`, and
|
|
1421
1550
|
`#72`
|
|
1422
1551
|
- `chore(bot): update deprecation warning from v5.0 to v6.0`
|
|
1423
1552
|
- `fix(deepseek): tolerate malformed tool arguments`
|
data/README.md
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
<img src="https://img.shields.io/badge/License-0BSD-orange.svg?" alt="License">
|
|
12
12
|
</a>
|
|
13
13
|
<a href="https://github.com/llmrb/llm.rb/tags">
|
|
14
|
-
<img src="https://img.shields.io/badge/version-11.
|
|
14
|
+
<img src="https://img.shields.io/badge/version-11.3.0-green.svg?" alt="Version">
|
|
15
15
|
</a>
|
|
16
16
|
</p>
|
|
17
17
|
|
|
@@ -30,10 +30,27 @@ also includes built-in ActiveRecord and Sequel support, plus concurrent
|
|
|
30
30
|
tool execution through threads, tasks (via async gem), fibers, ractors,
|
|
31
31
|
and fork (via xchan.rb gem).
|
|
32
32
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
33
|
+
## Services
|
|
34
|
+
|
|
35
|
+
The llm.rb runtime and its forks
|
|
36
|
+
([mruby-llm](https://github.com/llmrb/mruby-llm),
|
|
37
|
+
[wasm-llm](https://github.com/llmrb/wasm-llm))
|
|
38
|
+
power a growing family of AI applications and
|
|
39
|
+
services. The following applications are publicly
|
|
40
|
+
accessible over SSH and are free to try. No account
|
|
41
|
+
required. Nothing to install.
|
|
42
|
+
|
|
43
|
+
#### matz - the mruby expert
|
|
44
|
+
|
|
45
|
+
> ssh matz@r.uby.dev
|
|
46
|
+
|
|
47
|
+
See [https://r.uby.dev/matz](https://r.uby.dev/matz) for more information.
|
|
48
|
+
|
|
49
|
+
#### robert - the freebsd expert
|
|
50
|
+
|
|
51
|
+
> ssh robert@4.4bsd.dev
|
|
52
|
+
|
|
53
|
+
See [https://4.4bsd.dev/robert](https://4.4bsd.dev/robert) for more information.
|
|
37
54
|
|
|
38
55
|
## Quick start
|
|
39
56
|
|
|
@@ -138,10 +155,10 @@ to either
|
|
|
138
155
|
or
|
|
139
156
|
[LLM::Agent](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html).
|
|
140
157
|
In this example, the MCP server runs over stdio and
|
|
141
|
-
[LLM::
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
158
|
+
[LLM::Agent](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html)
|
|
159
|
+
manages the tool loop. For **stdio**, `mcp.session` is the preferred
|
|
160
|
+
pattern because it keeps one MCP session alive across discovery and
|
|
161
|
+
tool calls:
|
|
145
162
|
|
|
146
163
|
```ruby
|
|
147
164
|
require "llm"
|
|
@@ -150,9 +167,8 @@ llm = LLM.openai(key: ENV["KEY"])
|
|
|
150
167
|
mcp = LLM::MCP.stdio(argv: ["ruby", "server.rb"])
|
|
151
168
|
|
|
152
169
|
mcp.session do
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
ctx.talk(ctx.wait(:call)) while ctx.functions?
|
|
170
|
+
agent = LLM::Agent.new(llm, stream: $stdout, tools: mcp.tools)
|
|
171
|
+
agent.talk "Use the available tools to inspect the environment."
|
|
156
172
|
end
|
|
157
173
|
```
|
|
158
174
|
|
|
@@ -167,15 +183,16 @@ require "llm"
|
|
|
167
183
|
llm = LLM.openai(key: ENV["KEY"])
|
|
168
184
|
mcp = LLM::MCP.stdio(argv: ["ruby", "server.rb"])
|
|
169
185
|
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
ctx.talk(ctx.wait(:call)) while ctx.functions?
|
|
186
|
+
agent = LLM::Agent.new(llm, tools: mcp.tools)
|
|
187
|
+
agent.talk("Use the available tools to inspect the environment.")
|
|
173
188
|
```
|
|
174
189
|
|
|
175
190
|
The HTTP transport can be used with or without the `session` method,
|
|
176
191
|
and unlike the stdio transport it can remain efficient without the
|
|
177
192
|
`session` method through a persistent connection pool that is available
|
|
178
|
-
through the
|
|
193
|
+
through the
|
|
194
|
+
[LLM::Transport.net_http_persistent](https://0x1eef.github.io/x/llm.rb/LLM/Transport.html#method-c-net_http_persistent)
|
|
195
|
+
transport:
|
|
179
196
|
|
|
180
197
|
```ruby
|
|
181
198
|
require "llm"
|
|
@@ -183,12 +200,11 @@ require "llm"
|
|
|
183
200
|
llm = LLM.openai(key: ENV["KEY"])
|
|
184
201
|
mcp = LLM::MCP.http(
|
|
185
202
|
url: "https://remote-mcp.example.com",
|
|
186
|
-
transport:
|
|
203
|
+
transport: :net_http_persistent
|
|
187
204
|
)
|
|
188
205
|
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
ctx.talk(ctx.wait(:call)) while ctx.functions?
|
|
206
|
+
agent = LLM::Agent.new(llm, tools: mcp.tools)
|
|
207
|
+
agent.talk("Use the available tools to inspect the environment.")
|
|
192
208
|
```
|
|
193
209
|
|
|
194
210
|
#### A2A (Agent 2 Agent)
|
|
@@ -212,9 +228,8 @@ a2a = LLM::A2A.rest(
|
|
|
212
228
|
headers: {"Authorization" => "Bearer token"}
|
|
213
229
|
)
|
|
214
230
|
llm = LLM.openai(key: ENV["KEY"])
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
ctx.talk(ctx.wait(:call)) while ctx.functions?
|
|
231
|
+
agent = LLM::Agent.new(llm, tools: a2a.skills)
|
|
232
|
+
agent.talk "Analyze this CSV and summarize the trends."
|
|
218
233
|
```
|
|
219
234
|
|
|
220
235
|
Use persistent HTTP connections:
|
|
@@ -224,7 +239,7 @@ require "llm"
|
|
|
224
239
|
|
|
225
240
|
a2a = LLM::A2A.rest(
|
|
226
241
|
url: "https://remote-agent.example.com",
|
|
227
|
-
transport:
|
|
242
|
+
transport: :net_http_persistent
|
|
228
243
|
)
|
|
229
244
|
```
|
|
230
245
|
|
|
@@ -232,6 +247,27 @@ For more on direct messaging, task operations, push notification
|
|
|
232
247
|
configs, and JSON-RPC, see the
|
|
233
248
|
[LLM::A2A API docs](https://0x1eef.github.io/x/llm.rb/LLM/A2A.html).
|
|
234
249
|
|
|
250
|
+
#### Transports
|
|
251
|
+
|
|
252
|
+
Providers use Ruby's standard library Net::HTTP transport by default.
|
|
253
|
+
You can opt into persistent Net::HTTP connections with `persistent: true`,
|
|
254
|
+
or provide a transport shortcut when you want a different backend.
|
|
255
|
+
`transport: :curb` uses libcurl through the optional `curb` gem.
|
|
256
|
+
|
|
257
|
+
Custom transports can implement the
|
|
258
|
+
[LLM::Transport](https://0x1eef.github.io/x/llm.rb/LLM/Transport.html)
|
|
259
|
+
interface and receive transport-agnostic
|
|
260
|
+
[LLM::Transport::Request](https://0x1eef.github.io/x/llm.rb/LLM/Transport/Request.html)
|
|
261
|
+
objects from providers.
|
|
262
|
+
|
|
263
|
+
```ruby
|
|
264
|
+
require "llm"
|
|
265
|
+
|
|
266
|
+
llm = LLM.openai(key: ENV["KEY"], persistent: true)
|
|
267
|
+
llm = LLM.openai(key: ENV["KEY"], transport: :net_http_persistent)
|
|
268
|
+
llm = LLM.openai(key: ENV["KEY"], transport: :curb)
|
|
269
|
+
```
|
|
270
|
+
|
|
235
271
|
#### Skills
|
|
236
272
|
|
|
237
273
|
Skills are reusable instructions loaded from a `SKILL.md` directory. They let
|
|
@@ -294,8 +330,8 @@ class Stream < LLM::Stream
|
|
|
294
330
|
end
|
|
295
331
|
|
|
296
332
|
llm = LLM.openai(key: ENV["KEY"])
|
|
297
|
-
|
|
298
|
-
|
|
333
|
+
agent = LLM::Agent.new(llm, stream: Stream.new)
|
|
334
|
+
agent.talk "Write a haiku about Ruby."
|
|
299
335
|
```
|
|
300
336
|
|
|
301
337
|
#### LLM::Stream (advanced)
|
|
@@ -352,30 +388,31 @@ agent.talk "Read README.md and CHANGELOG.md and compare them."
|
|
|
352
388
|
|
|
353
389
|
#### Serialization
|
|
354
390
|
|
|
355
|
-
The [`LLM::
|
|
391
|
+
The [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html)
|
|
356
392
|
object can be serialized to JSON, which makes it suitable for storing
|
|
357
393
|
in a file, a database column, or a Redis queue. The built-in
|
|
358
|
-
ActiveRecord and Sequel plugins are built on top of
|
|
394
|
+
ActiveRecord and Sequel plugins are built on top of the same underlying
|
|
395
|
+
serialization feature:
|
|
359
396
|
|
|
360
397
|
```ruby
|
|
361
398
|
require "llm"
|
|
362
399
|
|
|
363
400
|
llm = LLM.openai(key: ENV["KEY"])
|
|
364
401
|
|
|
365
|
-
# Serialize
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
string =
|
|
402
|
+
# Serialize an agent
|
|
403
|
+
agent1 = LLM::Agent.new(llm)
|
|
404
|
+
agent1.talk "Remember that my favorite language is Ruby"
|
|
405
|
+
string = agent1.to_json
|
|
369
406
|
|
|
370
|
-
# Restore
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
407
|
+
# Restore an agent (from JSON)
|
|
408
|
+
agent2 = LLM::Agent.new(llm, stream: $stdout)
|
|
409
|
+
agent2.restore(string:)
|
|
410
|
+
agent2.talk "What is my favorite language?"
|
|
374
411
|
```
|
|
375
412
|
|
|
376
413
|
#### ask
|
|
377
414
|
|
|
378
|
-
[`LLM::
|
|
415
|
+
[`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html)
|
|
379
416
|
also provides `ask`, a convenience interface that is compatible with
|
|
380
417
|
RubyLLM's `ask` method. It accepts a prompt, an optional `with:`
|
|
381
418
|
attachment path or paths, an optional `stream:` target, and an optional
|
|
@@ -387,11 +424,11 @@ so use `.content` when you want the text directly:
|
|
|
387
424
|
require "llm"
|
|
388
425
|
|
|
389
426
|
llm = LLM.openai(key: ENV["KEY"])
|
|
390
|
-
|
|
427
|
+
agent = LLM::Agent.new(llm)
|
|
391
428
|
|
|
392
|
-
puts
|
|
393
|
-
puts
|
|
394
|
-
|
|
429
|
+
puts agent.ask("Hello world").content
|
|
430
|
+
puts agent.ask("Summarize this document.", with: "README.md").content
|
|
431
|
+
agent.ask("Stream this reply.") { $stdout << _1 }
|
|
395
432
|
```
|
|
396
433
|
|
|
397
434
|
## Installation
|
|
@@ -404,8 +441,8 @@ gem install llm.rb
|
|
|
404
441
|
|
|
405
442
|
#### REPL
|
|
406
443
|
|
|
407
|
-
This example uses [`LLM::
|
|
408
|
-
|
|
444
|
+
This example uses [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html)
|
|
445
|
+
for an interactive REPL. <br> See the
|
|
409
446
|
[deepdive (web)](https://llmrb.github.io/llm.rb/) or
|
|
410
447
|
[deepdive (markdown)](resources/deepdive.md) for more examples.
|
|
411
448
|
|
|
@@ -413,11 +450,11 @@ directly for an interactive REPL. <br> See the
|
|
|
413
450
|
require "llm"
|
|
414
451
|
|
|
415
452
|
llm = LLM.openai(key: ENV["KEY"])
|
|
416
|
-
|
|
453
|
+
agent = LLM::Agent.new(llm, stream: $stdout)
|
|
417
454
|
|
|
418
455
|
loop do
|
|
419
456
|
print "> "
|
|
420
|
-
|
|
457
|
+
agent.talk(STDIN.gets || break)
|
|
421
458
|
puts
|
|
422
459
|
end
|
|
423
460
|
```
|
|
@@ -426,36 +463,36 @@ end
|
|
|
426
463
|
|
|
427
464
|
In llm.rb, a prompt can be a string, an [`LLM::Prompt`](https://0x1eef.github.io/x/llm.rb/LLM/Prompt.html), or an array.
|
|
428
465
|
When you use an array, each element can be plain text or a tagged object such as
|
|
429
|
-
[`
|
|
430
|
-
[`
|
|
431
|
-
or [`
|
|
466
|
+
[`agent.image_url(...)`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html#image_url-instance_method),
|
|
467
|
+
[`agent.local_file(...)`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html#local_file-instance_method),
|
|
468
|
+
or [`agent.remote_file(...)`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html#remote_file-instance_method).
|
|
432
469
|
Those tagged objects carry the metadata the provider adapter needs to turn one
|
|
433
470
|
Ruby prompt into the provider-specific multimodal request schema.
|
|
434
471
|
|
|
435
472
|
If the model understands that file type, you can attach a local file directly
|
|
436
|
-
with `
|
|
473
|
+
with `agent.ask(..., with: path)` instead of uploading it first through a
|
|
437
474
|
provider Files API. Under the hood, llm.rb tags the path as a
|
|
438
|
-
[`
|
|
475
|
+
[`agent.local_file(...)`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html#local_file-instance_method)
|
|
439
476
|
object:
|
|
440
477
|
|
|
441
478
|
```ruby
|
|
442
479
|
require "llm"
|
|
443
480
|
|
|
444
481
|
llm = LLM.openai(key: ENV["KEY"])
|
|
445
|
-
|
|
446
|
-
puts
|
|
482
|
+
agent = LLM::Agent.new(llm)
|
|
483
|
+
puts agent.ask("Summarize this document.", with: "README.md").content
|
|
447
484
|
```
|
|
448
485
|
|
|
449
486
|
#### Context Compaction
|
|
450
487
|
|
|
451
|
-
This example uses [`LLM::
|
|
488
|
+
This example uses [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html),
|
|
452
489
|
[`LLM::Compactor`](https://0x1eef.github.io/x/llm.rb/LLM/Compactor.html), and
|
|
453
490
|
[`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html) together so
|
|
454
|
-
long-lived
|
|
491
|
+
long-lived conversations can summarize older history and expose the lifecycle
|
|
455
492
|
through stream hooks. This approach is inspired by General Intelligence
|
|
456
493
|
Systems. The
|
|
457
494
|
compactor can also use its own `model:` if you want summarization to run on a
|
|
458
|
-
different model from the main
|
|
495
|
+
different model from the main conversation. `token_threshold:` accepts either a
|
|
459
496
|
fixed token count or a percentage string like `"90%"`, which resolves
|
|
460
497
|
against the active model context window and triggers compaction once total
|
|
461
498
|
token usage goes over that percentage. See the
|
|
@@ -476,7 +513,7 @@ class Stream < LLM::Stream
|
|
|
476
513
|
end
|
|
477
514
|
|
|
478
515
|
llm = LLM.openai(key: ENV["KEY"])
|
|
479
|
-
|
|
516
|
+
agent = LLM::Agent.new(
|
|
480
517
|
llm,
|
|
481
518
|
stream: Stream.new,
|
|
482
519
|
compactor: {
|
|
@@ -495,9 +532,8 @@ visible assistant output. See the
|
|
|
495
532
|
[deepdive (web)](https://llmrb.github.io/llm.rb/) or
|
|
496
533
|
[deepdive (markdown)](resources/deepdive.md) for more examples.
|
|
497
534
|
|
|
498
|
-
To use the Responses API (OpenAI-specific), initialize
|
|
499
|
-
|
|
500
|
-
`talk` for turns.
|
|
535
|
+
To use the Responses API (OpenAI-specific), initialize an agent with
|
|
536
|
+
`mode: :responses` and keep using `talk` for turns.
|
|
501
537
|
|
|
502
538
|
```ruby
|
|
503
539
|
require "llm"
|
|
@@ -513,20 +549,20 @@ class Stream < LLM::Stream
|
|
|
513
549
|
end
|
|
514
550
|
|
|
515
551
|
llm = LLM.openai(key: ENV["KEY"])
|
|
516
|
-
|
|
552
|
+
agent = LLM::Agent.new(
|
|
517
553
|
llm,
|
|
518
554
|
model: "gpt-5.4-mini",
|
|
519
555
|
mode: :responses,
|
|
520
556
|
reasoning: {effort: "medium"},
|
|
521
557
|
stream: Stream.new
|
|
522
558
|
)
|
|
523
|
-
|
|
559
|
+
agent.talk("Solve 17 * 19 and show your work.")
|
|
524
560
|
```
|
|
525
561
|
|
|
526
562
|
#### Request Cancellation
|
|
527
563
|
|
|
528
564
|
Need to cancel a stream? llm.rb has you covered through
|
|
529
|
-
[`LLM::
|
|
565
|
+
[`LLM::Agent#interrupt!`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html#interrupt-21-instance_method).
|
|
530
566
|
<br> See the [deepdive (web)](https://llmrb.github.io/llm.rb/)
|
|
531
567
|
or [deepdive (markdown)](resources/deepdive.md) for more examples.
|
|
532
568
|
|
|
@@ -535,15 +571,15 @@ require "llm"
|
|
|
535
571
|
require "io/console"
|
|
536
572
|
|
|
537
573
|
llm = LLM.openai(key: ENV["KEY"])
|
|
538
|
-
|
|
574
|
+
agent = LLM::Agent.new(llm, stream: $stdout)
|
|
539
575
|
worker = Thread.new do
|
|
540
|
-
|
|
576
|
+
agent.talk("Write a very long essay about network protocols.")
|
|
541
577
|
rescue LLM::Interrupt
|
|
542
578
|
puts "Request was interrupted!"
|
|
543
579
|
end
|
|
544
580
|
|
|
545
581
|
STDIN.getch
|
|
546
|
-
|
|
582
|
+
agent.interrupt!
|
|
547
583
|
worker.join
|
|
548
584
|
```
|
|
549
585
|
|
|
@@ -704,7 +740,7 @@ end
|
|
|
704
740
|
|
|
705
741
|
This example uses [`LLM::MCP`](https://0x1eef.github.io/x/llm.rb/LLM/MCP.html)
|
|
706
742
|
over HTTP so remote GitHub MCP tools run through the same
|
|
707
|
-
`LLM::
|
|
743
|
+
`LLM::Agent` tool path as local tools. It expects a GitHub token in
|
|
708
744
|
`ENV["GITHUB_PAT"]`. See the
|
|
709
745
|
[deepdive (web)](https://llmrb.github.io/llm.rb/) or
|
|
710
746
|
[deepdive (markdown)](resources/deepdive.md) for more examples.
|
|
@@ -720,9 +756,8 @@ mcp = LLM::MCP.http(
|
|
|
720
756
|
persistent: true
|
|
721
757
|
)
|
|
722
758
|
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
ctx.talk(ctx.wait(:call)) while ctx.functions?
|
|
759
|
+
agent = LLM::Agent.new(llm, stream: $stdout, tools: mcp.tools)
|
|
760
|
+
agent.talk("Pull information about my GitHub account.")
|
|
726
761
|
```
|
|
727
762
|
|
|
728
763
|
## Resources
|