llm.rb 4.13.0 → 4.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +107 -0
- data/README.md +82 -32
- data/lib/llm/context.rb +25 -10
- data/lib/llm/error.rb +4 -0
- data/lib/llm/eventhandler.rb +16 -12
- data/lib/llm/eventstream/event.rb +15 -5
- data/lib/llm/eventstream/parser.rb +64 -17
- data/lib/llm/mcp/command.rb +1 -1
- data/lib/llm/mcp/mailbox.rb +23 -0
- data/lib/llm/mcp/pipe.rb +1 -1
- data/lib/llm/mcp/router.rb +44 -0
- data/lib/llm/mcp/rpc.rb +29 -18
- data/lib/llm/mcp/transport/http/event_handler.rb +11 -9
- data/lib/llm/mcp/transport/http.rb +2 -2
- data/lib/llm/mcp/transport/stdio.rb +1 -1
- data/lib/llm/mcp.rb +5 -2
- data/lib/llm/provider/transport/http/execution.rb +115 -0
- data/lib/llm/provider/transport/http/interruptible.rb +109 -0
- data/lib/llm/provider/transport/http/stream_decoder.rb +92 -0
- data/lib/llm/provider/transport/http.rb +144 -0
- data/lib/llm/provider.rb +17 -103
- data/lib/llm/providers/anthropic/stream_parser.rb +6 -3
- data/lib/llm/providers/google/stream_parser.rb +6 -3
- data/lib/llm/providers/ollama/stream_parser.rb +3 -2
- data/lib/llm/providers/openai/responses/stream_parser.rb +216 -91
- data/lib/llm/providers/openai/stream_parser.rb +111 -57
- data/lib/llm/response.rb +12 -4
- data/lib/llm/sequel/plugin.rb +252 -0
- data/lib/llm/stream/queue.rb +2 -2
- data/lib/llm/stream.rb +2 -2
- data/lib/llm/version.rb +1 -1
- data/lib/llm.rb +8 -0
- data/lib/sequel/plugins/llm.rb +8 -0
- metadata +9 -2
- data/lib/llm/client.rb +0 -36
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 40217f9b44b00028739994a8f6f6a278b366d7fa7f4799b86afd2b793f367084
|
|
4
|
+
data.tar.gz: cf7e6d7935cf6ab8479ac864e09ea7a4403a97345f98e30ae03de9ae941c97a0
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 3850bb93244032d3ee721c1a7bc85efd0e86f605a421960abfbab56b89b9b4c36d97efb68970759561f08f165069e4c2e213132c0bb485b3366c57bebb71e3ad
|
|
7
|
+
data.tar.gz: 2856c27c38e6d6d8d659d8a21c13b4ea514fdc0a0bd024935b2ea2f20d10eac96f324489f1c06a08c9bad6be5f97756eedc38b9f301e7a78533a8422f02cd329
|
data/CHANGELOG.md
CHANGED
|
@@ -2,8 +2,115 @@
|
|
|
2
2
|
|
|
3
3
|
## Unreleased
|
|
4
4
|
|
|
5
|
+
Changes since `v4.15.0`.
|
|
6
|
+
|
|
7
|
+
## v4.15.0
|
|
8
|
+
|
|
9
|
+
Changes since `v4.14.0`.
|
|
10
|
+
|
|
11
|
+
### Change
|
|
12
|
+
|
|
13
|
+
* **Reduce OpenAI stream parser merge overhead** <br>
|
|
14
|
+
Special-case the most common single-field deltas, streamline
|
|
15
|
+
incremental tool-call merging, and avoid repeated JSON parse attempts
|
|
16
|
+
until streamed tool arguments look complete.
|
|
17
|
+
|
|
18
|
+
* **Cache streaming callback capabilities in parsers** <br>
|
|
19
|
+
Cache callback support checks once at parser initialization time in
|
|
20
|
+
the OpenAI, OpenAI Responses, Anthropic, Google, and Ollama stream
|
|
21
|
+
parsers instead of repeating `respond_to?` checks on hot streaming
|
|
22
|
+
paths.
|
|
23
|
+
|
|
24
|
+
* **Reduce OpenAI Responses parser lookup overhead** <br>
|
|
25
|
+
Special-case the hot Responses API event paths and cache the current
|
|
26
|
+
output item and content part so streamed output text deltas do less
|
|
27
|
+
repeated nested lookup work.
|
|
28
|
+
|
|
29
|
+
* **Add a Sequel context persistence plugin** <br>
|
|
30
|
+
Add `plugin :llm` for Sequel models so apps can persist
|
|
31
|
+
`LLM::Context` state with default columns and pass provider setup
|
|
32
|
+
through `provider:` when needed. The plugin now also supports
|
|
33
|
+
`format: :string`, `:json`, or `:jsonb` for text and native JSON
|
|
34
|
+
storage when Sequel JSON typecasting is enabled.
|
|
35
|
+
|
|
36
|
+
* **Improve streaming parser performance** <br>
|
|
37
|
+
In the local replay-based `stream_parser` benchmark versus
|
|
38
|
+
`v4.14.0` (median of 20 samples, 5000 iterations), plain Ruby is a
|
|
39
|
+
small overall win: the generic eventstream path is about 0.4%
|
|
40
|
+
faster, the OpenAI stream parser is about 0.5% faster, and the
|
|
41
|
+
OpenAI Responses parser is about 1.6% faster, with unchanged
|
|
42
|
+
allocations. Under YJIT on the same benchmark, the generic
|
|
43
|
+
eventstream path is about 0.9% faster and the OpenAI stream parser
|
|
44
|
+
is about 0.4% faster, while the OpenAI Responses parser is about
|
|
45
|
+
0.7% slower, also with unchanged allocations.
|
|
46
|
+
|
|
47
|
+
Compared to `v4.13.0`, the larger `v4.14.0` streaming gains still
|
|
48
|
+
hold. The generic eventstream path remains dramatically faster than
|
|
49
|
+
`v4.13.0`, the OpenAI stream parser remains modestly faster, and the
|
|
50
|
+
OpenAI Responses parser is roughly flat to slightly better depending
|
|
51
|
+
on runtime. In other words, the current release keeps the large eventstream win
|
|
52
|
+
from `v4.14.0`, adds only small incremental changes beyond that, and
|
|
53
|
+
does not turn the post-`v4.14.0` parser work into another large
|
|
54
|
+
benchmark jump.
|
|
55
|
+
|
|
56
|
+
## v4.14.0
|
|
57
|
+
|
|
5
58
|
Changes since `v4.13.0`.
|
|
6
59
|
|
|
60
|
+
This release adds request interruption for contexts, reworks provider
|
|
61
|
+
HTTP internals for lower-overhead streaming, and fixes MCP clients so
|
|
62
|
+
parallel tool calls can safely share one connection.
|
|
63
|
+
|
|
64
|
+
### Add
|
|
65
|
+
|
|
66
|
+
* **Add request interruption support** <br>
|
|
67
|
+
Add `LLM::Context#interrupt!`, `LLM::Context#cancel!`, and
|
|
68
|
+
`LLM::Interrupt` for interrupting in-flight provider requests,
|
|
69
|
+
inspired by Go's context cancellation.
|
|
70
|
+
|
|
71
|
+
### Change
|
|
72
|
+
|
|
73
|
+
* **Rework provider HTTP transport internals** <br>
|
|
74
|
+
Rework provider HTTP around `LLM::Provider::Transport::HTTP` with
|
|
75
|
+
explicit transient and persistent transport handling.
|
|
76
|
+
|
|
77
|
+
* **Reduce SSE parser overhead** <br>
|
|
78
|
+
Dispatch raw parsed values to registered visitors instead of building
|
|
79
|
+
an `Event` object for every streamed line.
|
|
80
|
+
|
|
81
|
+
* **Reduce provider streaming allocations** <br>
|
|
82
|
+
Decode streamed provider payloads directly in
|
|
83
|
+
`LLM::Provider::Transport::HTTP` before handing them to provider
|
|
84
|
+
parsers, which cuts allocation churn and gives a smaller streaming
|
|
85
|
+
speed bump.
|
|
86
|
+
|
|
87
|
+
* **Reduce generic SSE parser allocations** <br>
|
|
88
|
+
Keep unread event-stream buffer data in place until compaction is
|
|
89
|
+
worthwhile, which lowers allocation churn in the remaining generic
|
|
90
|
+
SSE path.
|
|
91
|
+
|
|
92
|
+
* **Improve streaming parser performance** <br>
|
|
93
|
+
In the local replay-based `stream_parser` benchmark versus `v4.13.0`
|
|
94
|
+
(median of 20 samples, 5000 iterations):
|
|
95
|
+
Plain Ruby: the generic eventstream path is about 53% faster with
|
|
96
|
+
about 32% fewer allocations, the OpenAI stream parser is about 11%
|
|
97
|
+
faster with about 4% fewer allocations, and the OpenAI Responses
|
|
98
|
+
parser is about 3% faster with unchanged allocations.
|
|
99
|
+
YJIT on the current parser benchmark harness: the current tree is
|
|
100
|
+
about 26% faster than non-YJIT on the generic eventstream path,
|
|
101
|
+
about 18% faster on the OpenAI stream parser, and about 16% faster
|
|
102
|
+
on the OpenAI Responses parser, with allocations unchanged.
|
|
103
|
+
|
|
104
|
+
### Fix
|
|
105
|
+
|
|
106
|
+
* **Support parallel MCP tool calls on one client** <br>
|
|
107
|
+
Route MCP responses by JSON-RPC id so concurrent tool calls can
|
|
108
|
+
share one client and transport without mismatching replies.
|
|
109
|
+
|
|
110
|
+
* **Use explicit MCP non-blocking read errors** <br>
|
|
111
|
+
Use `IO::EAGAINWaitReadable` while continuing to retry on
|
|
112
|
+
`IO::WaitReadable`.
|
|
113
|
+
|
|
7
114
|
## v4.13.0
|
|
8
115
|
|
|
9
116
|
Changes since `v4.12.0`.
|
data/README.md
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
<p align="center">
|
|
5
5
|
<a href="https://0x1eef.github.io/x/llm.rb?rebuild=1"><img src="https://img.shields.io/badge/docs-0x1eef.github.io-blue.svg" alt="RubyDoc"></a>
|
|
6
6
|
<a href="https://opensource.org/license/0bsd"><img src="https://img.shields.io/badge/License-0BSD-orange.svg?" alt="License"></a>
|
|
7
|
-
<a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-4.
|
|
7
|
+
<a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-4.15.0-green.svg?" alt="Version"></a>
|
|
8
8
|
</p>
|
|
9
9
|
|
|
10
10
|
## About
|
|
@@ -17,9 +17,9 @@ state.
|
|
|
17
17
|
It is built for engineers who want control over how these systems run. llm.rb
|
|
18
18
|
stays close to Ruby, runs on the standard library by default, loads optional
|
|
19
19
|
pieces only when needed, and remains easy to extend. It also works well in
|
|
20
|
-
Rails or ActiveRecord applications,
|
|
21
|
-
|
|
22
|
-
requests, jobs, or retries.
|
|
20
|
+
Rails or ActiveRecord applications, and it includes built-in Sequel plugin
|
|
21
|
+
support, where a small wrapper around context persistence is enough to save
|
|
22
|
+
and restore long-lived conversation state across requests, jobs, or retries.
|
|
23
23
|
|
|
24
24
|
Most LLM libraries stop at request/response APIs. Building real systems means
|
|
25
25
|
stitching together streaming, tools, state, persistence, and external
|
|
@@ -28,22 +28,14 @@ so they compose naturally instead of becoming separate subsystems.
|
|
|
28
28
|
|
|
29
29
|
## Architecture
|
|
30
30
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
└────────── Tools / MCP Layer ───────┘
|
|
35
|
-
│
|
|
36
|
-
llm.rb Contexts
|
|
37
|
-
│
|
|
38
|
-
LLM Providers
|
|
39
|
-
(OpenAI, Anthropic, etc.)
|
|
40
|
-
│
|
|
41
|
-
Your Application
|
|
42
|
-
```
|
|
31
|
+
<p align="center">
|
|
32
|
+
<img src="https://github.com/llmrb/llm.rb/raw/main/resources/architecture.png" alt="llm.rb architecture" width="790">
|
|
33
|
+
</p>
|
|
43
34
|
|
|
44
35
|
## Core Concept
|
|
45
36
|
|
|
46
|
-
`LLM::Context`
|
|
37
|
+
[`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html)
|
|
38
|
+
is the execution boundary in llm.rb.
|
|
47
39
|
|
|
48
40
|
It holds:
|
|
49
41
|
- message history
|
|
@@ -59,54 +51,89 @@ same context object.
|
|
|
59
51
|
|
|
60
52
|
### Execution Model
|
|
61
53
|
|
|
62
|
-
- **A system layer, not just an API wrapper**
|
|
54
|
+
- **A system layer, not just an API wrapper** <br>
|
|
63
55
|
Put providers, tools, MCP servers, and application APIs behind one runtime
|
|
64
56
|
model instead of stitching them together by hand.
|
|
65
|
-
- **Contexts are central**
|
|
57
|
+
- **Contexts are central** <br>
|
|
66
58
|
Keep history, tools, schema, usage, persistence, and execution state in one
|
|
67
59
|
place instead of spreading them across your app.
|
|
68
|
-
- **Contexts can be serialized**
|
|
60
|
+
- **Contexts can be serialized** <br>
|
|
69
61
|
Save and restore live state for jobs, databases, retries, or long-running
|
|
70
62
|
workflows.
|
|
71
63
|
|
|
72
64
|
### Runtime Behavior
|
|
73
65
|
|
|
74
|
-
- **Streaming and tool execution work together**
|
|
66
|
+
- **Streaming and tool execution work together** <br>
|
|
75
67
|
Start tool work while output is still streaming so you can hide latency
|
|
76
68
|
instead of waiting for turns to finish.
|
|
77
|
-
- **
|
|
69
|
+
- **Tool calls have an explicit lifecycle** <br>
|
|
70
|
+
A tool call can be executed, cancelled through
|
|
71
|
+
[`LLM::Function#cancel`](https://0x1eef.github.io/x/llm.rb/LLM/Function.html#cancel-instance_method),
|
|
72
|
+
or left unresolved for manual handling, but the normal runtime contract is
|
|
73
|
+
still that a model-issued tool request is answered with a tool return.
|
|
74
|
+
- **Requests can be interrupted cleanly** <br>
|
|
75
|
+
Stop in-flight provider work through the same runtime instead of treating
|
|
76
|
+
cancellation as a separate concern.
|
|
77
|
+
[`LLM::Context#cancel!`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html#cancel-21-instance_method)
|
|
78
|
+
is inspired by Go's context cancellation model.
|
|
79
|
+
- **Concurrency is a first-class feature** <br>
|
|
78
80
|
Use threads, fibers, or async tasks without rewriting your tool layer.
|
|
79
|
-
- **Advanced workloads are built in, not bolted on**
|
|
81
|
+
- **Advanced workloads are built in, not bolted on** <br>
|
|
80
82
|
Streaming, concurrent tool execution, persistence, tracing, and MCP support
|
|
81
83
|
all fit the same runtime model.
|
|
82
84
|
|
|
83
85
|
### Integration
|
|
84
86
|
|
|
85
|
-
- **MCP is built in**
|
|
87
|
+
- **MCP is built in** <br>
|
|
86
88
|
Connect to MCP servers over stdio or HTTP without bolting on a separate
|
|
87
89
|
integration stack.
|
|
88
|
-
- **
|
|
90
|
+
- **Sequel persistence is built in** <br>
|
|
91
|
+
Use `plugin :llm` to persist `LLM::Context` state on a Sequel model with
|
|
92
|
+
sensible default columns, then pass provider setup through
|
|
93
|
+
`provider:` when you need it. Use `format: :string` for text columns or
|
|
94
|
+
`format: :jsonb` when you want native PostgreSQL JSON storage with Sequel's
|
|
95
|
+
JSON typecasting support enabled.
|
|
96
|
+
- **Persistent HTTP pooling is shared process-wide** <br>
|
|
97
|
+
When enabled, separate
|
|
98
|
+
[`LLM::Provider`](https://0x1eef.github.io/x/llm.rb/LLM/Provider.html)
|
|
99
|
+
instances with the same endpoint settings can share one persistent
|
|
100
|
+
pool, and separate HTTP
|
|
101
|
+
[`LLM::MCP`](https://0x1eef.github.io/x/llm.rb/LLM/MCP.html)
|
|
102
|
+
instances can do the same, instead of each object creating its own
|
|
103
|
+
isolated per-instance transport.
|
|
104
|
+
- **Provider support is broad** <br>
|
|
105
|
+
Work with OpenAI, OpenAI-compatible endpoints, Anthropic, Google, DeepSeek,
|
|
106
|
+
Z.ai, xAI, llama.cpp, and Ollama through the same runtime.
|
|
107
|
+
- **Tools are explicit** <br>
|
|
89
108
|
Run local tools, provider-native tools, and MCP tools through the same path
|
|
90
109
|
with fewer special cases.
|
|
91
|
-
- **Providers are normalized, not flattened**
|
|
110
|
+
- **Providers are normalized, not flattened** <br>
|
|
92
111
|
Share one API surface across providers without losing access to provider-
|
|
93
112
|
specific capabilities where they matter.
|
|
94
|
-
- **
|
|
113
|
+
- **Responses keep a uniform shape** <br>
|
|
114
|
+
Provider calls return
|
|
115
|
+
[`LLM::Response`](https://0x1eef.github.io/x/llm.rb/LLM/Response.html)
|
|
116
|
+
objects as a common base shape, then extend them with endpoint- or
|
|
117
|
+
provider-specific behavior when needed.
|
|
118
|
+
- **Low-level access is still there** <br>
|
|
119
|
+
Normalized responses still keep the raw `Net::HTTPResponse` available when
|
|
120
|
+
you need headers, status, or other HTTP details.
|
|
121
|
+
- **Local model metadata is included** <br>
|
|
95
122
|
Model capabilities, pricing, and limits are available locally without extra
|
|
96
123
|
API calls.
|
|
97
124
|
|
|
98
125
|
### Design Philosophy
|
|
99
126
|
|
|
100
|
-
- **Runs on the stdlib**
|
|
127
|
+
- **Runs on the stdlib** <br>
|
|
101
128
|
Start with Ruby's standard library and add extra dependencies only when you
|
|
102
129
|
need them.
|
|
103
|
-
- **It is highly pluggable**
|
|
130
|
+
- **It is highly pluggable** <br>
|
|
104
131
|
Add tools, swap providers, change JSON backends, plug in tracing, or layer
|
|
105
132
|
internal APIs and MCP servers into the same execution path.
|
|
106
|
-
- **It scales from scripts to long-lived systems**
|
|
133
|
+
- **It scales from scripts to long-lived systems** <br>
|
|
107
134
|
The same primitives work for one-off scripts, background jobs, and more
|
|
108
135
|
demanding application workloads with streaming, persistence, and tracing.
|
|
109
|
-
- **Thread boundaries are clear**
|
|
136
|
+
- **Thread boundaries are clear** <br>
|
|
110
137
|
Providers are shareable. Contexts are stateful and should stay thread-local.
|
|
111
138
|
|
|
112
139
|
## Capabilities
|
|
@@ -114,6 +141,7 @@ same context object.
|
|
|
114
141
|
- **Chat & Contexts** — stateless and stateful interactions with persistence
|
|
115
142
|
- **Context Serialization** — save and restore state across processes or time
|
|
116
143
|
- **Streaming** — visible output, reasoning output, tool-call events
|
|
144
|
+
- **Request Interruption** — stop in-flight provider work cleanly
|
|
117
145
|
- **Tool Calling** — class-based tools and closure-based functions
|
|
118
146
|
- **Run Tools While Streaming** — overlap model output with tool latency
|
|
119
147
|
- **Concurrent Execution** — threads, async tasks, and fibers
|
|
@@ -138,7 +166,11 @@ same context object.
|
|
|
138
166
|
gem install llm.rb
|
|
139
167
|
```
|
|
140
168
|
|
|
141
|
-
##
|
|
169
|
+
## Examples
|
|
170
|
+
|
|
171
|
+
**REPL**
|
|
172
|
+
|
|
173
|
+
See the [deepdive](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) for more examples.
|
|
142
174
|
|
|
143
175
|
```ruby
|
|
144
176
|
require "llm"
|
|
@@ -153,6 +185,24 @@ loop do
|
|
|
153
185
|
end
|
|
154
186
|
```
|
|
155
187
|
|
|
188
|
+
**Sequel (ORM)**
|
|
189
|
+
|
|
190
|
+
See the [deepdive](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) for more examples.
|
|
191
|
+
|
|
192
|
+
```ruby
|
|
193
|
+
require "llm"
|
|
194
|
+
require "sequel"
|
|
195
|
+
require "sequel/plugins/llm"
|
|
196
|
+
|
|
197
|
+
class Context < Sequel::Model
|
|
198
|
+
plugin :llm, provider: -> { { key: ENV["#{provider.upcase}_SECRET"], persistent: true } }
|
|
199
|
+
end
|
|
200
|
+
|
|
201
|
+
ctx = Context.create(provider: "openai", model: "gpt-5.4-mini")
|
|
202
|
+
ctx.talk("Remember that my favorite language is Ruby")
|
|
203
|
+
puts ctx.talk("What is my favorite language?").content
|
|
204
|
+
```
|
|
205
|
+
|
|
156
206
|
## Resources
|
|
157
207
|
|
|
158
208
|
- [deepdive](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) is the
|
data/lib/llm/context.rb
CHANGED
|
@@ -2,16 +2,21 @@
|
|
|
2
2
|
|
|
3
3
|
module LLM
|
|
4
4
|
##
|
|
5
|
-
# {LLM::Context LLM::Context}
|
|
6
|
-
#
|
|
7
|
-
# and cost tracking. It evolves over time as the system runs.
|
|
5
|
+
# {LLM::Context LLM::Context} is the stateful execution boundary in
|
|
6
|
+
# llm.rb.
|
|
8
7
|
#
|
|
9
|
-
#
|
|
10
|
-
#
|
|
11
|
-
#
|
|
8
|
+
# It holds the evolving runtime state for an LLM workflow:
|
|
9
|
+
# conversation history, tool calls and returns, schema and streaming
|
|
10
|
+
# configuration, accumulated usage, and request ownership for
|
|
11
|
+
# interruption.
|
|
12
12
|
#
|
|
13
|
-
#
|
|
14
|
-
#
|
|
13
|
+
# This is broader than prompt context alone. A context is the object
|
|
14
|
+
# that lets one-off prompts, streaming turns, tool execution,
|
|
15
|
+
# persistence, retries, and serialized long-lived workflows all run
|
|
16
|
+
# through the same model.
|
|
17
|
+
#
|
|
18
|
+
# A context can drive the chat completions API that all providers
|
|
19
|
+
# support or the Responses API on providers that expose it.
|
|
15
20
|
#
|
|
16
21
|
# @example
|
|
17
22
|
# #!/usr/bin/env ruby
|
|
@@ -62,6 +67,7 @@ module LLM
|
|
|
62
67
|
@mode = params.delete(:mode) || :completions
|
|
63
68
|
@params = {model: llm.default_model, schema: nil}.compact.merge!(params)
|
|
64
69
|
@messages = LLM::Buffer.new(llm)
|
|
70
|
+
@owner = Fiber.current
|
|
65
71
|
end
|
|
66
72
|
|
|
67
73
|
##
|
|
@@ -184,6 +190,15 @@ module LLM
|
|
|
184
190
|
end
|
|
185
191
|
end
|
|
186
192
|
|
|
193
|
+
##
|
|
194
|
+
# Interrupt the active request, if any.
|
|
195
|
+
# This is inspired by Go's context cancellation model.
|
|
196
|
+
# @return [nil]
|
|
197
|
+
def interrupt!
|
|
198
|
+
llm.interrupt!(@owner)
|
|
199
|
+
end
|
|
200
|
+
alias_method :cancel!, :interrupt!
|
|
201
|
+
|
|
187
202
|
##
|
|
188
203
|
# Returns token usage accumulated in this context
|
|
189
204
|
# @note
|
|
@@ -262,13 +277,13 @@ module LLM
|
|
|
262
277
|
##
|
|
263
278
|
# @return [Hash]
|
|
264
279
|
def to_h
|
|
265
|
-
{model:, messages:}
|
|
280
|
+
{schema_version: 1, model:, messages:}
|
|
266
281
|
end
|
|
267
282
|
|
|
268
283
|
##
|
|
269
284
|
# @return [String]
|
|
270
285
|
def to_json(...)
|
|
271
|
-
|
|
286
|
+
to_h.to_json(...)
|
|
272
287
|
end
|
|
273
288
|
|
|
274
289
|
##
|
data/lib/llm/error.rb
CHANGED
|
@@ -55,6 +55,10 @@ module LLM
|
|
|
55
55
|
# When stuck in a tool call loop
|
|
56
56
|
ToolLoopError = Class.new(Error)
|
|
57
57
|
|
|
58
|
+
##
|
|
59
|
+
# When a request is interrupted
|
|
60
|
+
Interrupt = Class.new(Error)
|
|
61
|
+
|
|
58
62
|
##
|
|
59
63
|
# When a tool call cannot be mapped to a local tool
|
|
60
64
|
NoSuchToolError = Class.new(Error)
|
data/lib/llm/eventhandler.rb
CHANGED
|
@@ -13,13 +13,15 @@ module LLM
|
|
|
13
13
|
|
|
14
14
|
##
|
|
15
15
|
# "data:" event callback
|
|
16
|
-
# @param [LLM::EventStream::Event] event
|
|
16
|
+
# @param [LLM::EventStream::Event, String, nil] event
|
|
17
|
+
# @param [String, nil] chunk
|
|
17
18
|
# @return [void]
|
|
18
|
-
def on_data(event)
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
19
|
+
def on_data(event, chunk = nil)
|
|
20
|
+
value = chunk ? event : event.value
|
|
21
|
+
return if value == "[DONE]"
|
|
22
|
+
payload = LLM.json.load(value)
|
|
23
|
+
return unless payload
|
|
24
|
+
@parser.parse!(payload)
|
|
23
25
|
rescue *LLM.json.parser_error
|
|
24
26
|
end
|
|
25
27
|
|
|
@@ -28,13 +30,15 @@ module LLM
|
|
|
28
30
|
# is received, regardless of whether it has
|
|
29
31
|
# a field name or not. Primarily for ollama,
|
|
30
32
|
# which does not emit Server-Sent Events (SSE).
|
|
31
|
-
# @param [LLM::EventStream::Event] event
|
|
33
|
+
# @param [LLM::EventStream::Event, String, nil] event
|
|
34
|
+
# @param [String, nil] chunk
|
|
32
35
|
# @return [void]
|
|
33
|
-
def on_chunk(event)
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
36
|
+
def on_chunk(event, chunk = nil)
|
|
37
|
+
raw_chunk = chunk || event&.chunk || event
|
|
38
|
+
return if raw_chunk == "[DONE]"
|
|
39
|
+
payload = LLM.json.load(raw_chunk)
|
|
40
|
+
return unless payload
|
|
41
|
+
@parser.parse!(payload)
|
|
38
42
|
rescue *LLM.json.parser_error
|
|
39
43
|
end
|
|
40
44
|
|
|
@@ -4,8 +4,17 @@ module LLM::EventStream
|
|
|
4
4
|
##
|
|
5
5
|
# @private
|
|
6
6
|
class Event
|
|
7
|
-
|
|
8
|
-
|
|
7
|
+
UNSET = Object.new.freeze
|
|
8
|
+
|
|
9
|
+
def self.parse(chunk)
|
|
10
|
+
newline = chunk.end_with?("\n") ? chunk.bytesize - 1 : chunk.bytesize
|
|
11
|
+
separator = chunk.index(":")
|
|
12
|
+
return [nil, nil] unless separator
|
|
13
|
+
field = chunk.byteslice(0, separator)
|
|
14
|
+
value_start = separator + (chunk.getbyte(separator + 1) == 32 ? 2 : 1)
|
|
15
|
+
value = value_start < newline ? chunk.byteslice(value_start, newline - value_start) : nil
|
|
16
|
+
[field, value]
|
|
17
|
+
end
|
|
9
18
|
|
|
10
19
|
##
|
|
11
20
|
# Returns the field name
|
|
@@ -25,9 +34,10 @@ module LLM::EventStream
|
|
|
25
34
|
##
|
|
26
35
|
# @param [String] chunk
|
|
27
36
|
# @return [LLM::EventStream::Event]
|
|
28
|
-
def initialize(chunk)
|
|
29
|
-
@field = chunk
|
|
30
|
-
@
|
|
37
|
+
def initialize(chunk, field: UNSET, value: UNSET)
|
|
38
|
+
@field, @value = self.class.parse(chunk) if field.equal?(UNSET) || value.equal?(UNSET)
|
|
39
|
+
@field = field unless field.equal?(UNSET)
|
|
40
|
+
@value = value unless value.equal?(UNSET)
|
|
31
41
|
@chunk = chunk
|
|
32
42
|
end
|
|
33
43
|
|
|
@@ -4,6 +4,9 @@ module LLM::EventStream
|
|
|
4
4
|
##
|
|
5
5
|
# @private
|
|
6
6
|
class Parser
|
|
7
|
+
COMPACT_THRESHOLD = 4096
|
|
8
|
+
Visitor = Struct.new(:target, :on_data, :on_event, :on_id, :on_retry, :on_chunk)
|
|
9
|
+
|
|
7
10
|
##
|
|
8
11
|
# @return [LLM::EventStream::Parser]
|
|
9
12
|
def initialize
|
|
@@ -18,7 +21,12 @@ module LLM::EventStream
|
|
|
18
21
|
# @param [#on_data] visitor
|
|
19
22
|
# @return [void]
|
|
20
23
|
def register(visitor)
|
|
21
|
-
@visitors <<
|
|
24
|
+
@visitors << Visitor.new(
|
|
25
|
+
visitor,
|
|
26
|
+
visitor.respond_to?(:on_data), visitor.respond_to?(:on_event),
|
|
27
|
+
visitor.respond_to?(:on_id), visitor.respond_to?(:on_retry),
|
|
28
|
+
visitor.respond_to?(:on_chunk)
|
|
29
|
+
)
|
|
22
30
|
end
|
|
23
31
|
|
|
24
32
|
##
|
|
@@ -42,7 +50,8 @@ module LLM::EventStream
|
|
|
42
50
|
# Returns the internal buffer
|
|
43
51
|
# @return [String]
|
|
44
52
|
def body
|
|
45
|
-
@buffer.dup
|
|
53
|
+
return @buffer.dup if @cursor.zero?
|
|
54
|
+
@buffer.byteslice(@cursor, @buffer.bytesize - @cursor) || +""
|
|
46
55
|
end
|
|
47
56
|
|
|
48
57
|
##
|
|
@@ -55,34 +64,72 @@ module LLM::EventStream
|
|
|
55
64
|
|
|
56
65
|
private
|
|
57
66
|
|
|
58
|
-
def
|
|
59
|
-
|
|
60
|
-
|
|
67
|
+
def parse_event!(chunk, field, value)
|
|
68
|
+
dispatch_visitors(field, value, chunk)
|
|
69
|
+
dispatch_callbacks(field, value, chunk)
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
def parse!(chunk)
|
|
73
|
+
field, value = Event.parse(chunk)
|
|
74
|
+
parse_event!(chunk, field, value)
|
|
61
75
|
end
|
|
62
76
|
|
|
63
|
-
def
|
|
64
|
-
@visitors.each { dispatch_visitor(_1,
|
|
65
|
-
@events[event.field].each { _1.call(event) }
|
|
77
|
+
def dispatch_visitors(field, value, chunk)
|
|
78
|
+
@visitors.each { dispatch_visitor(_1, field, value, chunk) }
|
|
66
79
|
end
|
|
67
80
|
|
|
68
|
-
def
|
|
69
|
-
|
|
70
|
-
if
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
81
|
+
def dispatch_callbacks(field, value, chunk)
|
|
82
|
+
callbacks = @events[field]
|
|
83
|
+
return if callbacks.empty?
|
|
84
|
+
event = Event.new(chunk, field:, value:)
|
|
85
|
+
callbacks.each { _1.call(event) }
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
def dispatch_visitor(visitor, field, value, chunk)
|
|
89
|
+
target = visitor.target
|
|
90
|
+
if field == "data"
|
|
91
|
+
if visitor.on_data
|
|
92
|
+
target.on_data(value, chunk)
|
|
93
|
+
elsif visitor.on_chunk
|
|
94
|
+
target.on_chunk(nil, chunk)
|
|
95
|
+
end
|
|
96
|
+
elsif field == "event"
|
|
97
|
+
if visitor.on_event
|
|
98
|
+
target.on_event(value, chunk)
|
|
99
|
+
elsif visitor.on_chunk
|
|
100
|
+
target.on_chunk(nil, chunk)
|
|
101
|
+
end
|
|
102
|
+
elsif field == "id"
|
|
103
|
+
if visitor.on_id
|
|
104
|
+
target.on_id(value, chunk)
|
|
105
|
+
elsif visitor.on_chunk
|
|
106
|
+
target.on_chunk(nil, chunk)
|
|
107
|
+
end
|
|
108
|
+
elsif field == "retry"
|
|
109
|
+
if visitor.on_retry
|
|
110
|
+
target.on_retry(value, chunk)
|
|
111
|
+
elsif visitor.on_chunk
|
|
112
|
+
target.on_chunk(nil, chunk)
|
|
113
|
+
end
|
|
114
|
+
elsif visitor.on_chunk
|
|
115
|
+
target.on_chunk(nil, chunk)
|
|
74
116
|
end
|
|
75
117
|
end
|
|
76
118
|
|
|
77
119
|
def each_line
|
|
78
120
|
while (newline = @buffer.index("\n", @cursor))
|
|
79
|
-
line = @buffer
|
|
121
|
+
line = @buffer.byteslice(@cursor, newline - @cursor + 1)
|
|
80
122
|
@cursor = newline + 1
|
|
81
123
|
yield(line)
|
|
82
124
|
end
|
|
83
125
|
return if @cursor.zero?
|
|
84
|
-
@
|
|
85
|
-
|
|
126
|
+
if @cursor >= @buffer.bytesize
|
|
127
|
+
@buffer.clear
|
|
128
|
+
@cursor = 0
|
|
129
|
+
elsif @cursor >= COMPACT_THRESHOLD
|
|
130
|
+
@buffer = @buffer.byteslice(@cursor, @buffer.bytesize - @cursor) || +""
|
|
131
|
+
@cursor = 0
|
|
132
|
+
end
|
|
86
133
|
end
|
|
87
134
|
end
|
|
88
135
|
end
|
data/lib/llm/mcp/command.rb
CHANGED
|
@@ -74,7 +74,7 @@ class LLM::MCP
|
|
|
74
74
|
# The IO stream to read from (:stdout, :stderr)
|
|
75
75
|
# @raise [LLM::Error]
|
|
76
76
|
# When the command is not running
|
|
77
|
-
# @raise [IO::
|
|
77
|
+
# @raise [IO::EAGAINWaitReadable]
|
|
78
78
|
# When no complete message is available to read
|
|
79
79
|
# @return [String]
|
|
80
80
|
# The next complete line from the specified IO stream
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class LLM::MCP
|
|
4
|
+
##
|
|
5
|
+
# A per-request mailbox for routing a JSON-RPC response back to the
|
|
6
|
+
# caller waiting on that request id.
|
|
7
|
+
class Mailbox
|
|
8
|
+
def initialize
|
|
9
|
+
@queue = Queue.new
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
def <<(message)
|
|
13
|
+
@queue << message
|
|
14
|
+
self
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def pop
|
|
18
|
+
@queue.pop(true)
|
|
19
|
+
rescue ThreadError
|
|
20
|
+
nil
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
data/lib/llm/mcp/pipe.rb
CHANGED