llm.rb 4.10.0 → 4.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +152 -0
- data/README.md +265 -113
- data/data/anthropic.json +209 -242
- data/data/deepseek.json +15 -15
- data/data/google.json +553 -403
- data/data/openai.json +740 -535
- data/data/xai.json +250 -253
- data/data/zai.json +157 -90
- data/lib/llm/context/deserializer.rb +2 -1
- data/lib/llm/context.rb +58 -2
- data/lib/llm/contract/completion.rb +7 -0
- data/lib/llm/error.rb +4 -0
- data/lib/llm/eventhandler.rb +7 -0
- data/lib/llm/function/registry.rb +106 -0
- data/lib/llm/function/task.rb +39 -0
- data/lib/llm/function.rb +12 -7
- data/lib/llm/mcp/transport/http.rb +40 -6
- data/lib/llm/mcp/transport/stdio.rb +7 -0
- data/lib/llm/mcp.rb +54 -24
- data/lib/llm/message.rb +9 -2
- data/lib/llm/provider.rb +10 -0
- data/lib/llm/providers/anthropic/response_adapter/completion.rb +6 -0
- data/lib/llm/providers/anthropic/stream_parser.rb +37 -4
- data/lib/llm/providers/anthropic.rb +1 -1
- data/lib/llm/providers/google/response_adapter/completion.rb +12 -5
- data/lib/llm/providers/google/stream_parser.rb +54 -11
- data/lib/llm/providers/google/utils.rb +30 -0
- data/lib/llm/providers/google.rb +2 -0
- data/lib/llm/providers/ollama/response_adapter/completion.rb +6 -0
- data/lib/llm/providers/ollama/stream_parser.rb +10 -4
- data/lib/llm/providers/ollama.rb +1 -1
- data/lib/llm/providers/openai/response_adapter/completion.rb +7 -0
- data/lib/llm/providers/openai/response_adapter/responds.rb +84 -10
- data/lib/llm/providers/openai/responses/stream_parser.rb +63 -4
- data/lib/llm/providers/openai/responses.rb +1 -1
- data/lib/llm/providers/openai/stream_parser.rb +68 -4
- data/lib/llm/providers/openai.rb +1 -1
- data/lib/llm/stream/queue.rb +51 -0
- data/lib/llm/stream.rb +102 -0
- data/lib/llm/tool.rb +50 -45
- data/lib/llm/tracer/telemetry.rb +2 -2
- data/lib/llm/version.rb +1 -1
- data/lib/llm.rb +3 -2
- data/llm.gemspec +2 -2
- metadata +7 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f4c449483ce7a3b53411760d6376157fed3e23b4f013f23ae397255398bef368
+  data.tar.gz: a9a9c82b107cde72edfe6fe5f68ea7b1ea5e493314883d101c453a94db81b601
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 71a389b2fe654cfd053f45bd749c34b96c9d89ac60e984960f4a2720896588ba39056a3a92ab75a429572cd099961d9f3c02474f7dc43460b59866e41d8b5f28
+  data.tar.gz: 4532ec55176751b32ed21b281f2f71395dcd32cdf318973a751decf171af0a9e5f3f75b75871542c578fd9a2a134f8fc5cbf6a54b1df3b2dbe0c47745122b900
data/CHANGELOG.md
ADDED
@@ -0,0 +1,152 @@
# Changelog

## Unreleased

Changes since `v4.11.1`.

## v4.11.1

Changes since `v4.10.0`.

### Add

- Add `LLM::Stream` for richer streaming callbacks, including `on_content`, `on_reasoning_content`, and `on_tool_call` for concurrent tool execution.
- Add `LLM::Stream#wait` as a shortcut for `queue.wait`.
- Add `LLM::Context#wait` as a shortcut for the configured stream's `wait`.
- Add `LLM::Context#call(:functions)` as a shortcut for `functions.call`.
- Add `LLM::Function.registry` and enhanced support for MCP tools in `LLM::Tool.registry` for tool resolution during streaming.
- Add normalized `LLM::Response` for OpenAI Responses, providing `content`, `content!`, `messages` / `choices`, `usage`, and `reasoning_content`.
- Add `mode: :responses` to `LLM::Context` for routing `talk` through the Responses API.
- Add `LLM::Context#returns` for collecting pending tool returns from the context.
- Add persistent HTTP connection pooling for repeated MCP tool calls via `LLM.mcp(http: ...).persist!`.
- Add explicit MCP transport constructors via `LLM::MCP.stdio(...)` and `LLM::MCP.http(...)`.

### Fix

- Fix Google tool-call handling by synthesizing stable ids when Gemini does not provide a direct tool-call id.
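
A condensed sketch of how the streaming additions above fit together, based on the `LLM::Stream` example in the README portion of this diff (the `System` tool and the exact keyword arguments are assumptions borrowed from that example):

```ruby
class MyStream < LLM::Stream
  # Visible assistant output
  def on_content(content)
    print content
  end

  # Start tool work while the response is still streaming
  def on_tool_call(tool, error)
    queue << (error || tool.spawn(:thread))
  end
end

llm = LLM.openai(key: ENV["KEY"])
ctx = LLM::Context.new(llm, stream: MyStream.new, tools: [System])
ctx.talk("Run `date`.")
ctx.talk(ctx.wait(:thread)) while ctx.functions.any? # wait on spawned tools, then report results
```
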
## v4.10.0

Changes since `v4.9.0`.

### Add

- Add HTTP transport for MCP with `LLM::MCP::Transport::HTTP` for remote servers
- Add JSON Schema union types (`any_of`, `all_of`, `one_of`) with parser integration
- Add JSON Schema type array union support (e.g., `"type": ["object", "null"]`)
- Add JSON Schema type inference from `const`, `enum`, or `default` fields

### Change

- Update `LLM::MCP` constructor for exclusive `http:` or `stdio:` transport
- Update `LLM::MCP` documentation for HTTP transport support
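
The JSON Schema entries above describe shapes that the schema parser now understands. A minimal illustration of those shapes, written as a Ruby hash; the property names are made up for the example:

```ruby
schema = {
  "type" => "object",
  "properties" => {
    # Type array union: a string or null
    "query" => {"type" => ["string", "null"]},
    # oneOf union: an integer or a string
    "limit" => {"oneOf" => [{"type" => "integer"}, {"type" => "string"}]},
    # Type can be inferred from const/enum/default when "type" is omitted
    "order" => {"enum" => ["asc", "desc"]}
  },
  "required" => ["query"]
}
```
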
## v4.9.0

Changes since `v4.8.0`.

### Add

- Add fiber-based concurrency with `LLM::Function::FiberGroup` and `LLM::Function::TaskGroup` classes for lightweight async execution.
- Add `:thread`, `:task`, and `:fiber` strategy parameter to `LLM::Function#spawn` for explicit concurrency control.
- Add stdio MCP client support, including remote tool discovery and invocation through `LLM.mcp`, `LLM::Context`, and existing function/tool APIs.
- Add model registry support via `LLM::Registry`, including model metadata lookup, pricing, modalities, limits, and cost estimation.
- Add context access to a model's context window via `LLM::Context#context_window`.
- Add tracking of defined tools in the tool registry.
- Add `LLM::Schema::Enum`, enabling `Enum[...]` as a schema/tool parameter type.
- Add top-level Anthropic system instruction support using Anthropic's provider-specific request format.
- Add richer tracing hooks and extra metadata support for LangSmith/OpenTelemetry-style traces.
- Add rack/websocket and Relay-related example work, including MCP-focused examples.
- Add concurrent tool execution with `LLM::Function#spawn`, `LLM::Function::Array` (`call`, `wait`, `spawn`), and `LLM::Function::ThreadGroup`.
- Add `LLM::Function::ThreadGroup#alive?` method for non-blocking monitoring of concurrent tool execution.
- Add `LLM::Function::ThreadGroup#value` alias for `ThreadGroup#wait` for consistency with Ruby's `Thread#value`.
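
A small sketch of two of the additions above: `Enum[...]` as a tool parameter type, plus the sequential tool-call loop. For the `:thread`/`:fiber`/`:task` spawn strategies, see the Concurrent Tools example in the README portion of this diff. The `Deploy` tool below is hypothetical, and it assumes `Enum[...]` is available in the tool DSL as the entry above suggests:

```ruby
class Deploy < LLM::Tool
  name "deploy"
  description "Deploy a service to an environment"
  # Enum[...] as a tool parameter type
  param :environment, Enum["staging", "production"], "Target environment", required: true

  def call(environment:)
    {deployed: environment}
  end
end

llm = LLM.openai(key: ENV["KEY"])
ctx = LLM::Context.new(llm, tools: [Deploy])
ctx.talk("Deploy to staging.")
ctx.talk(ctx.functions.call) while ctx.functions.any?
```
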
### Change

- Rename `LLM::Session` to `LLM::Context` throughout the codebase to better reflect the concept of a stateful interaction environment.
- Rename `LLM::Gemini` to `LLM::Google` to better reflect provider naming.
- Standardize model objects across providers around a smaller common interface.
- Switch registry cost internals from `LLM::Estimate` to `LLM::Cost`.
- Update image generation defaults so OpenAI and xAI consistently return base64-encoded image data by default.
- Update `LLM::Bot` deprecation warning from v5.0 to v6.0, giving users more time to migrate to `LLM::Context`.
- Rework the README and screencast documentation to better cover MCP, registry, contexts, prompts, concurrency, providers, and example flow.
- Expand the README with architecture, production, and provider guidance while improving readability and example ordering.

### Fix

- Fix local schema `$ref` resolution in `LLM::Schema::Parser`.
- Fix multiple MCP issues around stdio env handling, request IDs, registry interaction, tool registration, and filtering of MCP tools from the standard tool registry.
- Fix stream parsing issues, including chunk-splitting bugs and safer handling of streamed error responses.
- Fix prompt handling across contexts, agents, and provider adapters so prompt turns remain consistent in history and completions.
- Fix several tool/context issues, including function return wrapping, tool lookup after deserialization, unnamed subclass filtering, and thread-safety around tool registry mutations.
- Fix Google tool-call handling to preserve `thoughtSignature`.
- Fix `LLM::Tracer::Logger` argument handling.
- Fix packaging/docs issues such as registry files in the gemspec and stale provider docs.
- Fix Google provider handling of `nil` function IDs during context deserialization.
- Fix MCP stdio transport by increasing the poll timeout for better reliability.
- Fix Google provider to properly cast non-Hash tool results into Hash format for API compatibility.
- Fix schema parser to support recursive normalization of `Array`, `LLM::Object`, and nested structures.
- Fix DeepSeek provider to tolerate malformed tool arguments.
- Fix `LLM::Function::TaskGroup#alive?` to properly delegate to `Async::Task#alive?`.
- Fix various RuboCop errors across the codebase.
- Fix DeepSeek provider to handle JSON that might be valid but unexpected.

### Notes

Notable merged work in this range includes:

- `feat(function): add fiber-based concurrency for async environments (#64)`
- `feat(mcp): add stdio MCP support (#134)`
- `Add LLM::Registry + cost support (#133)`
- `Consistent model objects across providers (#131)`
- `Add rack + websocket example (#130)`
- `feat(gemspec): add changelog URI (#136)`
- `feat(function): alias ThreadGroup#wait as ThreadGroup#value (#62)`
- README and screencast refresh across `#66`, `#67`, `#68`, `#71`, and `#72`
- `chore(bot): update deprecation warning from v5.0 to v6.0`
- `fix(deepseek): tolerate malformed tool arguments`
- `refactor(context): Rename Session as Context (#70)`

Comparison base:

- Latest tag: `v4.8.0` (`6468f2426ee125823b7ae43b4af507b125f96ffc`)
- HEAD used for this changelog: `915c48da6fda9bef1554ff613947a6ce26d382e3`
data/README.md
CHANGED
@@ -4,7 +4,7 @@
 <p align="center">
 <a href="https://0x1eef.github.io/x/llm.rb?rebuild=1"><img src="https://img.shields.io/badge/docs-0x1eef.github.io-blue.svg" alt="RubyDoc"></a>
 <a href="https://opensource.org/license/0bsd"><img src="https://img.shields.io/badge/License-0BSD-orange.svg?" alt="License"></a>
-<a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-4.
+<a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-4.11.1-green.svg?" alt="Version"></a>
 </p>

 ## About
@@ -30,8 +30,16 @@ llm.rb is built around the state and execution model around them:

 - **Contexts are central** <br>
   They hold history, tools, schema, usage, cost, persistence, and execution state.
+- **Contexts can be serialized** <br>
+  A context can be serialized to JSON and stored on disk, in a database, in a
+  job queue, or anywhere else your application needs to persist state.
 - **Tool execution is explicit** <br>
   Run local, provider-native, and MCP tools sequentially or concurrently with threads, fibers, or async tasks.
+- **Run tools while streaming** <br>
+  Start tool work while a response is still streaming instead of waiting for the turn to finish. <br>
+  This overlaps tool latency with model output and exposes streamed tool-call events for introspection, making it one of llm.rb's strongest execution features.
+- **HTTP MCP can reuse connections** <br>
+  Opt into persistent HTTP pooling for repeated remote MCP tool calls with `persist!`.
 - **One API across providers and capabilities** <br>
   The same model covers chat, files, images, audio, embeddings, vector stores, and more.
 - **Thread-safe where it matters** <br>
@@ -75,12 +83,14 @@ llm.rb is built in layers, each providing explicit control:
 llm.rb provides a complete set of primitives for building LLM-powered systems:

 - **Chat & Contexts** — stateless and stateful interactions with persistence
-- **Streaming** — real-time responses across providers
+- **Streaming** — real-time responses across providers, including structured stream callbacks
+- **Reasoning Support** — full stream, message, and response support when providers expose reasoning
 - **Tool Calling** — define and execute functions with automatic orchestration
+- **Run Tools While Streaming** — begin tool work before the model finishes its turn
 - **Concurrent Execution** — threads, async tasks, and fibers
 - **Agents** — reusable, preconfigured assistants with tool auto-execution
 - **Structured Outputs** — JSON schema-based responses
-- **MCP Support** — integrate external tool servers dynamically
+- **MCP Support** — integrate external tool servers dynamically over stdio or HTTP
 - **Multimodal Inputs** — text, images, audio, documents, URLs
 - **Audio** — text-to-speech, transcription, translation
 - **Images** — generation and editing
@@ -93,163 +103,234 @@ llm.rb provides a complete set of primitives for building LLM-powered systems:

 ## Quick Start

-####
+#### Simple Streaming

-
-
-
-
-
-
-
+At the simplest level, any object that implements `#<<` can receive visible
+output as it arrives. This works with `$stdout`, `StringIO`, files, sockets,
+and other Ruby IO-style objects.
+
+For more control, llm.rb also supports advanced streaming patterns through
+[`LLM::Stream`](lib/llm/stream.rb). See [Advanced Streaming](#advanced-streaming)
+for a structured callback-based example:

 ```ruby
 #!/usr/bin/env ruby
 require "llm"

 llm = LLM.openai(key: ENV["KEY"])
-ctx = LLM::Context.new(llm, stream: $stdout
-
-
-ctx.talk(
-
+ctx = LLM::Context.new(llm, stream: $stdout)
+loop do
+  print "> "
+  ctx.talk(STDIN.gets || break)
+  puts
+end
 ```

-####
+#### Structured Outputs

-
-
-
-
+The `LLM::Schema` system lets you define JSON schemas for structured outputs.
+Schemas can be defined as classes with `property` declarations or built
+programmatically using a fluent interface. When you pass a schema to a context,
+llm.rb adapts it into the provider's structured-output format when that
+provider supports one. The `content!` method then parses the assistant's JSON
+response into a Ruby object:

 ```ruby
 #!/usr/bin/env ruby
 require "llm"
+require "pp"
+
+class Report < LLM::Schema
+  property :category, Enum["performance", "security", "outage"], "Report category", required: true
+  property :summary, String, "Short summary", required: true
+  property :impact, OneOf[String, Integer], "Primary impact, as text or a count", required: true
+  property :services, Array[String], "Impacted services", required: true
+  property :timestamp, String, "When it happened", optional: true
+end

 llm = LLM.openai(key: ENV["KEY"])
-
+ctx = LLM::Context.new(llm, schema: Report)
+res = ctx.talk("Structure this report: 'Database latency spiked at 10:42 UTC, causing 5% request timeouts for 12 minutes.'")
+pp res.content!

-
-
-
-
-
-
-
-end
+# {
+#   "category" => "performance",
+#   "summary" => "Database latency spiked, causing 5% request timeouts for 12 minutes.",
+#   "impact" => "5% request timeouts",
+#   "services" => ["Database"],
+#   "timestamp" => "2024-06-05T10:42:00Z"
+# }
 ```

-
-
-
+#### Tool Calling
+
+Tools in llm.rb can be defined as classes inheriting from `LLM::Tool` or as
+closures using `LLM.function`. When the LLM requests a tool call, the context
+stores `Function` objects in `ctx.functions`. The `call()` method executes all
+pending functions and returns their results to the LLM. Tools describe
+structured parameters with JSON Schema and adapt those definitions to each
+provider's tool-calling format (OpenAI, Anthropic, Google, etc.):

 ```ruby
 #!/usr/bin/env ruby
 require "llm"

-
-
-
-
-})
+class System < LLM::Tool
+  name "system"
+  description "Run a shell command"
+  param :command, String, "Command to execute", required: true

-
-
-
-ctx.talk("List the available GitHub MCP toolsets.")
-ctx.talk(ctx.functions.call) while ctx.functions.any?
-ensure
-mcp.stop
+  def call(command:)
+    {success: system(command)}
+  end
 end
+
+llm = LLM.openai(key: ENV["KEY"])
+ctx = LLM::Context.new(llm, stream: $stdout, tools: [System])
+ctx.talk("Run `date`.")
+ctx.talk(ctx.call(:functions)) while ctx.functions.any?
 ```

-####
+#### Concurrent Tools

-
-
-
-`
-
-
-
+llm.rb provides explicit concurrency control for tool execution. The
+`wait(:thread)` method spawns each pending function in its own thread and waits
+for all to complete. You can also use `:fiber` for cooperative multitasking or
+`:task` for async/await patterns (requires the `async` gem). The context
+automatically collects all results and reports them back to the LLM in a
+single turn, maintaining conversation flow while parallelizing independent
+operations:

 ```ruby
 #!/usr/bin/env ruby
 require "llm"

 llm = LLM.openai(key: ENV["KEY"])
-ctx = LLM::Context.new(llm, stream: $stdout)
-
-
-
-
-end
+ctx = LLM::Context.new(llm, stream: $stdout, tools: [FetchWeather, FetchNews, FetchStock])
+
+# Execute multiple independent tools concurrently
+ctx.talk("Summarize the weather, headlines, and stock price.")
+ctx.talk(ctx.wait(:thread)) while ctx.functions.any?
 ```

####
|
|
216
|
+
#### Advanced Streaming
|
|
190
217
|
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
218
|
+
llm.rb also supports the [`LLM::Stream`](lib/llm/stream.rb) interface for
|
|
219
|
+
structured streaming events:
|
|
220
|
+
|
|
221
|
+
- `on_content` for visible assistant output
|
|
222
|
+
- `on_reasoning_content` for separate reasoning output
|
|
223
|
+
- `on_tool_call` for streamed tool-call notifications
|
|
224
|
+
|
|
225
|
+
Subclass [`LLM::Stream`](lib/llm/stream.rb) when you want features like
|
|
226
|
+
`queue` and `wait`, or implement the same methods on your own object. Keep these
|
|
227
|
+
callbacks fast: they run inline with the parser.
|
|
228
|
+
|
|
229
|
+
`on_tool_call` lets tools start before the model finishes its turn, for
|
|
230
|
+
example with `tool.spawn(:thread)`, `tool.spawn(:fiber)`, or
|
|
231
|
+
`tool.spawn(:task)`. That can overlap tool latency with streaming output and
|
|
232
|
+
gives you a first-class place to observe and instrument tool-call execution as
|
|
233
|
+
it unfolds.
|
|
234
|
+
|
|
235
|
+
If a stream cannot resolve a tool, `error` is an `LLM::Function::Return` that
|
|
236
|
+
communicates the failure back to the LLM. That lets the tool-call path recover
|
|
237
|
+
and keeps the session alive. It also leaves control in the callback: it can
|
|
238
|
+
send `error`, spawn the tool when `error == nil`, or handle the situation
|
|
239
|
+
however it sees fit.
|
|
240
|
+
|
|
241
|
+
In normal use this should be rare, since `on_tool_call` is usually called with
|
|
242
|
+
a resolved tool and `error == nil`. To resolve a tool call, the tool must be
|
|
243
|
+
found in `LLM::Function.registry`. That covers `LLM::Tool` subclasses,
|
|
244
|
+
including MCP tools, but not `LLM.function` closures, which are excluded
|
|
245
|
+
because they may be bound to local state:
|
|
197
246
|
|
|
198
247
|
```ruby
|
|
199
248
|
#!/usr/bin/env ruby
|
|
200
249
|
require "llm"
|
|
250
|
+
# Assume `System < LLM::Tool` is already defined.
|
|
201
251
|
|
|
202
|
-
class
|
|
203
|
-
|
|
204
|
-
description "Run a shell command"
|
|
205
|
-
param :command, String, "Command to execute", required: true
|
|
252
|
+
class Stream < LLM::Stream
|
|
253
|
+
attr_reader :content, :reasoning_content
|
|
206
254
|
|
|
207
|
-
def
|
|
208
|
-
|
|
255
|
+
def initialize
|
|
256
|
+
@content = +""
|
|
257
|
+
@reasoning_content = +""
|
|
258
|
+
end
|
|
259
|
+
|
|
260
|
+
def on_content(content)
|
|
261
|
+
@content << content
|
|
262
|
+
print content
|
|
263
|
+
end
|
|
264
|
+
|
|
265
|
+
def on_reasoning_content(content)
|
|
266
|
+
@reasoning_content << content
|
|
267
|
+
end
|
|
268
|
+
|
|
269
|
+
def on_tool_call(tool, error)
|
|
270
|
+
queue << (error || tool.spawn(:thread))
|
|
209
271
|
end
|
|
210
272
|
end
|
|
211
273
|
|
|
212
274
|
llm = LLM.openai(key: ENV["KEY"])
|
|
213
|
-
ctx = LLM::Context.new(llm, stream:
|
|
214
|
-
|
|
215
|
-
ctx.talk(
|
|
275
|
+
ctx = LLM::Context.new(llm, stream: Stream.new, tools: [System])
|
|
276
|
+
|
|
277
|
+
ctx.talk("Run `date` and `uname -a`.")
|
|
278
|
+
while ctx.functions.any?
|
|
279
|
+
ctx.talk(ctx.wait(:thread))
|
|
280
|
+
end
|
|
216
281
|
```
|
|
217
282
|
|
|
218
|
-
####
|
|
283
|
+
#### MCP
|
|
219
284
|
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
285
|
+
llm.rb integrates with the Model Context Protocol (MCP) to dynamically discover
|
|
286
|
+
and use tools from external servers. This example starts a filesystem MCP
|
|
287
|
+
server over stdio and makes its tools available to a context, enabling the LLM
|
|
288
|
+
to interact with the local file system through a standardized interface.
|
|
289
|
+
Use `LLM::MCP.stdio` or `LLM::MCP.http` when you want to make the transport
|
|
290
|
+
explicit. Like `LLM::Context`, an MCP client is stateful and should remain
|
|
291
|
+
isolated to a single thread:
|
|
227
292
|
|
|
228
293
|
```ruby
|
|
229
294
|
#!/usr/bin/env ruby
|
|
230
295
|
require "llm"
|
|
231
|
-
require "pp"
|
|
232
296
|
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
297
|
+
llm = LLM.openai(key: ENV["KEY"])
|
|
298
|
+
mcp = LLM::MCP.stdio(argv: ["npx", "-y", "@modelcontextprotocol/server-filesystem", Dir.pwd])
|
|
299
|
+
|
|
300
|
+
begin
|
|
301
|
+
mcp.start
|
|
302
|
+
ctx = LLM::Context.new(llm, stream: $stdout, tools: mcp.tools)
|
|
303
|
+
ctx.talk("List the directories in this project.")
|
|
304
|
+
ctx.talk(ctx.call(:functions)) while ctx.functions.any?
|
|
305
|
+
ensure
|
|
306
|
+
mcp.stop
|
|
239
307
|
end
|
|
308
|
+
```
|
|
309
|
+
|
|
310
|
+
You can also connect to an MCP server over HTTP. This is useful when the
|
|
311
|
+
server already runs remotely and exposes MCP through a URL instead of a local
|
|
312
|
+
process. If you expect repeated tool calls, use `persist!` to reuse a
|
|
313
|
+
process-wide HTTP connection pool. This requires the optional
|
|
314
|
+
`net-http-persistent` gem:
|
|
315
|
+
|
|
316
|
+
```ruby
|
|
317
|
+
#!/usr/bin/env ruby
|
|
318
|
+
require "llm"
|
|
240
319
|
|
|
241
320
|
llm = LLM.openai(key: ENV["KEY"])
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
321
|
+
mcp = LLM::MCP.http(
|
|
322
|
+
url: "https://api.githubcopilot.com/mcp/",
|
|
323
|
+
headers: {"Authorization" => "Bearer #{ENV.fetch("GITHUB_PAT")}"}
|
|
324
|
+
).persist!
|
|
245
325
|
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
326
|
+
begin
|
|
327
|
+
mcp.start
|
|
328
|
+
ctx = LLM::Context.new(llm, stream: $stdout, tools: mcp.tools)
|
|
329
|
+
ctx.talk("List the available GitHub MCP toolsets.")
|
|
330
|
+
ctx.talk(ctx.call(:functions)) while ctx.functions.any?
|
|
331
|
+
ensure
|
|
332
|
+
mcp.stop
|
|
333
|
+
end
|
|
253
334
|
```
|
|
254
335
|
|
|
255
336
|
## Providers
|
|
@@ -405,27 +486,31 @@ puts "Cost: $#{model_info.cost.input}/1M input tokens"

 #### Responses API

-llm.rb also supports OpenAI's Responses API through `
-`
-
+llm.rb also supports OpenAI's Responses API through `LLM::Context` with
+`mode: :responses`. The important switch is `store:`. With `store: false`, the
+Responses API stays stateless while still using the Responses endpoint, which
+is useful for models or features that are only available through the Responses
+API. With `store: true`, OpenAI can keep response state server-side and reduce
+how much conversation state needs to be sent on each turn:

 ```ruby
 #!/usr/bin/env ruby
 require "llm"

 llm = LLM.openai(key: ENV["KEY"])
-ctx = LLM::Context.new(llm)
+ctx = LLM::Context.new(llm, mode: :responses, store: false)

-ctx.
-res = ctx.
-puts res.
+ctx.talk("Your task is to answer the user's questions", role: :developer)
+res = ctx.talk("What is the capital of France?")
+puts res.content
 ```

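For the server-side variant described above, the only change is the flag. A minimal sketch (whether extra options are needed depends on the model and account):

```ruby
ctx = LLM::Context.new(llm, mode: :responses, store: true)
ctx.talk("What is the capital of France?")
puts ctx.talk("And its population?").content
```
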
-#### Context Persistence
+#### Context Persistence: Vanilla

-Contexts can be serialized and restored across process boundaries.
-
-
+Contexts can be serialized and restored across process boundaries. A context
+can be serialized to JSON and stored on disk, in a database, in a job queue,
+or anywhere else your application needs to persist state:

 ```ruby
 #!/usr/bin/env ruby
@@ -435,12 +520,79 @@ llm = LLM.openai(key: ENV["KEY"])
 ctx = LLM::Context.new(llm)
 ctx.talk("Hello")
 ctx.talk("Remember that my favorite language is Ruby")
-
+
+# Serialize to a string when you want to store the context yourself,
+# for example in a database row or job payload.
+payload = ctx.to_json

 restored = LLM::Context.new(llm)
-restored.restore(
+restored.restore(string: payload)
 res = restored.talk("What is my favorite language?")
 puts res.content
+
+# You can also persist the same state to a file:
+ctx.save(path: "context.json")
+restored = LLM::Context.new(llm)
+restored.restore(path: "context.json")
+```
+
+#### Context Persistence: ActiveRecord (Rails)
+
+In a Rails application, you can also wrap persisted context state in an
+ActiveRecord model. A minimal schema would include a `snapshot` column for the
+serialized context payload (`jsonb` is recommended) and a `provider` column
+for the provider name:
+
+```ruby
+create_table :contexts do |t|
+  t.jsonb :snapshot
+  t.string :provider, null: false
+  t.timestamps
+end
+```
+
+For example:
+
+```ruby
+class Context < ApplicationRecord
+  def talk(...)
+    ctx.talk(...).tap { flush }
+  end
+
+  def wait(...)
+    ctx.wait(...).tap { flush }
+  end
+
+  def messages
+    ctx.messages
+  end
+
+  def model
+    ctx.model
+  end
+
+  def flush
+    update_column(:snapshot, ctx.to_json)
+  end
+
+  private
+
+  def ctx
+    @ctx ||= begin
+      ctx = LLM::Context.new(llm)
+      ctx.restore(string: snapshot) if snapshot
+      ctx
+    end
+  end
+
+  def llm
+    LLM.method(provider).call(key: ENV.fetch(key))
+  end
+
+  def key
+    "#{provider.upcase}_KEY"
+  end
+end
 ```

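A possible way to drive the model sketched above from application code; this assumes the `contexts` table from the migration sketch and an `OPENAI_KEY` environment variable:

```ruby
record = Context.create!(provider: "openai")
record.talk("Hello")
record.talk("Remember that my favorite language is Ruby")

# Later, possibly in another process or background job:
record = Context.find(record.id)
puts record.talk("What is my favorite language?").content
```
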
 #### Agents
|