llm.rb 4.8.0 → 4.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +356 -583
- data/data/anthropic.json +770 -0
- data/data/deepseek.json +75 -0
- data/data/google.json +1050 -0
- data/data/openai.json +1421 -0
- data/data/xai.json +792 -0
- data/data/zai.json +330 -0
- data/lib/llm/agent.rb +42 -41
- data/lib/llm/bot.rb +1 -263
- data/lib/llm/buffer.rb +7 -0
- data/lib/llm/{session → context}/deserializer.rb +4 -3
- data/lib/llm/context.rb +292 -0
- data/lib/llm/cost.rb +26 -0
- data/lib/llm/error.rb +8 -0
- data/lib/llm/function/array.rb +61 -0
- data/lib/llm/function/fiber_group.rb +91 -0
- data/lib/llm/function/task_group.rb +89 -0
- data/lib/llm/function/thread_group.rb +94 -0
- data/lib/llm/function.rb +75 -10
- data/lib/llm/mcp/command.rb +108 -0
- data/lib/llm/mcp/error.rb +31 -0
- data/lib/llm/mcp/pipe.rb +82 -0
- data/lib/llm/mcp/rpc.rb +118 -0
- data/lib/llm/mcp/transport/http/event_handler.rb +66 -0
- data/lib/llm/mcp/transport/http.rb +122 -0
- data/lib/llm/mcp/transport/stdio.rb +85 -0
- data/lib/llm/mcp.rb +116 -0
- data/lib/llm/message.rb +13 -11
- data/lib/llm/model.rb +2 -2
- data/lib/llm/prompt.rb +17 -7
- data/lib/llm/provider.rb +32 -17
- data/lib/llm/providers/anthropic/files.rb +3 -3
- data/lib/llm/providers/anthropic.rb +19 -4
- data/lib/llm/providers/deepseek.rb +10 -3
- data/lib/llm/providers/{gemini → google}/audio.rb +6 -6
- data/lib/llm/providers/{gemini → google}/error_handler.rb +2 -2
- data/lib/llm/providers/{gemini → google}/files.rb +11 -11
- data/lib/llm/providers/{gemini → google}/images.rb +7 -7
- data/lib/llm/providers/{gemini → google}/models.rb +5 -5
- data/lib/llm/providers/{gemini → google}/request_adapter/completion.rb +7 -3
- data/lib/llm/providers/{gemini → google}/request_adapter.rb +1 -1
- data/lib/llm/providers/{gemini → google}/response_adapter/completion.rb +7 -7
- data/lib/llm/providers/{gemini → google}/response_adapter/embedding.rb +1 -1
- data/lib/llm/providers/{gemini → google}/response_adapter/file.rb +1 -1
- data/lib/llm/providers/{gemini → google}/response_adapter/files.rb +1 -1
- data/lib/llm/providers/{gemini → google}/response_adapter/image.rb +1 -1
- data/lib/llm/providers/{gemini → google}/response_adapter/models.rb +1 -1
- data/lib/llm/providers/{gemini → google}/response_adapter/web_search.rb +2 -2
- data/lib/llm/providers/{gemini → google}/response_adapter.rb +8 -8
- data/lib/llm/providers/{gemini → google}/stream_parser.rb +3 -3
- data/lib/llm/providers/{gemini.rb → google.rb} +41 -26
- data/lib/llm/providers/llamacpp.rb +10 -3
- data/lib/llm/providers/ollama.rb +19 -4
- data/lib/llm/providers/openai/files.rb +3 -3
- data/lib/llm/providers/openai/response_adapter/completion.rb +9 -1
- data/lib/llm/providers/openai/response_adapter/responds.rb +9 -1
- data/lib/llm/providers/openai/responses.rb +9 -1
- data/lib/llm/providers/openai/stream_parser.rb +2 -0
- data/lib/llm/providers/openai.rb +19 -4
- data/lib/llm/providers/xai.rb +10 -3
- data/lib/llm/providers/zai.rb +9 -2
- data/lib/llm/registry.rb +81 -0
- data/lib/llm/schema/all_of.rb +31 -0
- data/lib/llm/schema/any_of.rb +31 -0
- data/lib/llm/schema/one_of.rb +31 -0
- data/lib/llm/schema/parser.rb +145 -0
- data/lib/llm/schema.rb +49 -8
- data/lib/llm/server_tool.rb +5 -5
- data/lib/llm/session.rb +10 -1
- data/lib/llm/tool.rb +88 -6
- data/lib/llm/tracer/logger.rb +1 -1
- data/lib/llm/tracer/telemetry.rb +7 -7
- data/lib/llm/tracer.rb +3 -3
- data/lib/llm/usage.rb +5 -0
- data/lib/llm/version.rb +1 -1
- data/lib/llm.rb +39 -6
- data/llm.gemspec +45 -8
- metadata +86 -28
data/README.md CHANGED

````diff
@@ -4,751 +4,544 @@
 <p align="center">
 <a href="https://0x1eef.github.io/x/llm.rb?rebuild=1"><img src="https://img.shields.io/badge/docs-0x1eef.github.io-blue.svg" alt="RubyDoc"></a>
 <a href="https://opensource.org/license/0bsd"><img src="https://img.shields.io/badge/License-0BSD-orange.svg?" alt="License"></a>
-<a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-4.
+<a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-4.10.0-green.svg?" alt="Version"></a>
 </p>
 
 ## About
 
-llm.rb is a
-
-
-
+llm.rb is a Ruby-centric toolkit for building real LLM-powered systems — where
+LLMs are part of your architecture, not just API calls. It gives you explicit
+control over contexts, tools, concurrency, and providers, so you can compose
+reliable, production-ready workflows without hidden abstractions.
 
-
-
-
-by the license. Built with [good music](https://www.youtube.com/watch?v=SNvaqwTbn14)
-and a lot of ☕️.
+Built for engineers who want to understand and control their LLM systems. No
+frameworks, no hidden magic — just composable primitives for building real
+applications, from scripts to full systems like [Relay](https://github.com/llmrb/relay).
 
-
+Jump to [Quick start](#quick-start), discover its [capabilities](#capabilities), read about
+its [architecture](#architecture--execution-model) or watch the
+[Screencast](https://www.youtube.com/watch?v=x1K4wMeO_QA) for a deep dive into the design
+and capabilities of llm.rb.
 
-
+## What Makes It Different
 
-
-
-multiple requests. The following example implements a simple REPL loop, and the response
-is streamed to the terminal in real-time as it arrives from the provider. The provider
-happens to be OpenAI in this case but it could be any other provider, and `$stdout`
-could be any object that implements the `#<<` method:
+Most LLM libraries stop at requests and responses. <br>
+llm.rb is built around the state and execution model around them:
 
-
-
-
+- **Contexts are central** <br>
+  They hold history, tools, schema, usage, cost, persistence, and execution state.
+- **Tool execution is explicit** <br>
+  Run local, provider-native, and MCP tools sequentially or concurrently with threads, fibers, or async tasks.
+- **One API across providers and capabilities** <br>
+  The same model covers chat, files, images, audio, embeddings, vector stores, and more.
+- **Thread-safe where it matters** <br>
+  Providers are shareable, while contexts stay isolated and stateful.
+- **Local metadata, fewer extra API calls** <br>
+  A built-in registry provides model capabilities, limits, pricing, and cost estimation.
+- **Stdlib-only by default** <br>
+  llm.rb runs on the Ruby standard library by default, with providers, optional features, and the model registry loaded only when you use them.
 
-
-
-
-  print "> "
-  ses.talk(STDIN.gets || break)
-  puts
-end
-```
+## Architecture & Execution Model
+
+llm.rb is built in layers, each providing explicit control:
 
-
+```
+┌─────────────────────────────────────────┐
+│ Your Application                        │
+├─────────────────────────────────────────┤
+│ Contexts & Agents                       │ ← Stateful workflows
+├─────────────────────────────────────────┤
+│ Tools & Functions                       │ ← Concurrent execution
+├─────────────────────────────────────────┤
+│ Unified Provider API (OpenAI, etc.)     │ ← Provider abstraction
+├─────────────────────────────────────────┤
+│ HTTP, JSON, Thread Safety               │ ← Infrastructure
+└─────────────────────────────────────────┘
+```
 
-
-
-
-
+### Key Design Decisions
+
+- **Thread-safe providers** - `LLM::Provider` instances are safe to share across threads
+- **Thread-local contexts** - `LLM::Context` should generally be kept thread-local
+- **Lazy loading** - Providers, optional features, and the model registry load on demand
+- **JSON adapter system** - Swap JSON libraries (JSON/Oj/Yajl) for performance
+- **Registry system** - Local metadata for model capabilities, limits, and pricing
+- **Provider adaptation** - Normalizes differences between OpenAI, Anthropic, Google, and other providers
+- **Structured tool execution** - Errors are captured and returned as data, not raised unpredictably
+- **Function vs Tool APIs** - Choose between class-based tools and closure-based functions
+
+## Capabilities
+
+llm.rb provides a complete set of primitives for building LLM-powered systems:
+
+- **Chat & Contexts** — stateless and stateful interactions with persistence
+- **Streaming** — real-time responses across providers
+- **Tool Calling** — define and execute functions with automatic orchestration
+- **Concurrent Execution** — threads, async tasks, and fibers
+- **Agents** — reusable, preconfigured assistants with tool auto-execution
+- **Structured Outputs** — JSON schema-based responses
+- **MCP Support** — integrate external tool servers dynamically
+- **Multimodal Inputs** — text, images, audio, documents, URLs
+- **Audio** — text-to-speech, transcription, translation
+- **Images** — generation and editing
+- **Files API** — upload and reference files in prompts
+- **Embeddings** — vector generation for search and RAG
+- **Vector Stores** — OpenAI-based retrieval workflows
+- **Cost Tracking** — estimate usage without API calls
+- **Observability** — tracing, logging, telemetry
+- **Model Registry** — local metadata for capabilities, limits, pricing
+
+## Quick Start
+
+#### Concurrent Tools
+
+llm.rb provides explicit concurrency control for tool execution. The
+`wait(:thread)` method spawns each pending function in its own thread and waits
+for all to complete. You can also use `:fiber` for cooperative multitasking or
+`:task` for async/await patterns (requires the `async` gem). The context
+automatically collects all results and reports them back to the LLM in a
+single turn, maintaining conversation flow while parallelizing independent
+operations:
 
 ```ruby
 #!/usr/bin/env ruby
 require "llm"
-require "pp"
-
-class Report < LLM::Schema
-  property :category, String, "Report category", required: true
-  property :summary, String, "Short summary", required: true
-  property :services, Array[String], "Impacted services", required: true
-  property :timestamp, String, "When it happened", optional: true
-end
 
 llm = LLM.openai(key: ENV["KEY"])
-
-res = ses.talk("Structure this report: 'Database latency spiked at 10:42 UTC, causing 5% request timeouts for 12 minutes.'")
-pp res.content!
+ctx = LLM::Context.new(llm, stream: $stdout, tools: [FetchWeather, FetchNews, FetchStock])
 
-
-
-
-# "summary" => "Database latency spiked, causing 5% request timeouts for 12 minutes.",
-# "services" => ["Database"],
-# "timestamp" => "2024-06-05T10:42:00Z"
-# }
+# Execute multiple independent tools concurrently
+ctx.talk("Summarize the weather, headlines, and stock price.")
+ctx.talk(ctx.functions.wait(:thread)) while ctx.functions.any?
 ```
 
````
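The new Concurrent Tools section above also names `:fiber` and `:task` as execution strategies but only demonstrates `:thread`. A minimal sketch of the variants, assuming they are drop-in arguments to `wait` as the paragraph states (the `FetchWeather`-style tool classes are the same hypothetical ones used in the README's own example):

```ruby
#!/usr/bin/env ruby
require "llm"

llm = LLM.openai(key: ENV["KEY"])
ctx = LLM::Context.new(llm, stream: $stdout, tools: [FetchWeather, FetchNews, FetchStock])
ctx.talk("Summarize the weather, headlines, and stock price.")

# Cooperative multitasking on fibers instead of threads
ctx.talk(ctx.functions.wait(:fiber)) while ctx.functions.any?

# With the optional `async` gem installed, async tasks are selected
# the same way:
#   ctx.talk(ctx.functions.wait(:task)) while ctx.functions.any?
```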
````diff
-####
+#### MCP
 
-
-
-
-
-the result back on the next request. The following example implements a simple tool
-that runs shell commands:
+llm.rb integrates with the Model Context Protocol (MCP) to dynamically discover
+and use tools from external servers. This example starts a filesystem MCP
+server over stdio and makes its tools available to a context, enabling the LLM
+to interact with the local file system through a standardized interface:
 
 ```ruby
 #!/usr/bin/env ruby
 require "llm"
 
-class System < LLM::Tool
-  name "system"
-  description "Run a shell command"
-  param :command, String, "Command to execute", required: true
-
-  def call(command:)
-    {success: system(command)}
-  end
-end
-
 llm = LLM.openai(key: ENV["KEY"])
-
-
-
+mcp = LLM.mcp(stdio: {argv: ["npx", "-y", "@modelcontextprotocol/server-filesystem", Dir.pwd]})
+
+begin
+  mcp.start
+  ctx = LLM::Context.new(llm, stream: $stdout, tools: mcp.tools)
+  ctx.talk("List the directories in this project.")
+  ctx.talk(ctx.functions.call) while ctx.functions.any?
+ensure
+  mcp.stop
+end
 ```
 
-
-
-
-class provides a class-level DSL for defining reusable, preconfigured
-assistants with defaults for model, tools, schema, and instructions.
-Instructions are injected only on the first request, and unlike
-[LLM::Session](https://0x1eef.github.io/x/llm.rb/LLM/Session.html),
-an [LLM::Agent](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html)
-will automatically call tools when needed:
+You can also connect to a hosted MCP server over HTTP. This is useful when the
+server already runs remotely and exposes MCP through a URL instead of a local
+process:
 
 ```ruby
 #!/usr/bin/env ruby
 require "llm"
 
-class SystemAdmin < LLM::Agent
-  model "gpt-4.1"
-  instructions "You are a Linux system admin"
-  tools Shell
-  schema Result
-end
-
 llm = LLM.openai(key: ENV["KEY"])
-
-
+mcp = LLM.mcp(http: {
+  url: "https://api.githubcopilot.com/mcp/",
+  headers: {"Authorization" => "Bearer #{ENV.fetch("GITHUB_PAT")}"}
+})
+
+begin
+  mcp.start
+  ctx = LLM::Context.new(llm, stream: $stdout, tools: mcp.tools)
+  ctx.talk("List the available GitHub MCP toolsets.")
+  ctx.talk(ctx.functions.call) while ctx.functions.any?
+ensure
+  mcp.stop
+end
 ```
 
-####
+#### Streaming Chat
 
-
-
-
-
-
+This example demonstrates llm.rb's streaming support. The `stream: $stdout`
+parameter tells the context to write responses incrementally as they arrive
+from the LLM. The `Context` object manages the conversation history, and
+`talk()` sends your input while automatically appending both your message and
+the LLM's response to the context. Streams accept any object with `#<<`,
+giving you flexibility to pipe output to files, network sockets, or custom
+buffers:
 
 ```ruby
 #!/usr/bin/env ruby
 require "llm"
 
 llm = LLM.openai(key: ENV["KEY"])
-
-
-
-
-
-user "Now double the speed for the same time."
+ctx = LLM::Context.new(llm, stream: $stdout)
+loop do
+  print "> "
+  ctx.talk(STDIN.gets || break)
+  puts
 end
-
-ses.talk(prompt)
 ```
 
-
-
-
-
+#### Tool Calling
+
+Tools in llm.rb can be defined as classes inheriting from `LLM::Tool` or as
+closures using `LLM.function`. When the LLM requests a tool call, the context
+stores `Function` objects in `ctx.functions`. The `call()` method executes all
+pending functions and returns their results to the LLM. Tools support
+structured parameters with JSON Schema validation and automatically adapt to
+each provider's API format (OpenAI, Anthropic, Google, etc.):
 
 ```ruby
 #!/usr/bin/env ruby
 require "llm"
 
-
-
+class System < LLM::Tool
+  name "system"
+  description "Run a shell command"
+  param :command, String, "Command to execute", required: true
 
-
-
-
-user "Now double the speed for the same time."
+  def call(command:)
+    {success: system(command)}
+  end
 end
 
-
+llm = LLM.openai(key: ENV["KEY"])
+ctx = LLM::Context.new(llm, stream: $stdout, tools: [System])
+ctx.talk("Run `date`.")
+ctx.talk(ctx.functions.call) while ctx.functions.any?
 ```
 
````
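The Tool Calling paragraph above mentions closure-based functions via `LLM.function`, but the example only shows the class form. The removed 4.8.0 lines elsewhere in this diff show the builder calls (`fn.description`, `fn.params`, `fn.define`); a sketch assuming an `LLM.function(:system)` entry point, since the defining line is truncated in the diff:

```ruby
#!/usr/bin/env ruby
require "llm"

# The fn.description/fn.params/fn.define builder API is visible in the
# removed 4.8.0 README; LLM.function(:system) itself is an assumption.
tool = LLM.function(:system) do |fn|
  fn.description "Run a shell command"
  fn.params do |schema|
    schema.object(command: schema.string.required)
  end
  fn.define do |command:|
    {success: system(command)}
  end
end

llm = LLM.openai(key: ENV["KEY"])
ctx = LLM::Context.new(llm, stream: $stdout, tools: [tool])
ctx.talk("Run `date`.")
ctx.talk(ctx.functions.call) while ctx.functions.any?
```

The old README notes the closure form's advantage: it captures its surrounding scope, which a class-based tool cannot.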
````diff
-####
-
-llm.rb is designed for threaded environments with throughput in mind.
-Locks are used selectively, and localized state is preferred wherever
-possible. Blanket locking across every class could help guarantee
-correctness but it could also add contention, reduce throughput,
-and increase complexity.
-
-That's why we decided to optimize for both correctness and throughput
-instead. An important part of that design is guaranteeing that
-[LLM::Provider](https://0x1eef.github.io/x/llm.rb/LLM/Provider.html)
-is safe to share and use across threads. [LLM::Session](https://0x1eef.github.io/x/llm.rb/LLM/Session.html) and
-[LLM::Agent](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html) are
-stateful objects that should be kept local to a single thread.
-
-[LLM::Tracer](https://0x1eef.github.io/x/llm.rb/LLM/Tracer.html) and its
-subclasses are also designed to be thread-local, which means that
-`llm.tracer = ...` only impacts the current thread and must be set
-again in each thread where a tracer is desired. This avoids contention
-on tracer state, keeps tracing isolated per thread, and allows different
-tracers to be used in different threads simultaneously.
-
-So the recommended pattern is to keep one session, tracer or agent per
-thread, and share a provider across multiple threads:
+#### Structured Outputs
 
+The `LLM::Schema` system lets you define JSON schemas that LLMs must follow.
+Schemas can be defined as classes with `property` declarations or built
+programmatically using a fluent interface. When you pass a schema to a context,
+llm.rb automatically configures the provider's JSON mode and validates
+responses against your schema. The `content!` method returns the parsed JSON
+object, while errors are captured as structured data rather than raising
+exceptions:
 
 ```ruby
 #!/usr/bin/env ruby
 require "llm"
+require "pp"
 
-
-
+class Report < LLM::Schema
+  property :category, Enum["performance", "security", "outage"], "Report category", required: true
+  property :summary, String, "Short summary", required: true
+  property :impact, OneOf[String, Integer], "Primary impact, as text or a count", required: true
+  property :services, Array[String], "Impacted services", required: true
+  property :timestamp, String, "When it happened", optional: true
+end
 
-
-
-
-
-    res = ses.talk "#{x} + 5 = ?"
-    res.content!
-  end
-end.map(&:value)
+llm = LLM.openai(key: ENV["KEY"])
+ctx = LLM::Context.new(llm, schema: Report)
+res = ctx.talk("Structure this report: 'Database latency spiked at 10:42 UTC, causing 5% request timeouts for 12 minutes.'")
+pp res.content!
 
-
+# {
+#  "category" => "performance",
+#  "summary" => "Database latency spiked, causing 5% request timeouts for 12 minutes.",
+#  "impact" => "5% request timeouts",
+#  "services" => ["Database"],
+#  "timestamp" => "2024-06-05T10:42:00Z"
+# }
 ```
 
-##
-
-#### General
-- ✅ Unified API across providers
-- 📦 Zero runtime deps (stdlib-only)
-- 🧵 Thread-safe providers for multi-threaded workloads
-- 🧩 Pluggable JSON adapters (JSON, Oj, Yajl, etc)
-- 🧱 Builtin tracer API ([LLM::Tracer](https://0x1eef.github.io/x/llm.rb/LLM/Tracer.html))
+## Providers
 
-
+llm.rb supports multiple LLM providers with a unified API.
+All providers share the same context, tool, and concurrency interfaces, making
+it easy to switch between cloud and local models:
 
-
-
-
+- **OpenAI** (`LLM.openai`)
+- **Anthropic** (`LLM.anthropic`)
+- **Google** (`LLM.google`)
+- **DeepSeek** (`LLM.deepseek`)
+- **xAI** (`LLM.xai`)
+- **zAI** (`LLM.zai`)
+- **Ollama** (`LLM.ollama`)
+- **Llama.cpp** (`LLM.llamacpp`)
 
````
|
-
|
|
241
|
-
- 🧠 Stateless + stateful chat (completions + responses)
|
|
242
|
-
- 💾 Save and restore sessions across processes
|
|
243
|
-
- 🤖 Tool calling / function execution
|
|
244
|
-
- 🔁 Agent tool-call auto-execution (bounded)
|
|
245
|
-
- 🗂️ JSON Schema structured output
|
|
246
|
-
- 📡 Streaming responses
|
|
247
|
-
|
|
248
|
-
#### Media
|
|
249
|
-
- 🗣️ TTS, transcription, translation
|
|
250
|
-
- 🖼️ Image generation + editing
|
|
251
|
-
- 📎 Files API + prompt-aware file inputs
|
|
252
|
-
- 📦 Streaming multipart uploads (no full buffering)
|
|
253
|
-
- 💡 Multimodal prompts (text, documents, audio, images, video, URLs)
|
|
254
|
-
|
|
255
|
-
#### Embeddings
|
|
256
|
-
- 🧮 Embeddings
|
|
257
|
-
- 🧱 OpenAI vector stores (RAG)
|
|
270
|
+
## Production
|
|
258
271
|
|
|
259
|
-
####
|
|
260
|
-
- 📜 Models API
|
|
261
|
-
- 🔧 OpenAI responses + moderations
|
|
272
|
+
#### Ready for production
|
|
262
273
|
|
|
263
|
-
|
|
274
|
+
llm.rb is designed for production use from the ground up:
|
|
264
275
|
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
| **Multimodal Prompts** *(text, documents, audio, images, videos, URLs, etc)* | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ |
|
|
273
|
-
| **Files API** | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
|
274
|
-
| **Models API** | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ |
|
|
275
|
-
| **Audio (TTS / Transcribe / Translate)** | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
|
276
|
-
| **Image Generation & Editing** | ✅ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ |
|
|
277
|
-
| **Local Model Support** | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ |
|
|
278
|
-
| **Vector Stores (RAG)** | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
|
279
|
-
| **Responses** | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
|
280
|
-
| **Moderations** | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
|
276
|
+
- **Thread-safe providers** - Share `LLM::Provider` instances across your application
|
|
277
|
+
- **Thread-local contexts** - Keep `LLM::Context` instances thread-local for state isolation
|
|
278
|
+
- **Cost tracking** - Know your spend before the bill arrives
|
|
279
|
+
- **Observability** - Built-in tracing with OpenTelemetry support
|
|
280
|
+
- **Persistence** - Save and restore contexts across processes
|
|
281
|
+
- **Performance** - Swap JSON adapters and enable HTTP connection pooling
|
|
282
|
+
- **Error handling** - Structured errors, not unpredictable exceptions
|
|
281
283
|
|
|
282
|
-
|
|
284
|
+
#### Tracing
|
|
283
285
|
|
|
286
|
+
llm.rb includes built-in tracers for local logging, OpenTelemetry, and
|
|
287
|
+
LangSmith. Assign a tracer to a provider and all context requests and tool
|
|
288
|
+
calls made through that provider will be instrumented. Tracers are local to
|
|
289
|
+
the current fiber, so the same provider can use different tracers in different
|
|
290
|
+
concurrent tasks without interfering with each other.
|
|
284
291
|
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
### Providers
|
|
288
|
-
|
|
289
|
-
#### LLM::Provider
|
|
290
|
-
|
|
291
|
-
All providers inherit from [LLM::Provider](https://0x1eef.github.io/x/llm.rb/LLM/Provider.html) –
|
|
292
|
-
they share a common interface and set of functionality. Each provider can be instantiated
|
|
293
|
-
using an API key (if required) and an optional set of configuration options via
|
|
294
|
-
[the singleton methods of LLM](https://0x1eef.github.io/x/llm.rb/LLM.html). For example:
|
|
295
|
-
|
|
296
|
-
```ruby
|
|
297
|
-
#!/usr/bin/env ruby
|
|
298
|
-
require "llm"
|
|
299
|
-
|
|
300
|
-
##
|
|
301
|
-
# remote providers
|
|
302
|
-
llm = LLM.openai(key: "yourapikey")
|
|
303
|
-
llm = LLM.gemini(key: "yourapikey")
|
|
304
|
-
llm = LLM.anthropic(key: "yourapikey")
|
|
305
|
-
llm = LLM.xai(key: "yourapikey")
|
|
306
|
-
llm = LLM.zai(key: "yourapikey")
|
|
307
|
-
llm = LLM.deepseek(key: "yourapikey")
|
|
308
|
-
|
|
309
|
-
##
|
|
310
|
-
# local providers
|
|
311
|
-
llm = LLM.ollama(key: nil)
|
|
312
|
-
llm = LLM.llamacpp(key: nil)
|
|
313
|
-
```
|
|
314
|
-
|
|
315
|
-
#### LLM::Response
|
|
316
|
-
|
|
317
|
-
All provider methods that perform requests return an
|
|
318
|
-
[LLM::Response](https://0x1eef.github.io/x/llm.rb/LLM/Response.html).
|
|
319
|
-
If the HTTP response is JSON (`content-type: application/json`),
|
|
320
|
-
`response.body` is parsed into an
|
|
321
|
-
[LLM::Object](https://0x1eef.github.io/x/llm.rb/LLM/Object.html) for
|
|
322
|
-
dot-access. For non-JSON responses, `response.body` is a raw string.
|
|
323
|
-
It is also possible to access top-level keys directly on the response
|
|
324
|
-
(eg: `res.object` instead of `res.body.object`):
|
|
292
|
+
Use the logger tracer when you want structured logs through Ruby's standard
|
|
293
|
+
library:
|
|
325
294
|
|
|
326
295
|
```ruby
|
|
327
296
|
#!/usr/bin/env ruby
|
|
328
297
|
require "llm"
|
|
329
298
|
|
|
330
299
|
llm = LLM.openai(key: ENV["KEY"])
|
|
331
|
-
|
|
332
|
-
puts res.object
|
|
333
|
-
puts res.data.first.id
|
|
334
|
-
```
|
|
335
|
-
|
|
336
|
-
#### Persistence
|
|
337
|
-
|
|
338
|
-
The llm.rb library can maintain a process-wide connection pool
|
|
339
|
-
for each provider that is instantiated. This feature can improve
|
|
340
|
-
performance but it is optional, the implementation depends on
|
|
341
|
-
[net-http-persistent](https://github.com/drbrain/net-http-persistent),
|
|
342
|
-
and the gem should be installed separately:
|
|
343
|
-
|
|
344
|
-
```ruby
|
|
345
|
-
#!/usr/bin/env ruby
|
|
346
|
-
require "llm"
|
|
300
|
+
llm.tracer = LLM::Tracer::Logger.new(llm, io: $stdout)
|
|
347
301
|
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
res2 = llm.responses.create "message 2", previous_response_id: res1.response_id
|
|
351
|
-
res3 = llm.responses.create "message 3", previous_response_id: res2.response_id
|
|
352
|
-
puts res3.output_text
|
|
302
|
+
ctx = LLM::Context.new(llm)
|
|
303
|
+
ctx.talk("Hello")
|
|
353
304
|
```
|
|
354
305
|
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
The llm.rb library includes telemetry support through its tracer API, and it
|
|
358
|
-
can be used to trace LLM requests. It can be useful for debugging, monitoring,
|
|
359
|
-
and observability. The primary use case in mind is integration with tools like
|
|
360
|
-
[LangSmith](https://www.langsmith.com/).
|
|
361
|
-
|
|
362
|
-
It is worth mentioning that tracers are local to a thread, and they
|
|
363
|
-
should be configured per thread. That means that `llm.tracer = LLM::Tracer::Telemetry.new(llm)`
|
|
364
|
-
only impacts the current thread, and it should be repeated in each thread where
|
|
365
|
-
tracing is required.
|
|
366
|
-
|
|
367
|
-
The telemetry implementation uses the [opentelemetry-sdk](https://github.com/open-telemetry/opentelemetry-ruby)
|
|
368
|
-
and is based on the [gen-ai telemetry spec(s)](https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/).
|
|
369
|
-
This feature is optional, disabled by default, and the [opentelemetry-sdk](https://github.com/open-telemetry/opentelemetry-ruby)
|
|
370
|
-
gem should be installed separately. Please also note that llm.rb will take care of
|
|
371
|
-
loading and configuring the [opentelemetry-sdk](https://github.com/open-telemetry/opentelemetry-ruby)
|
|
372
|
-
library for you, and llm.rb configures an in-memory exporter that doesn't have
|
|
373
|
-
external dependencies by default:
|
|
306
|
+
Use the telemetry tracer when you want OpenTelemetry spans. This requires the
|
|
307
|
+
`opentelemetry-sdk` gem, and exporters such as OTLP can be added separately:
|
|
374
308
|
|
|
375
309
|
```ruby
|
|
376
310
|
#!/usr/bin/env ruby
|
|
377
311
|
require "llm"
|
|
378
|
-
require "pp"
|
|
379
312
|
|
|
380
313
|
llm = LLM.openai(key: ENV["KEY"])
|
|
381
314
|
llm.tracer = LLM::Tracer::Telemetry.new(llm)
|
|
382
315
|
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
ses.tracer.spans.each { |span| pp span }
|
|
316
|
+
ctx = LLM::Context.new(llm)
|
|
317
|
+
ctx.talk("Hello")
|
|
318
|
+
pp llm.tracer.spans
|
|
387
319
|
```
|
|
388
320
|
|
|
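The removed 4.8.0 text warned that telemetry export is batched in the background, so short-lived scripts should flush the exporter before exit. A sketch assuming the `exporter:` keyword and `flush!` from the removed example still apply, with a context in place of the old session:

```ruby
#!/usr/bin/env ruby
require "llm"
require "opentelemetry-exporter-otlp"

# An OTLP exporter, here pointed at LangSmith as in the 4.8.0 README
endpoint = "https://api.smith.langchain.com/otel/v1/traces"
exporter = OpenTelemetry::Exporter::OTLP::Exporter.new(endpoint:)

llm = LLM.openai(key: ENV["KEY"])
llm.tracer = LLM::Tracer::Telemetry.new(llm, exporter:)

ctx = LLM::Context.new(llm)
ctx.talk("Hello")

at_exit do
  # Flush pending spans; otherwise a short-lived script may exit
  # before the batched exporter has written them
  llm.tracer.flush!
end
```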
````diff
-
-
-multiple observability tools. By default the export is batched in the background,
-and happens automatically but short lived scripts might need to
-[explicitly flush](https://0x1eef.github.io/x/llm.rb/LLM/Tracer/Telemetry#flush!-instance_method)
-the exporter before they exit – otherwise some telemetry data could be lost:
-
-```ruby
-#!/usr/bin/env ruby
-require "llm"
-require "opentelemetry-exporter-otlp"
-
-endpoint = "https://api.smith.langchain.com/otel/v1/traces"
-exporter = OpenTelemetry::Exporter::OTLP::Exporter.new(endpoint:)
-
-llm = LLM.openai(key: ENV["KEY"])
-llm.tracer = LLM::Tracer::Telemetry.new(llm, exporter:)
-
-ses = LLM::Session.new(llm)
-ses.talk "hello"
-ses.talk "how are you?"
-
-at_exit do
-  # Helpful for short-lived scripts, otherwise the exporter
-  # might not have time to flush pending telemetry data
-  ses.tracer.flush!
-end
-```
-
-#### Logger
-
-The llm.rb library includes simple logging support through its
-tracer API, and Ruby's standard library ([ruby/logger](https://github.com/ruby/logger)).
-This feature is optional, disabled by default, and it can be useful for debugging and/or
-monitoring requests to LLM providers. The `path` or `io` options can be used to choose
-where logs are written, and by default it is set to `$stdout`. Like other tracers,
-the logger tracer is local to a thread:
+Use the LangSmith tracer when you want LangSmith-compatible metadata and trace
+grouping on top of the telemetry tracer:
 
 ```ruby
 #!/usr/bin/env ruby
 require "llm"
 
 llm = LLM.openai(key: ENV["KEY"])
-llm.tracer = LLM::Tracer::
+llm.tracer = LLM::Tracer::Langsmith.new(
+  llm,
+  metadata: {env: "dev"},
+  tags: ["chatbot"]
+)
 
-
-
-ses.talk "Adios."
+ctx = LLM::Context.new(llm)
+ctx.talk("Hello")
 ```
 
-####
+#### Thread Safety
 
-
-
-
-
-
-– inclusive of tool metadata as well:
+llm.rb uses Ruby's `Monitor` class to ensure thread safety at the provider
+level, allowing you to share a single provider instance across multiple threads
+while maintaining state isolation through thread-local contexts. This design
+enables efficient resource sharing while preventing race conditions in
+concurrent applications:
 
-* Process 1
 ```ruby
 #!/usr/bin/env ruby
 require "llm"
 
+# Thread-safe providers - create once, use everywhere
 llm = LLM.openai(key: ENV["KEY"])
-ses = LLM::Session.new(llm)
-ses.talk "Howdy partner"
-ses.talk "I'll see you later"
-ses.save(path: "session.json")
-```
-* Process 2
-```ruby
-#!/usr/bin/env ruby
-require "llm"
-require "pp"
 
-
-
-
-
-
-```
-
-But how does it work without a file ? The [LLM::Session](https://0x1eef.github.io/x/llm.rb/LLM/Session.html)
-class implements `#to_json` and it can be used to obtain a JSON representation
-of a session that can be stored in a `jsonb` column in PostgreSQL, or any
-other storage backend. The session can then be restored from the JSON
-representation via the restore method and its `string` argument:
-
-```ruby
-#!/usr/bin/env ruby
-require "llm"
+# Each thread should have its own context for state isolation
+Thread.new do
+  ctx = LLM::Context.new(llm) # Thread-local context
+  ctx.talk("Hello from thread 1")
+end
 
-
-
-
-
-
-json = ses1.to_json
-ses2 = LLM::Session.new(llm)
-ses2.restore(string: json)
-ses2.talk "Howdy partner. I'm back"
+Thread.new do
+  ctx = LLM::Context.new(llm) # Thread-local context
+  ctx.talk("Hello from thread 2")
+end
 ```
 
````
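One caveat about the snippet above, assuming plain Ruby `Thread` semantics: a standalone script can exit before spawned threads finish, so in practice you would keep the handles and join them:

```ruby
# Continuing from the example above: `llm` is the shared provider
threads = 2.times.map do |i|
  Thread.new do
    ctx = LLM::Context.new(llm) # Thread-local context
    ctx.talk("Hello from thread #{i + 1}")
  end
end
threads.each(&:join) # Wait for both conversations to complete
```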
````diff
-
+#### Performance Tuning
 
-
-
-
-
-
-Its most notable feature is that it can act as a closure and has access to
-its surrounding scope, which can be useful in some situations:
+llm.rb's JSON adapter system lets you swap JSON libraries for better
+performance in high-throughput applications. The library supports stdlib JSON,
+Oj, and Yajl, with Oj typically offering the best performance. Additionally,
+you can enable HTTP connection pooling using the optional `net-http-persistent`
+gem to reduce connection overhead in production environments:
 
 ```ruby
 #!/usr/bin/env ruby
 require "llm"
 
-
-
-  fn.description "Run a shell command"
-  fn.params do |schema|
-    schema.object(command: schema.string.required)
-  end
-  fn.define do |command:|
-    ro, wo = IO.pipe
-    re, we = IO.pipe
-    Process.wait Process.spawn(command, out: wo, err: we)
-    [wo,we].each(&:close)
-    {stderr: re.read, stdout: ro.read}
-  end
-end
-
-ses = LLM::Session.new(llm, tools: [tool])
-ses.talk "Your task is to run shell commands via a tool.", role: :user
-
-ses.talk "What is the current date?", role: :user
-ses.talk ses.functions.map(&:call) # report return value to the LLM
+# Swap JSON libraries for better performance
+LLM.json = :oj # Use Oj for faster JSON parsing
 
-
-
-
-##
-# {stderr: "", stdout: "Thu May 1 10:01:02 UTC 2025"}
-# {stderr: "", stdout: "FreeBSD"}
+# Enable HTTP connection pooling for high-throughput applications
+llm = LLM.openai(key: ENV["KEY"]).persist! # Uses net-http-persistent when available
 ```
 
-####
-
-The [LLM::Tool](https://0x1eef.github.io/x/llm.rb/LLM/Tool.html) class can be used
-to implement a [LLM::Function](https://0x1eef.github.io/x/llm.rb/LLM/Function.html)
-as a class. Under the hood, a subclass of [LLM::Tool](https://0x1eef.github.io/x/llm.rb/LLM/Tool.html)
-wraps an instance of [LLM::Function](https://0x1eef.github.io/x/llm.rb/LLM/Function.html)
-and delegates to it.
+#### Model Registry
 
-
-and
-
-
-
+llm.rb includes a local model registry that provides metadata about model
+capabilities, pricing, and limits without requiring API calls. The registry is
+shipped with the gem and sourced from https://models.dev, giving you access to
+up-to-date information about context windows, token costs, and supported
+modalities for each provider:
 
 ```ruby
 #!/usr/bin/env ruby
 require "llm"
 
-
-
-
-
-
-  def call(command:)
-    ro, wo = IO.pipe
-    re, we = IO.pipe
-    Process.wait Process.spawn(command, out: wo, err: we)
-    [wo,we].each(&:close)
-    {stderr: re.read, stdout: ro.read}
-  end
-end
-
-llm = LLM.openai(key: ENV["KEY"])
-ses = LLM::Session.new(llm, tools: [System])
-ses.talk "Your task is to run shell commands via a tool.", role: :user
-
-ses.talk "What is the current date?", role: :user
-ses.talk ses.functions.map(&:call) # report return value to the LLM
-
-ses.talk "What operating system am I running?", role: :user
-ses.talk ses.functions.map(&:call) # report return value to the LLM
-
-##
-# {stderr: "", stdout: "Thu May 1 10:01:02 UTC 2025"}
-# {stderr: "", stdout: "FreeBSD"}
+# Access model metadata, capabilities, and pricing
+registry = LLM.registry_for(:openai)
+model_info = registry.limit(model: "gpt-4.1")
+puts "Context window: #{model_info.context} tokens"
+puts "Cost: $#{model_info.cost.input}/1M input tokens"
 ```
 
-
+## More Examples
 
-####
+#### Responses API
 
-
-
-
-it has been uploaded. The file (a specialized instance of
-[LLM::Response](https://0x1eef.github.io/x/llm.rb/LLM/Response.html)
-) is given as part of a prompt that is understood by llm.rb:
+llm.rb also supports OpenAI's Responses API through `llm.responses` and
+`ctx.respond`. This API can maintain response state server-side and can reduce
+how much conversation state needs to be sent on each turn:
 
 ```ruby
 #!/usr/bin/env ruby
 require "llm"
-require "pp"
 
 llm = LLM.openai(key: ENV["KEY"])
-
-file = llm.files.create(file: "/tmp/llm-book.pdf")
-res = ses.talk ["Tell me about this file", file]
-pp res.content
-```
-
-### Prompts
+ctx = LLM::Context.new(llm)
 
-
+ctx.respond("Your task is to answer the user's questions", role: :developer)
+res = ctx.respond("What is the capital of France?")
+puts res.output_text
+```
 
-
-and URLs. With llm.rb you pass those inputs by tagging them with one of
-the following methods. And for multipart prompts, we can pass an array
-where each element is a part of the input. See the example below for
-details, in the meantime here are the methods to know for multimodal
-inputs:
+#### Context Persistence
 
-
-
-
+Contexts can be serialized and restored across process boundaries. This makes
+it possible to persist conversation state in a file, database, or queue and
+resume work later:
 
 ```ruby
 #!/usr/bin/env ruby
 require "llm"
 
 llm = LLM.openai(key: ENV["KEY"])
-
-
-
-
+ctx = LLM::Context.new(llm)
+ctx.talk("Hello")
+ctx.talk("Remember that my favorite language is Ruby")
+ctx.save(path: "context.json")
+
+restored = LLM::Context.new(llm)
+restored.restore(path: "context.json")
+res = restored.talk("What is my favorite language?")
+puts res.content
 ```
 
````
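The removed 4.8.0 README also documented file-free persistence: `#to_json` produced a JSON string suitable for a PostgreSQL `jsonb` column or any other storage backend, and `restore(string:)` rebuilt the conversation from it. Assuming `LLM::Context` keeps that API from the old `LLM::Session` (the deserializer was moved with only minor changes, per the file list above), a sketch:

```ruby
#!/usr/bin/env ruby
require "llm"

llm = LLM.openai(key: ENV["KEY"])
ctx = LLM::Context.new(llm)
ctx.talk("Hello")

# Serialize to a JSON string instead of a file
json = ctx.to_json

restored = LLM::Context.new(llm)
restored.restore(string: json)
restored.talk("I'm back")
```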
````diff
-
-
-#### Speech
+#### Agents
 
-
-
-
-
-
+Agents in llm.rb are reusable, preconfigured assistants that automatically
+execute tool calls and maintain conversation state. Unlike contexts which
+require manual tool execution, agents automatically handle the tool call loop,
+making them ideal for autonomous workflows where you want the LLM to
+independently use available tools to accomplish tasks:
 
 ```ruby
 #!/usr/bin/env ruby
 require "llm"
 
+class SystemAdmin < LLM::Agent
+  model "gpt-4.1"
+  instructions "You are a Linux system admin"
+  tools Shell
+  schema Result
+end
+
 llm = LLM.openai(key: ENV["KEY"])
-
-
+agent = SystemAdmin.new(llm)
+res = agent.talk("Run 'date'")
 ```
 
-####
+#### Cost Tracking
 
-
-
-
-
+llm.rb provides built-in cost estimation that works without making additional
+API calls. The cost tracking system uses the local model registry to calculate
+estimated costs based on token usage, giving you visibility into spending
+before bills arrive. This is particularly useful for monitoring usage in
+production applications and setting budget alerts:
 
 ```ruby
 #!/usr/bin/env ruby
 require "llm"
 
 llm = LLM.openai(key: ENV["KEY"])
-
-
-
-
+ctx = LLM::Context.new(llm)
+ctx.talk "Hello"
+puts "Estimated cost so far: $#{ctx.cost}"
+ctx.talk "Tell me a joke"
+puts "Estimated cost so far: $#{ctx.cost}"
 ```
 
-####
+#### Multimodal Prompts
 
-
-
-
-and at the time of writing, it can only translate to English:
+Contexts provide helpers for composing multimodal prompts from URLs, local
+files, and provider-managed remote files. These tagged objects let providers
+adapt the input into the format they expect:
 
 ```ruby
 #!/usr/bin/env ruby
 require "llm"
 
 llm = LLM.openai(key: ENV["KEY"])
-
-  file: File.join(Dir.home, "bomdia.mp3")
-)
-puts res.text # => "Good morning."
-```
-
-### Images
+ctx = LLM::Context.new(llm)
 
-
-
-Some but not all LLM providers implement image generation capabilities that
-can create new images from a prompt, or edit an existing image with a
-prompt. The following example uses the OpenAI provider to create an
-image of a dog on a rocket to the moon. The image is then written to
-`${HOME}/dogonrocket.png` as the final step:
-
-```ruby
-#!/usr/bin/env ruby
-require "llm"
-llm = LLM.openai(key: ENV["KEY"])
-res = llm.images.create(prompt: "a dog on a rocket to the moon")
-IO.copy_stream res.images[0], File.join(Dir.home, "dogonrocket.png")
+res = ctx.talk ["Describe this image", ctx.image_url("https://example.com/cat.jpg")]
+puts res.content
 ```
 
-####
+#### Audio Generation
 
-
-
-
-
+llm.rb supports OpenAI's audio API for text-to-speech generation, allowing you
+to create speech from text with configurable voices and output formats. The
+audio API returns binary audio data that can be streamed directly to files or
+other IO objects, enabling integration with multimedia applications:
 
 ```ruby
 #!/usr/bin/env ruby
 require "llm"
+
 llm = LLM.openai(key: ENV["KEY"])
-res = llm.
-
-  prompt: "add a hat to the logo",
-)
-IO.copy_stream res.images[0], File.join(Dir.home, "logo-with-hat.png")
+res = llm.audio.create_speech(input: "Hello world")
+IO.copy_stream res.audio, File.join(Dir.home, "hello.mp3")
 ```
 
-####
+#### Image Generation
 
-
-
-
-
+llm.rb provides access to OpenAI's DALL-E image generation API through a
+unified interface. The API supports multiple response formats including
+base64-encoded images and temporary URLs, with automatic handling of binary
+data streaming for efficient file operations:
 
 ```ruby
 #!/usr/bin/env ruby
 require "llm"
+
 llm = LLM.openai(key: ENV["KEY"])
-res = llm.images.
-
-  n: 5
-)
-res.images.each.with_index do |image, index|
-  IO.copy_stream image,
-    File.join(Dir.home, "logo-variation#{index}.png")
-end
+res = llm.images.create(prompt: "a dog on a rocket to the moon")
+IO.copy_stream res.images[0], File.join(Dir.home, "dogonrocket.png")
 ```
 
-
-
-#### Text
+#### Embeddings
 
-
-
-
-
+llm.rb's embedding API generates vector representations of text for semantic
+search and retrieval-augmented generation (RAG) workflows. The API supports
+batch processing of multiple inputs and returns normalized vectors suitable for
+vector similarity operations, with consistent dimensionality across providers:
 
 ```ruby
 #!/usr/bin/env ruby
@@ -760,52 +553,32 @@ puts res.class
 puts res.embeddings.size
 puts res.embeddings[0].size
 
-##
 # LLM::Response
 # 3
 # 1536
 ```
 
-
+## Real-World Example: Relay
 
-
+See how these pieces come together in a complete application architecture with
+[Relay](https://github.com/llmrb/relay), a production-ready LLM application
+built on llm.rb that demonstrates:
 
-
-
-
-
+- Context management across requests
+- Tool composition and execution
+- Concurrent workflows
+- Cost tracking and observability
+- Production deployment patterns
 
-
-#!/usr/bin/env ruby
-require "llm"
-require "pp"
-
-##
-# List all models
-llm = LLM.openai(key: ENV["KEY"])
-llm.models.all.each do |model|
-  puts "model: #{model.id}"
-end
-
-##
-# Select a model
-model = llm.models.all.find { |m| m.id == "gpt-3.5-turbo" }
-ses = LLM::Session.new(llm, model: model.id)
-res = ses.talk "Hello #{model.id} :)"
-pp res.content
-```
+Watch the screencast:
 
-
+[![llm.rb screencast](https://img.youtube.com/vi/x1K4wMeO_QA/maxresdefault.jpg)](https://www.youtube.com/watch?v=x1K4wMeO_QA)
 
-
+## Installation
 
-
-
-
-
-* [GitHub.com](https://github.com/llmrb/llm.rb)
-* [GitLab.com](https://gitlab.com/llmrb/llm.rb)
-* [Codeberg.org](https://codeberg.org/llmrb/llm.rb)
+```bash
+gem install llm.rb
+```
 
 ## License
 
````