llm.rb 4.8.0 → 4.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +337 -590
- data/data/anthropic.json +770 -0
- data/data/deepseek.json +75 -0
- data/data/google.json +1050 -0
- data/data/openai.json +1421 -0
- data/data/xai.json +792 -0
- data/data/zai.json +330 -0
- data/lib/llm/agent.rb +42 -41
- data/lib/llm/bot.rb +1 -263
- data/lib/llm/buffer.rb +7 -0
- data/lib/llm/{session → context}/deserializer.rb +4 -3
- data/lib/llm/context.rb +292 -0
- data/lib/llm/cost.rb +26 -0
- data/lib/llm/error.rb +8 -0
- data/lib/llm/function/array.rb +61 -0
- data/lib/llm/function/fiber_group.rb +91 -0
- data/lib/llm/function/task_group.rb +89 -0
- data/lib/llm/function/thread_group.rb +94 -0
- data/lib/llm/function.rb +75 -10
- data/lib/llm/mcp/command.rb +108 -0
- data/lib/llm/mcp/error.rb +31 -0
- data/lib/llm/mcp/pipe.rb +82 -0
- data/lib/llm/mcp/rpc.rb +118 -0
- data/lib/llm/mcp/transport/stdio.rb +85 -0
- data/lib/llm/mcp.rb +102 -0
- data/lib/llm/message.rb +13 -11
- data/lib/llm/model.rb +2 -2
- data/lib/llm/prompt.rb +17 -7
- data/lib/llm/provider.rb +32 -17
- data/lib/llm/providers/anthropic/files.rb +3 -3
- data/lib/llm/providers/anthropic.rb +19 -4
- data/lib/llm/providers/deepseek.rb +10 -3
- data/lib/llm/providers/{gemini → google}/audio.rb +6 -6
- data/lib/llm/providers/{gemini → google}/error_handler.rb +2 -2
- data/lib/llm/providers/{gemini → google}/files.rb +11 -11
- data/lib/llm/providers/{gemini → google}/images.rb +7 -7
- data/lib/llm/providers/{gemini → google}/models.rb +5 -5
- data/lib/llm/providers/{gemini → google}/request_adapter/completion.rb +7 -3
- data/lib/llm/providers/{gemini → google}/request_adapter.rb +1 -1
- data/lib/llm/providers/{gemini → google}/response_adapter/completion.rb +7 -7
- data/lib/llm/providers/{gemini → google}/response_adapter/embedding.rb +1 -1
- data/lib/llm/providers/{gemini → google}/response_adapter/file.rb +1 -1
- data/lib/llm/providers/{gemini → google}/response_adapter/files.rb +1 -1
- data/lib/llm/providers/{gemini → google}/response_adapter/image.rb +1 -1
- data/lib/llm/providers/{gemini → google}/response_adapter/models.rb +1 -1
- data/lib/llm/providers/{gemini → google}/response_adapter/web_search.rb +2 -2
- data/lib/llm/providers/{gemini → google}/response_adapter.rb +8 -8
- data/lib/llm/providers/{gemini → google}/stream_parser.rb +3 -3
- data/lib/llm/providers/{gemini.rb → google.rb} +41 -26
- data/lib/llm/providers/llamacpp.rb +10 -3
- data/lib/llm/providers/ollama.rb +19 -4
- data/lib/llm/providers/openai/files.rb +3 -3
- data/lib/llm/providers/openai/response_adapter/completion.rb +9 -1
- data/lib/llm/providers/openai/response_adapter/responds.rb +9 -1
- data/lib/llm/providers/openai/responses.rb +9 -1
- data/lib/llm/providers/openai/stream_parser.rb +2 -0
- data/lib/llm/providers/openai.rb +19 -4
- data/lib/llm/providers/xai.rb +10 -3
- data/lib/llm/providers/zai.rb +9 -2
- data/lib/llm/registry.rb +81 -0
- data/lib/llm/schema/parser.rb +109 -0
- data/lib/llm/schema.rb +4 -0
- data/lib/llm/server_tool.rb +5 -5
- data/lib/llm/session.rb +10 -1
- data/lib/llm/tool.rb +85 -4
- data/lib/llm/tracer/logger.rb +1 -1
- data/lib/llm/tracer/telemetry.rb +7 -7
- data/lib/llm/tracer.rb +3 -3
- data/lib/llm/usage.rb +5 -0
- data/lib/llm/version.rb +1 -1
- data/lib/llm.rb +39 -6
- data/llm.gemspec +45 -8
- metadata +81 -28
data/README.md
CHANGED
|
@@ -4,751 +4,518 @@
|
|
|
4
4
|
<p align="center">
|
|
5
5
|
<a href="https://0x1eef.github.io/x/llm.rb?rebuild=1"><img src="https://img.shields.io/badge/docs-0x1eef.github.io-blue.svg" alt="RubyDoc"></a>
|
|
6
6
|
<a href="https://opensource.org/license/0bsd"><img src="https://img.shields.io/badge/License-0BSD-orange.svg?" alt="License"></a>
|
|
7
|
-
<a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-4.
|
|
7
|
+
<a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-4.9.0-green.svg?" alt="Version"></a>
|
|
8
8
|
</p>
|
|
9
9
|
|
|
10
10
|
## About
|
|
11
11
|
|
|
12
|
-
llm.rb is a
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
12
|
+
llm.rb is a Ruby-centric toolkit for building real LLM-powered systems — where
|
|
13
|
+
LLMs are part of your architecture, not just API calls. It gives you explicit
|
|
14
|
+
control over contexts, tools, concurrency, and providers, so you can compose
|
|
15
|
+
reliable, production-ready workflows without hidden abstractions.
|
|
16
16
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
by the license. Built with [good music](https://www.youtube.com/watch?v=SNvaqwTbn14)
|
|
21
|
-
and a lot of ☕️.
|
|
17
|
+
Built for engineers who want to understand and control their LLM systems. No
|
|
18
|
+
frameworks, no hidden magic — just composable primitives for building real
|
|
19
|
+
applications, from scripts to full systems like [Relay](https://github.com/llmrb/relay).
|
|
22
20
|
|
|
23
|
-
|
|
21
|
+
Jump to [Quick start](#quick-start), discover its [capabilities](#capabilities), read about
|
|
22
|
+
its [architecture](#architecture--execution-model) or watch the
|
|
23
|
+
[Screencast](https://www.youtube.com/watch?v=x1K4wMeO_QA) for a deep dive into the design
|
|
24
|
+
and capabilities of llm.rb.
|
|
24
25
|
|
|
25
|
-
|
|
26
|
+
## What Makes It Different
|
|
26
27
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
multiple requests. The following example implements a simple REPL loop, and the response
|
|
30
|
-
is streamed to the terminal in real-time as it arrives from the provider. The provider
|
|
31
|
-
happens to be OpenAI in this case but it could be any other provider, and `$stdout`
|
|
32
|
-
could be any object that implements the `#<<` method:
|
|
28
|
+
Most LLM libraries stop at requests and responses. <br>
|
|
29
|
+
llm.rb is built around the state and execution model that surround them:
|
|
33
30
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
31
|
+
- **Contexts are central** <br>
|
|
32
|
+
They hold history, tools, schema, usage, cost, persistence, and execution state.
|
|
33
|
+
- **Tool execution is explicit** <br>
|
|
34
|
+
Run local, provider-native, and MCP tools sequentially or concurrently with threads, fibers, or async tasks.
|
|
35
|
+
- **One API across providers and capabilities** <br>
|
|
36
|
+
The same model covers chat, files, images, audio, embeddings, vector stores, and more.
|
|
37
|
+
- **Thread-safe where it matters** <br>
|
|
38
|
+
Providers are shareable, while contexts stay isolated and stateful.
|
|
39
|
+
- **Local metadata, fewer extra API calls** <br>
|
|
40
|
+
A built-in registry provides model capabilities, limits, pricing, and cost estimation.
|
|
41
|
+
- **Stdlib-only by default** <br>
|
|
42
|
+
llm.rb runs on the Ruby standard library by default, with providers, optional features, and the model registry loaded only when you use them.
|
|
46
43
|
|
|
47
|
-
|
|
44
|
+
## Architecture & Execution Model
|
|
48
45
|
|
|
49
|
-
|
|
50
|
-
a simple DSL for describing the structure of a response that an LLM emits according
|
|
51
|
-
to a JSON schema. The schema lets a client describe what JSON object an LLM should
|
|
52
|
-
emit, and the LLM will abide by the schema to the best of its ability:
|
|
46
|
+
llm.rb is built in layers, each providing explicit control:
|
|
53
47
|
|
|
54
|
-
```
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
llm = LLM.openai(key: ENV["KEY"])
|
|
67
|
-
ses = LLM::Session.new(llm, schema: Report)
|
|
68
|
-
res = ses.talk("Structure this report: 'Database latency spiked at 10:42 UTC, causing 5% request timeouts for 12 minutes.'")
|
|
69
|
-
pp res.content!
|
|
70
|
-
|
|
71
|
-
##
|
|
72
|
-
# {
|
|
73
|
-
# "category" => "Performance Incident",
|
|
74
|
-
# "summary" => "Database latency spiked, causing 5% request timeouts for 12 minutes.",
|
|
75
|
-
# "services" => ["Database"],
|
|
76
|
-
# "timestamp" => "2024-06-05T10:42:00Z"
|
|
77
|
-
# }
|
|
48
|
+
```
|
|
49
|
+
┌─────────────────────────────────────────┐
|
|
50
|
+
│ Your Application │
|
|
51
|
+
├─────────────────────────────────────────┤
|
|
52
|
+
│ Contexts & Agents │ ← Stateful workflows
|
|
53
|
+
├─────────────────────────────────────────┤
|
|
54
|
+
│ Tools & Functions │ ← Concurrent execution
|
|
55
|
+
├─────────────────────────────────────────┤
|
|
56
|
+
│ Unified Provider API (OpenAI, etc.) │ ← Provider abstraction
|
|
57
|
+
├─────────────────────────────────────────┤
|
|
58
|
+
│ HTTP, JSON, Thread Safety │ ← Infrastructure
|
|
59
|
+
└─────────────────────────────────────────┘
|
|
78
60
|
```
|
|
79
61
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
62
|
+
### Key Design Decisions
|
|
63
|
+
|
|
64
|
+
- **Thread-safe providers** - `LLM::Provider` instances are safe to share across threads
|
|
65
|
+
- **Thread-local contexts** - `LLM::Context` should generally be kept thread-local
|
|
66
|
+
- **Lazy loading** - Providers, optional features, and the model registry load on demand
|
|
67
|
+
- **JSON adapter system** - Swap JSON libraries (JSON/Oj/Yajl) for performance
|
|
68
|
+
- **Registry system** - Local metadata for model capabilities, limits, and pricing
|
|
69
|
+
- **Provider adaptation** - Normalizes differences between OpenAI, Anthropic, Google, and other providers
|
|
70
|
+
- **Structured tool execution** - Errors are captured and returned as data, not raised unpredictably
|
|
71
|
+
- **Function vs Tool APIs** - Choose between class-based tools and closure-based functions
|
|
72
|
+
|
|
73
|
+
## Capabilities
|
|
74
|
+
|
|
75
|
+
llm.rb provides a complete set of primitives for building LLM-powered systems:
|
|
76
|
+
|
|
77
|
+
- **Chat & Contexts** — stateless and stateful interactions with persistence
|
|
78
|
+
- **Streaming** — real-time responses across providers
|
|
79
|
+
- **Tool Calling** — define and execute functions with automatic orchestration
|
|
80
|
+
- **Concurrent Execution** — threads, async tasks, and fibers
|
|
81
|
+
- **Agents** — reusable, preconfigured assistants with tool auto-execution
|
|
82
|
+
- **Structured Outputs** — JSON schema-based responses
|
|
83
|
+
- **MCP Support** — integrate external tool servers dynamically
|
|
84
|
+
- **Multimodal Inputs** — text, images, audio, documents, URLs
|
|
85
|
+
- **Audio** — text-to-speech, transcription, translation
|
|
86
|
+
- **Images** — generation and editing
|
|
87
|
+
- **Files API** — upload and reference files in prompts
|
|
88
|
+
- **Embeddings** — vector generation for search and RAG
|
|
89
|
+
- **Vector Stores** — OpenAI-based retrieval workflows
|
|
90
|
+
- **Cost Tracking** — estimate usage without API calls
|
|
91
|
+
- **Observability** — tracing, logging, telemetry
|
|
92
|
+
- **Model Registry** — local metadata for capabilities, limits, pricing
|
|
93
|
+
|
|
94
|
+
## Quick Start
|
|
95
|
+
|
|
96
|
+
#### Concurrent Tools
|
|
97
|
+
|
|
98
|
+
llm.rb provides explicit concurrency control for tool execution. The
|
|
99
|
+
`wait(:thread)` method spawns each pending function in its own thread and waits
|
|
100
|
+
for all to complete. You can also use `:fiber` for cooperative multitasking or
|
|
101
|
+
`:task` for async/await patterns (requires the `async` gem). The context
|
|
102
|
+
automatically collects all results and reports them back to the LLM in a
|
|
103
|
+
single turn, maintaining conversation flow while parallelizing independent
|
|
104
|
+
operations:
|
|
88
105
|
|
|
89
106
|
```ruby
|
|
90
107
|
#!/usr/bin/env ruby
|
|
91
108
|
require "llm"
|
|
92
109
|
|
|
93
|
-
class System < LLM::Tool
|
|
94
|
-
name "system"
|
|
95
|
-
description "Run a shell command"
|
|
96
|
-
param :command, String, "Command to execute", required: true
|
|
97
|
-
|
|
98
|
-
def call(command:)
|
|
99
|
-
{success: system(command)}
|
|
100
|
-
end
|
|
101
|
-
end
|
|
102
|
-
|
|
103
110
|
llm = LLM.openai(key: ENV["KEY"])
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
111
|
+
ctx = LLM::Context.new(llm, stream: $stdout, tools: [FetchWeather, FetchNews, FetchStock])
|
|
112
|
+
|
|
113
|
+
# Execute multiple independent tools concurrently
|
|
114
|
+
ctx.talk("Summarize the weather, headlines, and stock price.")
|
|
115
|
+
ctx.talk(ctx.functions.wait(:thread)) while ctx.functions.any?
|
|
107
116
|
```
|
|
108
117
|
|
|
109
|
-
####
|
|
118
|
+
#### MCP
|
|
110
119
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
[LLM::Session](https://0x1eef.github.io/x/llm.rb/LLM/Session.html),
|
|
116
|
-
an [LLM::Agent](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html)
|
|
117
|
-
will automatically call tools when needed:
|
|
120
|
+
llm.rb integrates with the Model Context Protocol (MCP) to dynamically discover
|
|
121
|
+
and use tools from external servers. This example starts a filesystem MCP
|
|
122
|
+
server over stdio and makes its tools available to a context, enabling the LLM
|
|
123
|
+
to interact with the local file system through a standardized interface:
|
|
118
124
|
|
|
119
125
|
```ruby
|
|
120
126
|
#!/usr/bin/env ruby
|
|
121
127
|
require "llm"
|
|
122
128
|
|
|
123
|
-
class SystemAdmin < LLM::Agent
|
|
124
|
-
model "gpt-4.1"
|
|
125
|
-
instructions "You are a Linux system admin"
|
|
126
|
-
tools Shell
|
|
127
|
-
schema Result
|
|
128
|
-
end
|
|
129
|
-
|
|
130
129
|
llm = LLM.openai(key: ENV["KEY"])
|
|
131
|
-
|
|
132
|
-
|
|
130
|
+
mcp = LLM.mcp(stdio: {argv: ["npx", "-y", "@modelcontextprotocol/server-filesystem", Dir.pwd]})
|
|
131
|
+
|
|
132
|
+
begin
|
|
133
|
+
mcp.start
|
|
134
|
+
ctx = LLM::Context.new(llm, stream: $stdout, tools: mcp.tools)
|
|
135
|
+
ctx.talk("List the directories in this project.")
|
|
136
|
+
ctx.talk(ctx.functions.call) while ctx.functions.any?
|
|
137
|
+
ensure
|
|
138
|
+
mcp.stop
|
|
139
|
+
end
|
|
133
140
|
```
|
|
134
141
|
|
|
135
|
-
####
|
|
142
|
+
#### Streaming Chat
|
|
136
143
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
144
|
+
This example demonstrates llm.rb's streaming support. The `stream: $stdout`
|
|
145
|
+
parameter tells the context to write responses incrementally as they arrive
|
|
146
|
+
from the LLM. The `Context` object manages the conversation history, and
|
|
147
|
+
`talk()` sends your input while automatically appending both your message and
|
|
148
|
+
the LLM's response to the context. Streams accept any object with `#<<`,
|
|
149
|
+
giving you flexibility to pipe output to files, network sockets, or custom
|
|
150
|
+
buffers:
|
|
142
151
|
|
|
143
152
|
```ruby
|
|
144
153
|
#!/usr/bin/env ruby
|
|
145
154
|
require "llm"
|
|
146
155
|
|
|
147
156
|
llm = LLM.openai(key: ENV["KEY"])
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
user "Now double the speed for the same time."
|
|
157
|
+
ctx = LLM::Context.new(llm, stream: $stdout)
|
|
158
|
+
loop do
|
|
159
|
+
print "> "
|
|
160
|
+
ctx.talk(STDIN.gets || break)
|
|
161
|
+
puts
|
|
154
162
|
end
|
|
155
|
-
|
|
156
|
-
ses.talk(prompt)
|
|
157
163
|
```
|
|
158
164
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
165
|
+
#### Tool Calling
|
|
166
|
+
|
|
167
|
+
Tools in llm.rb can be defined as classes inheriting from `LLM::Tool` or as
|
|
168
|
+
closures using `LLM.function`. When the LLM requests a tool call, the context
|
|
169
|
+
stores `Function` objects in `ctx.functions`. The `call()` method executes all
|
|
170
|
+
pending functions and returns their results to the LLM. Tools support
|
|
171
|
+
structured parameters with JSON Schema validation and automatically adapt to
|
|
172
|
+
each provider's API format (OpenAI, Anthropic, Google, etc.):
|
|
163
173
|
|
|
164
174
|
```ruby
|
|
165
175
|
#!/usr/bin/env ruby
|
|
166
176
|
require "llm"
|
|
167
177
|
|
|
168
|
-
|
|
169
|
-
|
|
178
|
+
class System < LLM::Tool
|
|
179
|
+
name "system"
|
|
180
|
+
description "Run a shell command"
|
|
181
|
+
param :command, String, "Command to execute", required: true
|
|
170
182
|
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
user "Now double the speed for the same time."
|
|
183
|
+
def call(command:)
|
|
184
|
+
{success: system(command)}
|
|
185
|
+
end
|
|
175
186
|
end
|
|
176
187
|
|
|
177
|
-
|
|
188
|
+
llm = LLM.openai(key: ENV["KEY"])
|
|
189
|
+
ctx = LLM::Context.new(llm, stream: $stdout, tools: [System])
|
|
190
|
+
ctx.talk("Run `date`.")
|
|
191
|
+
ctx.talk(ctx.functions.call) while ctx.functions.any?
|
|
178
192
|
```
|
|
179
193
|
|
|
180
|
-
####
|
|
181
|
-
|
|
182
|
-
llm.rb is designed for threaded environments with throughput in mind.
|
|
183
|
-
Locks are used selectively, and localized state is preferred wherever
|
|
184
|
-
possible. Blanket locking across every class could help guarantee
|
|
185
|
-
correctness but it could also add contention, reduce throughput,
|
|
186
|
-
and increase complexity.
|
|
187
|
-
|
|
188
|
-
That's why we decided to optimize for both correctness and throughput
|
|
189
|
-
instead. An important part of that design is guaranteeing that
|
|
190
|
-
[LLM::Provider](https://0x1eef.github.io/x/llm.rb/LLM/Provider.html)
|
|
191
|
-
is safe to share and use across threads. [LLM::Session](https://0x1eef.github.io/x/llm.rb/LLM/Session.html) and
|
|
192
|
-
[LLM::Agent](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html) are
|
|
193
|
-
stateful objects that should be kept local to a single thread.
|
|
194
|
-
|
|
195
|
-
[LLM::Tracer](https://0x1eef.github.io/x/llm.rb/LLM/Tracer.html) and its
|
|
196
|
-
subclasses are also designed to be thread-local, which means that
|
|
197
|
-
`llm.tracer = ...` only impacts the current thread and must be set
|
|
198
|
-
again in each thread where a tracer is desired. This avoids contention
|
|
199
|
-
on tracer state, keeps tracing isolated per thread, and allows different
|
|
200
|
-
tracers to be used in different threads simultaneously.
|
|
201
|
-
|
|
202
|
-
So the recommended pattern is to keep one session, tracer or agent per
|
|
203
|
-
thread, and share a provider across multiple threads:
|
|
194
|
+
#### Structured Outputs
|
|
204
195
|
|
|
196
|
+
The `LLM::Schema` system lets you define JSON schemas that LLMs must follow.
|
|
197
|
+
Schemas can be defined as classes with `property` declarations or built
|
|
198
|
+
programmatically using a fluent interface. When you pass a schema to a context,
|
|
199
|
+
llm.rb automatically configures the provider's JSON mode and validates
|
|
200
|
+
responses against your schema. The `content!` method returns the parsed JSON
|
|
201
|
+
object, while errors are captured as structured data rather than raising
|
|
202
|
+
exceptions:
|
|
205
203
|
|
|
206
204
|
```ruby
|
|
207
205
|
#!/usr/bin/env ruby
|
|
208
206
|
require "llm"
|
|
207
|
+
require "pp"
|
|
209
208
|
|
|
210
|
-
|
|
211
|
-
|
|
209
|
+
class Report < LLM::Schema
|
|
210
|
+
property :category, Enum["performance", "security", "outage"], "Report category", required: true
|
|
211
|
+
property :summary, String, "Short summary", required: true
|
|
212
|
+
property :services, Array[String], "Impacted services", required: true
|
|
213
|
+
property :timestamp, String, "When it happened", optional: true
|
|
214
|
+
end
|
|
212
215
|
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
res = ses.talk "#{x} + 5 = ?"
|
|
218
|
-
res.content!
|
|
219
|
-
end
|
|
220
|
-
end.map(&:value)
|
|
216
|
+
llm = LLM.openai(key: ENV["KEY"])
|
|
217
|
+
ctx = LLM::Context.new(llm, schema: Report)
|
|
218
|
+
res = ctx.talk("Structure this report: 'Database latency spiked at 10:42 UTC, causing 5% request timeouts for 12 minutes.'")
|
|
219
|
+
pp res.content!
|
|
221
220
|
|
|
222
|
-
|
|
221
|
+
# {
|
|
222
|
+
# "category" => "performance",
|
|
223
|
+
# "summary" => "Database latency spiked, causing 5% request timeouts for 12 minutes.",
|
|
224
|
+
# "services" => ["Database"],
|
|
225
|
+
# "timestamp" => "2024-06-05T10:42:00Z"
|
|
226
|
+
# }
|
|
223
227
|
```
|
|
224
228
|
|
|
225
|
-
##
|
|
226
|
-
|
|
227
|
-
#### General
|
|
228
|
-
- ✅ Unified API across providers
|
|
229
|
-
- 📦 Zero runtime deps (stdlib-only)
|
|
230
|
-
- 🧵 Thread-safe providers for multi-threaded workloads
|
|
231
|
-
- 🧩 Pluggable JSON adapters (JSON, Oj, Yajl, etc)
|
|
232
|
-
- 🧱 Builtin tracer API ([LLM::Tracer](https://0x1eef.github.io/x/llm.rb/LLM/Tracer.html))
|
|
233
|
-
|
|
234
|
-
#### Optionals
|
|
235
|
-
|
|
236
|
-
- ♻️ Optional persistent HTTP pool via net-http-persistent ([net-http-persistent](https://github.com/drbrain/net-http-persistent))
|
|
237
|
-
- 📈 Optional telemetry support via OpenTelemetry ([opentelemetry-sdk](https://github.com/open-telemetry/opentelemetry-ruby))
|
|
238
|
-
- 🪵 Optional logging support via Ruby's standard library ([ruby/logger](https://github.com/ruby/logger))
|
|
239
|
-
|
|
240
|
-
#### Chat, Agents
|
|
241
|
-
- 🧠 Stateless + stateful chat (completions + responses)
|
|
242
|
-
- 💾 Save and restore sessions across processes
|
|
243
|
-
- 🤖 Tool calling / function execution
|
|
244
|
-
- 🔁 Agent tool-call auto-execution (bounded)
|
|
245
|
-
- 🗂️ JSON Schema structured output
|
|
246
|
-
- 📡 Streaming responses
|
|
247
|
-
|
|
248
|
-
#### Media
|
|
249
|
-
- 🗣️ TTS, transcription, translation
|
|
250
|
-
- 🖼️ Image generation + editing
|
|
251
|
-
- 📎 Files API + prompt-aware file inputs
|
|
252
|
-
- 📦 Streaming multipart uploads (no full buffering)
|
|
253
|
-
- 💡 Multimodal prompts (text, documents, audio, images, video, URLs)
|
|
254
|
-
|
|
255
|
-
#### Embeddings
|
|
256
|
-
- 🧮 Embeddings
|
|
257
|
-
- 🧱 OpenAI vector stores (RAG)
|
|
229
|
+
## Providers
|
|
258
230
|
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
231
|
+
llm.rb supports multiple LLM providers with a unified API.
|
|
232
|
+
All providers share the same context, tool, and concurrency interfaces, making
|
|
233
|
+
it easy to switch between cloud and local models:
|
|
262
234
|
|
|
263
|
-
|
|
235
|
+
- **OpenAI** (`LLM.openai`)
|
|
236
|
+
- **Anthropic** (`LLM.anthropic`)
|
|
237
|
+
- **Google** (`LLM.google`)
|
|
238
|
+
- **DeepSeek** (`LLM.deepseek`)
|
|
239
|
+
- **xAI** (`LLM.xai`)
|
|
240
|
+
- **zAI** (`LLM.zai`)
|
|
241
|
+
- **Ollama** (`LLM.ollama`)
|
|
242
|
+
- **Llama.cpp** (`LLM.llamacpp`)
|
|
264
243
|
|
|
265
|
-
|
|
266
|
-
|--------------------------------------|:------:|:---------:|:------:|:--------:|:----------:|:------:|:------:|:--------:|
|
|
267
|
-
| **Chat Completions** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
268
|
-
| **Streaming** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
269
|
-
| **Tool Calling** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
270
|
-
| **JSON Schema / Structured Output** | ✅ | ❌ | ✅ | ❌ | ✅ | ❌ | ✅* | ✅* |
|
|
271
|
-
| **Embeddings** | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ |
|
|
272
|
-
| **Multimodal Prompts** *(text, documents, audio, images, videos, URLs, etc)* | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ |
|
|
273
|
-
| **Files API** | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
|
274
|
-
| **Models API** | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ |
|
|
275
|
-
| **Audio (TTS / Transcribe / Translate)** | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
|
276
|
-
| **Image Generation & Editing** | ✅ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ |
|
|
277
|
-
| **Local Model Support** | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ |
|
|
278
|
-
| **Vector Stores (RAG)** | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
|
279
|
-
| **Responses** | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
|
280
|
-
| **Moderations** | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
|
244
|
+
## Production
|
|
281
245
|
|
|
282
|
-
|
|
246
|
+
#### Ready for production
|
|
283
247
|
|
|
248
|
+
llm.rb is designed for production use from the ground up:
|
|
284
249
|
|
|
285
|
-
|
|
250
|
+
- **Thread-safe providers** - Share `LLM::Provider` instances across your application
|
|
251
|
+
- **Thread-local contexts** - Keep `LLM::Context` instances thread-local for state isolation
|
|
252
|
+
- **Cost tracking** - Know your spend before the bill arrives
|
|
253
|
+
- **Observability** - Built-in tracing with OpenTelemetry support
|
|
254
|
+
- **Persistence** - Save and restore contexts across processes
|
|
255
|
+
- **Performance** - Swap JSON adapters and enable HTTP connection pooling
|
|
256
|
+
- **Error handling** - Structured errors, not unpredictable exceptions
|
|
286
257
|
|
|
287
|
-
|
|
258
|
+
#### Tracing
|
|
288
259
|
|
|
289
|
-
|
|
260
|
+
llm.rb includes built-in tracers for local logging, OpenTelemetry, and
|
|
261
|
+
LangSmith. Assign a tracer to a provider and all context requests and tool
|
|
262
|
+
calls made through that provider will be instrumented. Tracers are local to
|
|
263
|
+
the current fiber, so the same provider can use different tracers in different
|
|
264
|
+
concurrent tasks without interfering with each other.
|
|
290
265
|
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
using an API key (if required) and an optional set of configuration options via
|
|
294
|
-
[the singleton methods of LLM](https://0x1eef.github.io/x/llm.rb/LLM.html). For example:
|
|
295
|
-
|
|
296
|
-
```ruby
|
|
297
|
-
#!/usr/bin/env ruby
|
|
298
|
-
require "llm"
|
|
299
|
-
|
|
300
|
-
##
|
|
301
|
-
# remote providers
|
|
302
|
-
llm = LLM.openai(key: "yourapikey")
|
|
303
|
-
llm = LLM.gemini(key: "yourapikey")
|
|
304
|
-
llm = LLM.anthropic(key: "yourapikey")
|
|
305
|
-
llm = LLM.xai(key: "yourapikey")
|
|
306
|
-
llm = LLM.zai(key: "yourapikey")
|
|
307
|
-
llm = LLM.deepseek(key: "yourapikey")
|
|
308
|
-
|
|
309
|
-
##
|
|
310
|
-
# local providers
|
|
311
|
-
llm = LLM.ollama(key: nil)
|
|
312
|
-
llm = LLM.llamacpp(key: nil)
|
|
313
|
-
```
|
|
314
|
-
|
|
315
|
-
#### LLM::Response
|
|
316
|
-
|
|
317
|
-
All provider methods that perform requests return an
|
|
318
|
-
[LLM::Response](https://0x1eef.github.io/x/llm.rb/LLM/Response.html).
|
|
319
|
-
If the HTTP response is JSON (`content-type: application/json`),
|
|
320
|
-
`response.body` is parsed into an
|
|
321
|
-
[LLM::Object](https://0x1eef.github.io/x/llm.rb/LLM/Object.html) for
|
|
322
|
-
dot-access. For non-JSON responses, `response.body` is a raw string.
|
|
323
|
-
It is also possible to access top-level keys directly on the response
|
|
324
|
-
(eg: `res.object` instead of `res.body.object`):
|
|
266
|
+
Use the logger tracer when you want structured logs through Ruby's standard
|
|
267
|
+
library:
|
|
325
268
|
|
|
326
269
|
```ruby
|
|
327
270
|
#!/usr/bin/env ruby
|
|
328
271
|
require "llm"
|
|
329
272
|
|
|
330
273
|
llm = LLM.openai(key: ENV["KEY"])
|
|
331
|
-
|
|
332
|
-
puts res.object
|
|
333
|
-
puts res.data.first.id
|
|
334
|
-
```
|
|
335
|
-
|
|
336
|
-
#### Persistence
|
|
337
|
-
|
|
338
|
-
The llm.rb library can maintain a process-wide connection pool
|
|
339
|
-
for each provider that is instantiated. This feature can improve
|
|
340
|
-
performance but it is optional, the implementation depends on
|
|
341
|
-
[net-http-persistent](https://github.com/drbrain/net-http-persistent),
|
|
342
|
-
and the gem should be installed separately:
|
|
343
|
-
|
|
344
|
-
```ruby
|
|
345
|
-
#!/usr/bin/env ruby
|
|
346
|
-
require "llm"
|
|
274
|
+
llm.tracer = LLM::Tracer::Logger.new(llm, io: $stdout)
|
|
347
275
|
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
res2 = llm.responses.create "message 2", previous_response_id: res1.response_id
|
|
351
|
-
res3 = llm.responses.create "message 3", previous_response_id: res2.response_id
|
|
352
|
-
puts res3.output_text
|
|
276
|
+
ctx = LLM::Context.new(llm)
|
|
277
|
+
ctx.talk("Hello")
|
|
353
278
|
```
|
|
354
279
|
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
The llm.rb library includes telemetry support through its tracer API, and it
|
|
358
|
-
can be used to trace LLM requests. It can be useful for debugging, monitoring,
|
|
359
|
-
and observability. The primary use case in mind is integration with tools like
|
|
360
|
-
[LangSmith](https://www.langsmith.com/).
|
|
361
|
-
|
|
362
|
-
It is worth mentioning that tracers are local to a thread, and they
|
|
363
|
-
should be configured per thread. That means that `llm.tracer = LLM::Tracer::Telemetry.new(llm)`
|
|
364
|
-
only impacts the current thread, and it should be repeated in each thread where
|
|
365
|
-
tracing is required.
|
|
366
|
-
|
|
367
|
-
The telemetry implementation uses the [opentelemetry-sdk](https://github.com/open-telemetry/opentelemetry-ruby)
|
|
368
|
-
and is based on the [gen-ai telemetry spec(s)](https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/).
|
|
369
|
-
This feature is optional, disabled by default, and the [opentelemetry-sdk](https://github.com/open-telemetry/opentelemetry-ruby)
|
|
370
|
-
gem should be installed separately. Please also note that llm.rb will take care of
|
|
371
|
-
loading and configuring the [opentelemetry-sdk](https://github.com/open-telemetry/opentelemetry-ruby)
|
|
372
|
-
library for you, and llm.rb configures an in-memory exporter that doesn't have
|
|
373
|
-
external dependencies by default:
|
|
280
|
+
Use the telemetry tracer when you want OpenTelemetry spans. This requires the
|
|
281
|
+
`opentelemetry-sdk` gem, and exporters such as OTLP can be added separately:
|
|
374
282
|
|
|
375
283
|
```ruby
|
|
376
284
|
#!/usr/bin/env ruby
|
|
377
285
|
require "llm"
|
|
378
|
-
require "pp"
|
|
379
286
|
|
|
380
287
|
llm = LLM.openai(key: ENV["KEY"])
|
|
381
288
|
llm.tracer = LLM::Tracer::Telemetry.new(llm)
|
|
382
289
|
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
ses.tracer.spans.each { |span| pp span }
|
|
290
|
+
ctx = LLM::Context.new(llm)
|
|
291
|
+
ctx.talk("Hello")
|
|
292
|
+
pp llm.tracer.spans
|
|
387
293
|
```
|
|
388
294
|
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
multiple observability tools. By default the export is batched in the background,
|
|
392
|
-
and happens automatically but short lived scripts might need to
|
|
393
|
-
[explicitly flush](https://0x1eef.github.io/x/llm.rb/LLM/Tracer/Telemetry#flush!-instance_method)
|
|
394
|
-
the exporter before they exit – otherwise some telemetry data could be lost:
|
|
395
|
-
|
|
396
|
-
```ruby
|
|
397
|
-
#!/usr/bin/env ruby
|
|
398
|
-
require "llm"
|
|
399
|
-
require "opentelemetry-exporter-otlp"
|
|
400
|
-
|
|
401
|
-
endpoint = "https://api.smith.langchain.com/otel/v1/traces"
|
|
402
|
-
exporter = OpenTelemetry::Exporter::OTLP::Exporter.new(endpoint:)
|
|
403
|
-
|
|
404
|
-
llm = LLM.openai(key: ENV["KEY"])
|
|
405
|
-
llm.tracer = LLM::Tracer::Telemetry.new(llm, exporter:)
|
|
406
|
-
|
|
407
|
-
ses = LLM::Session.new(llm)
|
|
408
|
-
ses.talk "hello"
|
|
409
|
-
ses.talk "how are you?"
|
|
410
|
-
|
|
411
|
-
at_exit do
|
|
412
|
-
# Helpful for short-lived scripts, otherwise the exporter
|
|
413
|
-
# might not have time to flush pending telemetry data
|
|
414
|
-
ses.tracer.flush!
|
|
415
|
-
end
|
|
416
|
-
```
|
|
417
|
-
|
|
418
|
-
#### Logger
|
|
419
|
-
|
|
420
|
-
The llm.rb library includes simple logging support through its
|
|
421
|
-
tracer API, and Ruby's standard library ([ruby/logger](https://github.com/ruby/logger)).
|
|
422
|
-
This feature is optional, disabled by default, and it can be useful for debugging and/or
|
|
423
|
-
monitoring requests to LLM providers. The `path` or `io` options can be used to choose
|
|
424
|
-
where logs are written, and by default it is set to `$stdout`. Like other tracers,
|
|
425
|
-
the logger tracer is local to a thread:
|
|
295
|
+
Use the LangSmith tracer when you want LangSmith-compatible metadata and trace
|
|
296
|
+
grouping on top of the telemetry tracer:
|
|
426
297
|
|
|
427
298
|
```ruby
|
|
428
299
|
#!/usr/bin/env ruby
|
|
429
300
|
require "llm"
|
|
430
301
|
|
|
431
302
|
llm = LLM.openai(key: ENV["KEY"])
|
|
432
|
-
llm.tracer = LLM::Tracer::
|
|
303
|
+
llm.tracer = LLM::Tracer::Langsmith.new(
|
|
304
|
+
llm,
|
|
305
|
+
metadata: {env: "dev"},
|
|
306
|
+
tags: ["chatbot"]
|
|
307
|
+
)
|
|
433
308
|
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
ses.talk "Adios."
|
|
309
|
+
ctx = LLM::Context.new(llm)
|
|
310
|
+
ctx.talk("Hello")
|
|
437
311
|
```
|
|
438
312
|
|
|
439
|
-
####
|
|
313
|
+
#### Thread Safety
|
|
440
314
|
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
– inclusive of tool metadata as well:
|
|
315
|
+
llm.rb uses Ruby's `Monitor` class to ensure thread safety at the provider
|
|
316
|
+
level, allowing you to share a single provider instance across multiple threads
|
|
317
|
+
while maintaining state isolation through thread-local contexts. This design
|
|
318
|
+
enables efficient resource sharing while preventing race conditions in
|
|
319
|
+
concurrent applications:
|
|
447
320
|
|
|
448
|
-
* Process 1
|
|
449
321
|
```ruby
|
|
450
322
|
#!/usr/bin/env ruby
|
|
451
323
|
require "llm"
|
|
452
324
|
|
|
325
|
+
# Thread-safe providers - create once, use everywhere
|
|
453
326
|
llm = LLM.openai(key: ENV["KEY"])
|
|
454
|
-
ses = LLM::Session.new(llm)
|
|
455
|
-
ses.talk "Howdy partner"
|
|
456
|
-
ses.talk "I'll see you later"
|
|
457
|
-
ses.save(path: "session.json")
|
|
458
|
-
```
|
|
459
|
-
* Process 2
|
|
460
|
-
```ruby
|
|
461
|
-
#!/usr/bin/env ruby
|
|
462
|
-
require "llm"
|
|
463
|
-
require "pp"
|
|
464
|
-
|
|
465
|
-
llm = LLM.openai(key: ENV["KEY"])
|
|
466
|
-
ses = LLM::Session.new(llm)
|
|
467
|
-
ses.restore(path: "session.json")
|
|
468
|
-
ses.talk "Howdy partner. I'm back"
|
|
469
|
-
pp ses.messages
|
|
470
|
-
```
|
|
471
|
-
|
|
472
|
-
But how does it work without a file ? The [LLM::Session](https://0x1eef.github.io/x/llm.rb/LLM/Session.html)
|
|
473
|
-
class implements `#to_json` and it can be used to obtain a JSON representation
|
|
474
|
-
of a session that can be stored in a `jsonb` column in PostgreSQL, or any
|
|
475
|
-
other storage backend. The session can then be restored from the JSON
|
|
476
|
-
representation via the restore method and its `string` argument:
|
|
477
327
|
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
328
|
+
# Each thread should have its own context for state isolation
|
|
329
|
+
Thread.new do
|
|
330
|
+
ctx = LLM::Context.new(llm) # Thread-local context
|
|
331
|
+
ctx.talk("Hello from thread 1")
|
|
332
|
+
end
|
|
481
333
|
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
json = ses1.to_json
|
|
488
|
-
ses2 = LLM::Session.new(llm)
|
|
489
|
-
ses2.restore(string: json)
|
|
490
|
-
ses2.talk "Howdy partner. I'm back"
|
|
334
|
+
Thread.new do
|
|
335
|
+
ctx = LLM::Context.new(llm) # Thread-local context
|
|
336
|
+
ctx.talk("Hello from thread 2")
|
|
337
|
+
end
|
|
491
338
|
```
|
|
492
339
|
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
#### LLM::Function
|
|
340
|
+
#### Performance Tuning
|
|
496
341
|
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
342
|
+
llm.rb's JSON adapter system lets you swap JSON libraries for better
|
|
343
|
+
performance in high-throughput applications. The library supports stdlib JSON,
|
|
344
|
+
Oj, and Yajl, with Oj typically offering the best performance. Additionally,
|
|
345
|
+
you can enable HTTP connection pooling using the optional `net-http-persistent`
|
|
346
|
+
gem to reduce connection overhead in production environments:
|
|
502
347
|
|
|
503
348
|
```ruby
|
|
504
349
|
#!/usr/bin/env ruby
|
|
505
350
|
require "llm"
|
|
506
351
|
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
fn.description "Run a shell command"
|
|
510
|
-
fn.params do |schema|
|
|
511
|
-
schema.object(command: schema.string.required)
|
|
512
|
-
end
|
|
513
|
-
fn.define do |command:|
|
|
514
|
-
ro, wo = IO.pipe
|
|
515
|
-
re, we = IO.pipe
|
|
516
|
-
Process.wait Process.spawn(command, out: wo, err: we)
|
|
517
|
-
[wo,we].each(&:close)
|
|
518
|
-
{stderr: re.read, stdout: ro.read}
|
|
519
|
-
end
|
|
520
|
-
end
|
|
521
|
-
|
|
522
|
-
ses = LLM::Session.new(llm, tools: [tool])
|
|
523
|
-
ses.talk "Your task is to run shell commands via a tool.", role: :user
|
|
524
|
-
|
|
525
|
-
ses.talk "What is the current date?", role: :user
|
|
526
|
-
ses.talk ses.functions.map(&:call) # report return value to the LLM
|
|
527
|
-
|
|
528
|
-
ses.talk "What operating system am I running?", role: :user
|
|
529
|
-
ses.talk ses.functions.map(&:call) # report return value to the LLM
|
|
352
|
+
# Swap JSON libraries for better performance
|
|
353
|
+
LLM.json = :oj # Use Oj for faster JSON parsing
|
|
530
354
|
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
# {stderr: "", stdout: "FreeBSD"}
|
|
355
|
+
# Enable HTTP connection pooling for high-throughput applications
|
|
356
|
+
llm = LLM.openai(key: ENV["KEY"]).persist! # Uses net-http-persistent when available
|
|
534
357
|
```
|
|
535
358
|
|
|
536
|
-
####
|
|
359
|
+
#### Model Registry
|
|
537
360
|
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
The choice between [LLM::Function](https://0x1eef.github.io/x/llm.rb/LLM/Function.html)
|
|
545
|
-
and [LLM::Tool](https://0x1eef.github.io/x/llm.rb/LLM/Tool.html) is often a matter of
|
|
546
|
-
preference but each carry their own benefits. For example, [LLM::Function](https://0x1eef.github.io/x/llm.rb/LLM/Function.html)
|
|
547
|
-
has the benefit of being a closure that has access to its surrounding context and
|
|
548
|
-
sometimes that is useful:
|
|
361
|
+
llm.rb includes a local model registry that provides metadata about model
|
|
362
|
+
capabilities, pricing, and limits without requiring API calls. The registry is
|
|
363
|
+
shipped with the gem and sourced from https://models.dev, giving you access to
|
|
364
|
+
up-to-date information about context windows, token costs, and supported
|
|
365
|
+
modalities for each provider:
|
|
549
366
|
|
|
550
367
|
```ruby
|
|
551
368
|
#!/usr/bin/env ruby
|
|
552
369
|
require "llm"
|
|
553
370
|
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
def call(command:)
|
|
560
|
-
ro, wo = IO.pipe
|
|
561
|
-
re, we = IO.pipe
|
|
562
|
-
Process.wait Process.spawn(command, out: wo, err: we)
|
|
563
|
-
[wo,we].each(&:close)
|
|
564
|
-
{stderr: re.read, stdout: ro.read}
|
|
565
|
-
end
|
|
566
|
-
end
|
|
567
|
-
|
|
568
|
-
llm = LLM.openai(key: ENV["KEY"])
|
|
569
|
-
ses = LLM::Session.new(llm, tools: [System])
|
|
570
|
-
ses.talk "Your task is to run shell commands via a tool.", role: :user
|
|
571
|
-
|
|
572
|
-
ses.talk "What is the current date?", role: :user
|
|
573
|
-
ses.talk ses.functions.map(&:call) # report return value to the LLM
|
|
574
|
-
|
|
575
|
-
ses.talk "What operating system am I running?", role: :user
|
|
576
|
-
ses.talk ses.functions.map(&:call) # report return value to the LLM
|
|
577
|
-
|
|
578
|
-
##
|
|
579
|
-
# {stderr: "", stdout: "Thu May 1 10:01:02 UTC 2025"}
|
|
580
|
-
# {stderr: "", stdout: "FreeBSD"}
|
|
371
|
+
# Access model metadata, capabilities, and pricing
|
|
372
|
+
registry = LLM.registry_for(:openai)
|
|
373
|
+
model_info = registry.limit(model: "gpt-4.1")
|
|
374
|
+
puts "Context window: #{model_info.context} tokens"
|
|
375
|
+
puts "Cost: $#{model_info.cost.input}/1M input tokens"
|
|
581
376
|
```
|
|
582
377
|
|
|
583
|
-
|
|
378
|
+
## More Examples
|
|
584
379
|
|
|
585
|
-
####
|
|
380
|
+
#### Responses API
|
|
586
381
|
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
it has been uploaded. The file (a specialized instance of
|
|
591
|
-
[LLM::Response](https://0x1eef.github.io/x/llm.rb/LLM/Response.html)
|
|
592
|
-
) is given as part of a prompt that is understood by llm.rb:
|
|
382
|
+
llm.rb also supports OpenAI's Responses API through `llm.responses` and
|
|
383
|
+
`ctx.respond`. This API can maintain response state server-side and can reduce
|
|
384
|
+
how much conversation state needs to be sent on each turn:
|
|
593
385
|
|
|
594
386
|
```ruby
|
|
595
387
|
#!/usr/bin/env ruby
|
|
596
388
|
require "llm"
|
|
597
|
-
require "pp"
|
|
598
389
|
|
|
599
390
|
llm = LLM.openai(key: ENV["KEY"])
|
|
600
|
-
|
|
601
|
-
file = llm.files.create(file: "/tmp/llm-book.pdf")
|
|
602
|
-
res = ses.talk ["Tell me about this file", file]
|
|
603
|
-
pp res.content
|
|
604
|
-
```
|
|
391
|
+
ctx = LLM::Context.new(llm)
|
|
605
392
|
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
393
|
+
ctx.respond("Your task is to answer the user's questions", role: :developer)
|
|
394
|
+
res = ctx.respond("What is the capital of France?")
|
|
395
|
+
puts res.output_text
|
|
396
|
+
```
|
|
609
397
|
|
|
610
|
-
|
|
611
|
-
and URLs. With llm.rb you pass those inputs by tagging them with one of
|
|
612
|
-
the following methods. And for multipart prompts, we can pass an array
|
|
613
|
-
where each element is a part of the input. See the example below for
|
|
614
|
-
details, in the meantime here are the methods to know for multimodal
|
|
615
|
-
inputs:
|
|
398
|
+
#### Context Persistence
|
|
616
399
|
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
400
|
+
Contexts can be serialized and restored across process boundaries. This makes
|
|
401
|
+
it possible to persist conversation state in a file, database, or queue and
|
|
402
|
+
resume work later:
|
|
620
403
|
|
|
621
404
|
```ruby
|
|
622
405
|
#!/usr/bin/env ruby
|
|
623
406
|
require "llm"
|
|
624
407
|
|
|
625
408
|
llm = LLM.openai(key: ENV["KEY"])
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
409
|
+
ctx = LLM::Context.new(llm)
|
|
410
|
+
ctx.talk("Hello")
|
|
411
|
+
ctx.talk("Remember that my favorite language is Ruby")
|
|
412
|
+
ctx.save(path: "context.json")
|
|
413
|
+
|
|
414
|
+
restored = LLM::Context.new(llm)
|
|
415
|
+
restored.restore(path: "context.json")
|
|
416
|
+
res = restored.talk("What is my favorite language?")
|
|
417
|
+
puts res.content
|
|
630
418
|
```
|
|
631
419
|
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
#### Speech
|
|
420
|
+
#### Agents
|
|
635
421
|
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
422
|
+
Agents in llm.rb are reusable, preconfigured assistants that automatically
|
|
423
|
+
execute tool calls and maintain conversation state. Unlike contexts, which
|
|
424
|
+
require manual tool execution, agents automatically handle the tool call loop,
|
|
425
|
+
making them ideal for autonomous workflows where you want the LLM to
|
|
426
|
+
independently use available tools to accomplish tasks:
|
|
641
427
|
|
|
642
428
|
```ruby
|
|
643
429
|
#!/usr/bin/env ruby
|
|
644
430
|
require "llm"
|
|
645
431
|
|
|
432
|
+
class SystemAdmin < LLM::Agent
|
|
433
|
+
model "gpt-4.1"
|
|
434
|
+
instructions "You are a Linux system admin"
|
|
435
|
+
tools Shell
|
|
436
|
+
schema Result
|
|
437
|
+
end
|
|
438
|
+
|
|
646
439
|
llm = LLM.openai(key: ENV["KEY"])
|
|
647
|
-
|
|
648
|
-
|
|
440
|
+
agent = SystemAdmin.new(llm)
|
|
441
|
+
res = agent.talk("Run 'date'")
|
|
649
442
|
```
|
|
650
443
|
|
|
651
|
-
####
|
|
444
|
+
#### Cost Tracking
|
|
652
445
|
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
446
|
+
llm.rb provides built-in cost estimation that works without making additional
|
|
447
|
+
API calls. The cost tracking system uses the local model registry to calculate
|
|
448
|
+
estimated costs based on token usage, giving you visibility into spending
|
|
449
|
+
before bills arrive. This is particularly useful for monitoring usage in
|
|
450
|
+
production applications and setting budget alerts:
|
|
657
451
|
|
|
658
452
|
```ruby
|
|
659
453
|
#!/usr/bin/env ruby
|
|
660
454
|
require "llm"
|
|
661
455
|
|
|
662
456
|
llm = LLM.openai(key: ENV["KEY"])
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
457
|
+
ctx = LLM::Context.new(llm)
|
|
458
|
+
ctx.talk "Hello"
|
|
459
|
+
puts "Estimated cost so far: $#{ctx.cost}"
|
|
460
|
+
ctx.talk "Tell me a joke"
|
|
461
|
+
puts "Estimated cost so far: $#{ctx.cost}"
|
|
667
462
|
```
|
|
668
463
|
|
|
669
|
-
####
|
|
464
|
+
#### Multimodal Prompts
|
|
670
465
|
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
and at the time of writing, it can only translate to English:
|
|
466
|
+
Contexts provide helpers for composing multimodal prompts from URLs, local
|
|
467
|
+
files, and provider-managed remote files. These tagged objects let providers
|
|
468
|
+
adapt the input into the format they expect:
|
|
675
469
|
|
|
676
470
|
```ruby
|
|
677
471
|
#!/usr/bin/env ruby
|
|
678
472
|
require "llm"
|
|
679
473
|
|
|
680
474
|
llm = LLM.openai(key: ENV["KEY"])
|
|
681
|
-
|
|
682
|
-
file: File.join(Dir.home, "bomdia.mp3")
|
|
683
|
-
)
|
|
684
|
-
puts res.text # => "Good morning."
|
|
685
|
-
```
|
|
475
|
+
ctx = LLM::Context.new(llm)
|
|
686
476
|
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
#### Create
|
|
690
|
-
|
|
691
|
-
Some but not all LLM providers implement image generation capabilities that
|
|
692
|
-
can create new images from a prompt, or edit an existing image with a
|
|
693
|
-
prompt. The following example uses the OpenAI provider to create an
|
|
694
|
-
image of a dog on a rocket to the moon. The image is then written to
|
|
695
|
-
`${HOME}/dogonrocket.png` as the final step:
|
|
696
|
-
|
|
697
|
-
```ruby
|
|
698
|
-
#!/usr/bin/env ruby
|
|
699
|
-
require "llm"
|
|
700
|
-
llm = LLM.openai(key: ENV["KEY"])
|
|
701
|
-
res = llm.images.create(prompt: "a dog on a rocket to the moon")
|
|
702
|
-
IO.copy_stream res.images[0], File.join(Dir.home, "dogonrocket.png")
|
|
477
|
+
res = ctx.talk ["Describe this image", ctx.image_url("https://example.com/cat.jpg")]
|
|
478
|
+
puts res.content
|
|
703
479
|
```
|
|
704
480
|
|
|
705
|
-
####
|
|
481
|
+
#### Audio Generation
|
|
706
482
|
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
483
|
+
llm.rb supports OpenAI's audio API for text-to-speech generation, allowing you
|
|
484
|
+
to create speech from text with configurable voices and output formats. The
|
|
485
|
+
audio API returns binary audio data that can be streamed directly to files or
|
|
486
|
+
other IO objects, enabling integration with multimedia applications:
|
|
711
487
|
|
|
712
488
|
```ruby
|
|
713
489
|
#!/usr/bin/env ruby
|
|
714
490
|
require "llm"
|
|
491
|
+
|
|
715
492
|
llm = LLM.openai(key: ENV["KEY"])
|
|
716
|
-
res = llm.
|
|
717
|
-
|
|
718
|
-
prompt: "add a hat to the logo",
|
|
719
|
-
)
|
|
720
|
-
IO.copy_stream res.images[0], File.join(Dir.home, "logo-with-hat.png")
|
|
493
|
+
res = llm.audio.create_speech(input: "Hello world")
|
|
494
|
+
IO.copy_stream res.audio, File.join(Dir.home, "hello.mp3")
|
|
721
495
|
```
|
|
722
496
|
|
|
723
|
-
####
|
|
497
|
+
#### Image Generation
|
|
724
498
|
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
499
|
+
llm.rb provides access to OpenAI's DALL-E image generation API through a
|
|
500
|
+
unified interface. The API supports multiple response formats including
|
|
501
|
+
base64-encoded images and temporary URLs, with automatic handling of binary
|
|
502
|
+
data streaming for efficient file operations:
|
|
729
503
|
|
|
730
504
|
```ruby
|
|
731
505
|
#!/usr/bin/env ruby
|
|
732
506
|
require "llm"
|
|
507
|
+
|
|
733
508
|
llm = LLM.openai(key: ENV["KEY"])
|
|
734
|
-
res = llm.images.
|
|
735
|
-
|
|
736
|
-
n: 5
|
|
737
|
-
)
|
|
738
|
-
res.images.each.with_index do |image, index|
|
|
739
|
-
IO.copy_stream image,
|
|
740
|
-
File.join(Dir.home, "logo-variation#{index}.png")
|
|
741
|
-
end
|
|
509
|
+
res = llm.images.create(prompt: "a dog on a rocket to the moon")
|
|
510
|
+
IO.copy_stream res.images[0], File.join(Dir.home, "dogonrocket.png")
|
|
742
511
|
```
|
|
743
512
|
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
#### Text
|
|
513
|
+
#### Embeddings
|
|
747
514
|
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
515
|
+
llm.rb's embedding API generates vector representations of text for semantic
|
|
516
|
+
search and retrieval-augmented generation (RAG) workflows. The API supports
|
|
517
|
+
batch processing of multiple inputs and returns normalized vectors suitable for
|
|
518
|
+
vector similarity operations, with consistent dimensionality across providers:
|
|
752
519
|
|
|
753
520
|
```ruby
|
|
754
521
|
#!/usr/bin/env ruby
|
|
@@ -760,52 +527,32 @@ puts res.class
|
|
|
760
527
|
puts res.embeddings.size
|
|
761
528
|
puts res.embeddings[0].size
|
|
762
529
|
|
|
763
|
-
##
|
|
764
530
|
# LLM::Response
|
|
765
531
|
# 3
|
|
766
532
|
# 1536
|
|
767
533
|
```
|
|
768
534
|
|
|
769
|
-
|
|
535
|
+
## Real-World Example: Relay
|
|
770
536
|
|
|
771
|
-
|
|
537
|
+
See how these pieces come together in a complete application architecture with
|
|
538
|
+
[Relay](https://github.com/llmrb/relay), a production-ready LLM application
|
|
539
|
+
built on llm.rb that demonstrates:
|
|
772
540
|
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
541
|
+
- Context management across requests
|
|
542
|
+
- Tool composition and execution
|
|
543
|
+
- Concurrent workflows
|
|
544
|
+
- Cost tracking and observability
|
|
545
|
+
- Production deployment patterns
|
|
777
546
|
|
|
778
|
-
|
|
779
|
-
#!/usr/bin/env ruby
|
|
780
|
-
require "llm"
|
|
781
|
-
require "pp"
|
|
547
|
+
Watch the screencast:
|
|
782
548
|
|
|
783
|
-
|
|
784
|
-
# List all models
|
|
785
|
-
llm = LLM.openai(key: ENV["KEY"])
|
|
786
|
-
llm.models.all.each do |model|
|
|
787
|
-
puts "model: #{model.id}"
|
|
788
|
-
end
|
|
549
|
+
[Watch the Relay screencast on YouTube](https://www.youtube.com/watch?v=x1K4wMeO_QA)
|
|
789
550
|
|
|
790
|
-
##
|
|
791
|
-
# Select a model
|
|
792
|
-
model = llm.models.all.find { |m| m.id == "gpt-3.5-turbo" }
|
|
793
|
-
ses = LLM::Session.new(llm, model: model.id)
|
|
794
|
-
res = ses.talk "Hello #{model.id} :)"
|
|
795
|
-
pp res.content
|
|
796
|
-
```
|
|
797
|
-
|
|
798
|
-
## Install
|
|
799
|
-
|
|
800
|
-
llm.rb can be installed via rubygems.org:
|
|
551
|
+
## Installation
|
|
801
552
|
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
* [GitHub.com](https://github.com/llmrb/llm.rb)
|
|
807
|
-
* [GitLab.com](https://gitlab.com/llmrb/llm.rb)
|
|
808
|
-
* [Codeberg.org](https://codeberg.org/llmrb/llm.rb)
|
|
553
|
+
```bash
|
|
554
|
+
gem install llm.rb
|
|
555
|
+
```
|
|
809
556
|
|
|
810
557
|
## License
|
|
811
558
|
|