llm.rb 4.7.0 → 4.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +335 -587
- data/data/anthropic.json +770 -0
- data/data/deepseek.json +75 -0
- data/data/google.json +1050 -0
- data/data/openai.json +1421 -0
- data/data/xai.json +792 -0
- data/data/zai.json +330 -0
- data/lib/llm/agent.rb +42 -41
- data/lib/llm/bot.rb +1 -263
- data/lib/llm/buffer.rb +7 -0
- data/lib/llm/{session → context}/deserializer.rb +4 -3
- data/lib/llm/context.rb +292 -0
- data/lib/llm/cost.rb +26 -0
- data/lib/llm/error.rb +8 -0
- data/lib/llm/eventstream/parser.rb +0 -5
- data/lib/llm/function/array.rb +61 -0
- data/lib/llm/function/fiber_group.rb +91 -0
- data/lib/llm/function/task_group.rb +89 -0
- data/lib/llm/function/thread_group.rb +94 -0
- data/lib/llm/function.rb +75 -10
- data/lib/llm/mcp/command.rb +108 -0
- data/lib/llm/mcp/error.rb +31 -0
- data/lib/llm/mcp/pipe.rb +82 -0
- data/lib/llm/mcp/rpc.rb +118 -0
- data/lib/llm/mcp/transport/stdio.rb +85 -0
- data/lib/llm/mcp.rb +102 -0
- data/lib/llm/message.rb +13 -11
- data/lib/llm/model.rb +115 -0
- data/lib/llm/prompt.rb +17 -7
- data/lib/llm/provider.rb +60 -32
- data/lib/llm/providers/anthropic/error_handler.rb +1 -1
- data/lib/llm/providers/anthropic/files.rb +3 -3
- data/lib/llm/providers/anthropic/models.rb +1 -1
- data/lib/llm/providers/anthropic/request_adapter.rb +20 -3
- data/lib/llm/providers/anthropic/response_adapter/models.rb +13 -0
- data/lib/llm/providers/anthropic/response_adapter.rb +2 -0
- data/lib/llm/providers/anthropic.rb +21 -5
- data/lib/llm/providers/deepseek.rb +10 -3
- data/lib/llm/providers/{gemini → google}/audio.rb +6 -6
- data/lib/llm/providers/{gemini → google}/error_handler.rb +20 -5
- data/lib/llm/providers/{gemini → google}/files.rb +11 -11
- data/lib/llm/providers/{gemini → google}/images.rb +7 -7
- data/lib/llm/providers/{gemini → google}/models.rb +5 -5
- data/lib/llm/providers/{gemini → google}/request_adapter/completion.rb +7 -3
- data/lib/llm/providers/{gemini → google}/request_adapter.rb +1 -1
- data/lib/llm/providers/{gemini → google}/response_adapter/completion.rb +7 -7
- data/lib/llm/providers/{gemini → google}/response_adapter/embedding.rb +1 -1
- data/lib/llm/providers/{gemini → google}/response_adapter/file.rb +1 -1
- data/lib/llm/providers/{gemini → google}/response_adapter/files.rb +1 -1
- data/lib/llm/providers/{gemini → google}/response_adapter/image.rb +1 -1
- data/lib/llm/providers/google/response_adapter/models.rb +13 -0
- data/lib/llm/providers/{gemini → google}/response_adapter/web_search.rb +2 -2
- data/lib/llm/providers/{gemini → google}/response_adapter.rb +8 -8
- data/lib/llm/providers/{gemini → google}/stream_parser.rb +3 -3
- data/lib/llm/providers/{gemini.rb → google.rb} +41 -26
- data/lib/llm/providers/llamacpp.rb +10 -3
- data/lib/llm/providers/ollama/error_handler.rb +1 -1
- data/lib/llm/providers/ollama/models.rb +1 -1
- data/lib/llm/providers/ollama/response_adapter/models.rb +13 -0
- data/lib/llm/providers/ollama/response_adapter.rb +2 -0
- data/lib/llm/providers/ollama.rb +19 -4
- data/lib/llm/providers/openai/error_handler.rb +18 -3
- data/lib/llm/providers/openai/files.rb +3 -3
- data/lib/llm/providers/openai/images.rb +17 -11
- data/lib/llm/providers/openai/models.rb +1 -1
- data/lib/llm/providers/openai/response_adapter/completion.rb +9 -1
- data/lib/llm/providers/openai/response_adapter/models.rb +13 -0
- data/lib/llm/providers/openai/response_adapter/responds.rb +9 -1
- data/lib/llm/providers/openai/response_adapter.rb +2 -0
- data/lib/llm/providers/openai/responses.rb +16 -1
- data/lib/llm/providers/openai/stream_parser.rb +2 -0
- data/lib/llm/providers/openai.rb +28 -6
- data/lib/llm/providers/xai/images.rb +7 -6
- data/lib/llm/providers/xai.rb +10 -3
- data/lib/llm/providers/zai.rb +9 -2
- data/lib/llm/registry.rb +81 -0
- data/lib/llm/schema/enum.rb +16 -0
- data/lib/llm/schema/parser.rb +109 -0
- data/lib/llm/schema.rb +5 -0
- data/lib/llm/server_tool.rb +5 -5
- data/lib/llm/session.rb +10 -1
- data/lib/llm/tool/param.rb +1 -1
- data/lib/llm/tool.rb +86 -5
- data/lib/llm/tracer/langsmith.rb +144 -0
- data/lib/llm/tracer/logger.rb +9 -1
- data/lib/llm/tracer/null.rb +8 -0
- data/lib/llm/tracer/telemetry.rb +98 -78
- data/lib/llm/tracer.rb +108 -4
- data/lib/llm/usage.rb +5 -0
- data/lib/llm/version.rb +1 -1
- data/lib/llm.rb +40 -6
- data/llm.gemspec +45 -8
- metadata +87 -28
- data/lib/llm/providers/gemini/response_adapter/models.rb +0 -15
data/README.md
CHANGED
<p align="center">
  <a href="https://0x1eef.github.io/x/llm.rb?rebuild=1"><img src="https://img.shields.io/badge/docs-0x1eef.github.io-blue.svg" alt="RubyDoc"></a>
  <a href="https://opensource.org/license/0bsd"><img src="https://img.shields.io/badge/License-0BSD-orange.svg?" alt="License"></a>
  <a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-4.9.0-green.svg?" alt="Version"></a>
</p>

## About

llm.rb is a Ruby-centric toolkit for building real LLM-powered systems — where
LLMs are part of your architecture, not just API calls. It gives you explicit
control over contexts, tools, concurrency, and providers, so you can compose
reliable, production-ready workflows without hidden abstractions.

Built for engineers who want to understand and control their LLM systems. No
frameworks, no hidden magic — just composable primitives for building real
applications, from scripts to full systems like [Relay](https://github.com/llmrb/relay).

Jump to [Quick start](#quick-start), discover its [capabilities](#capabilities), read about
its [architecture](#architecture--execution-model) or watch the
[Screencast](https://www.youtube.com/watch?v=x1K4wMeO_QA) for a deep dive into the design
and capabilities of llm.rb.

## What Makes It Different

Most LLM libraries stop at requests and responses. <br>
llm.rb is built around the state and execution model that surrounds them:

- **Contexts are central** <br>
  They hold history, tools, schema, usage, cost, persistence, and execution state.
- **Tool execution is explicit** <br>
  Run local, provider-native, and MCP tools sequentially or concurrently with threads, fibers, or async tasks.
- **One API across providers and capabilities** <br>
  The same model covers chat, files, images, audio, embeddings, vector stores, and more.
- **Thread-safe where it matters** <br>
  Providers are shareable, while contexts stay isolated and stateful.
- **Local metadata, fewer extra API calls** <br>
  A built-in registry provides model capabilities, limits, pricing, and cost estimation.
- **Stdlib-only by default** <br>
  llm.rb runs on the Ruby standard library by default, with providers, optional features, and the model registry loaded only when you use them.

## Architecture & Execution Model

llm.rb is built in layers, each providing explicit control:

```
┌─────────────────────────────────────────┐
│            Your Application             │
├─────────────────────────────────────────┤
│           Contexts & Agents             │  ← Stateful workflows
├─────────────────────────────────────────┤
│           Tools & Functions             │  ← Concurrent execution
├─────────────────────────────────────────┤
│   Unified Provider API (OpenAI, etc.)   │  ← Provider abstraction
├─────────────────────────────────────────┤
│        HTTP, JSON, Thread Safety        │  ← Infrastructure
└─────────────────────────────────────────┘
```

### Key Design Decisions

- **Thread-safe providers** - `LLM::Provider` instances are safe to share across threads
- **Thread-local contexts** - `LLM::Context` should generally be kept thread-local
- **Lazy loading** - Providers, optional features, and the model registry load on demand
- **JSON adapter system** - Swap JSON libraries (JSON/Oj/Yajl) for performance
- **Registry system** - Local metadata for model capabilities, limits, and pricing
- **Provider adaptation** - Normalizes differences between OpenAI, Anthropic, Google, and other providers
- **Structured tool execution** - Errors are captured and returned as data, not raised unpredictably
- **Function vs Tool APIs** - Choose between class-based tools and closure-based functions

## Capabilities

llm.rb provides a complete set of primitives for building LLM-powered systems:

- **Chat & Contexts** — stateless and stateful interactions with persistence
- **Streaming** — real-time responses across providers
- **Tool Calling** — define and execute functions with automatic orchestration
- **Concurrent Execution** — threads, async tasks, and fibers
- **Agents** — reusable, preconfigured assistants with tool auto-execution
- **Structured Outputs** — JSON schema-based responses
- **MCP Support** — integrate external tool servers dynamically
- **Multimodal Inputs** — text, images, audio, documents, URLs
- **Audio** — text-to-speech, transcription, translation
- **Images** — generation and editing
- **Files API** — upload and reference files in prompts
- **Embeddings** — vector generation for search and RAG
- **Vector Stores** — OpenAI-based retrieval workflows
- **Cost Tracking** — estimate usage without API calls
- **Observability** — tracing, logging, telemetry
- **Model Registry** — local metadata for capabilities, limits, pricing

## Quick Start

#### Concurrent Tools

llm.rb provides explicit concurrency control for tool execution. The
`wait(:thread)` method spawns each pending function in its own thread and waits
for all to complete. You can also use `:fiber` for cooperative multitasking or
`:task` for async/await patterns (requires the `async` gem). The context
automatically collects all results and reports them back to the LLM in a
single turn, maintaining conversation flow while parallelizing independent
operations:

```ruby
#!/usr/bin/env ruby
require "llm"

llm = LLM.openai(key: ENV["KEY"])
ctx = LLM::Context.new(llm, stream: $stdout, tools: [FetchWeather, FetchNews, FetchStock])

# Execute multiple independent tools concurrently
ctx.talk("Summarize the weather, headlines, and stock price.")
ctx.talk(ctx.functions.wait(:thread)) while ctx.functions.any?
```

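Only the scheduler argument changes between the concurrency modes described above. A minimal sketch, continuing the example and assuming the optional `async` gem is installed for the `:task` variant:

```ruby
# Cooperative multitasking on fibers (no extra dependency)
ctx.talk(ctx.functions.wait(:fiber)) while ctx.functions.any?

# Async tasks (assumes the optional `async` gem is installed)
ctx.talk(ctx.functions.wait(:task)) while ctx.functions.any?
```
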
#### MCP

llm.rb integrates with the Model Context Protocol (MCP) to dynamically discover
and use tools from external servers. This example starts a filesystem MCP
server over stdio and makes its tools available to a context, enabling the LLM
to interact with the local file system through a standardized interface:

```ruby
#!/usr/bin/env ruby
require "llm"

llm = LLM.openai(key: ENV["KEY"])
mcp = LLM.mcp(stdio: {argv: ["npx", "-y", "@modelcontextprotocol/server-filesystem", Dir.pwd]})

begin
  mcp.start
  ctx = LLM::Context.new(llm, stream: $stdout, tools: mcp.tools)
  ctx.talk("List the directories in this project.")
  ctx.talk(ctx.functions.call) while ctx.functions.any?
ensure
  mcp.stop
end
```

#### Streaming Chat

This example demonstrates llm.rb's streaming support. The `stream: $stdout`
parameter tells the context to write responses incrementally as they arrive
from the LLM. The `Context` object manages the conversation history, and
`talk()` sends your input while automatically appending both your message and
the LLM's response to the context. Streams accept any object with `#<<`,
giving you flexibility to pipe output to files, network sockets, or custom
buffers:

```ruby
#!/usr/bin/env ruby
require "llm"

llm = LLM.openai(key: ENV["KEY"])
ctx = LLM::Context.new(llm, stream: $stdout)
loop do
  print "> "
  ctx.talk(STDIN.gets || break)
  puts
end
```

#### Tool Calling

Tools in llm.rb can be defined as classes inheriting from `LLM::Tool` or as
closures using `LLM.function`. When the LLM requests a tool call, the context
stores `Function` objects in `ctx.functions`. The `call()` method executes all
pending functions and returns their results to the LLM. Tools support
structured parameters with JSON Schema validation and automatically adapt to
each provider's API format (OpenAI, Anthropic, Google, etc.):

```ruby
#!/usr/bin/env ruby
require "llm"

class System < LLM::Tool
  name "system"
  description "Run a shell command"
  param :command, String, "Command to execute", required: true

  def call(command:)
    {success: system(command)}
  end
end

llm = LLM.openai(key: ENV["KEY"])
ctx = LLM::Context.new(llm, stream: $stdout, tools: [System])
ctx.talk("Run `date`.")
ctx.talk(ctx.functions.call) while ctx.functions.any?
```

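The closure form is handy when a tool needs to read or update surrounding state. A minimal sketch, assuming `LLM.function` still accepts a name plus the block interface (`fn.description`, `fn.params`, `fn.define`) shown in the 4.7.0 README:

```ruby
#!/usr/bin/env ruby
require "llm"

history = [] # state captured by the closure below
tool = LLM.function(:system) do |fn|
  fn.description "Run a shell command"
  fn.params { |schema| schema.object(command: schema.string.required) }
  fn.define do |command:|
    history << command # the closure can reach surrounding state
    {success: system(command)}
  end
end

llm = LLM.openai(key: ENV["KEY"])
ctx = LLM::Context.new(llm, tools: [tool])
ctx.talk("Run `date`.")
ctx.talk(ctx.functions.call) while ctx.functions.any?
```
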
#### Structured Outputs

The `LLM::Schema` system lets you define JSON schemas that LLMs must follow.
Schemas can be defined as classes with `property` declarations or built
programmatically using a fluent interface. When you pass a schema to a context,
llm.rb automatically configures the provider's JSON mode and validates
responses against your schema. The `content!` method returns the parsed JSON
object, while errors are captured as structured data rather than raising
exceptions:

```ruby
#!/usr/bin/env ruby
require "llm"
require "pp"

class Report < LLM::Schema
  property :category, Enum["performance", "security", "outage"], "Report category", required: true
  property :summary, String, "Short summary", required: true
  property :services, Array[String], "Impacted services", required: true
  property :timestamp, String, "When it happened", optional: true
end

llm = LLM.openai(key: ENV["KEY"])
ctx = LLM::Context.new(llm, schema: Report)
res = ctx.talk("Structure this report: 'Database latency spiked at 10:42 UTC, causing 5% request timeouts for 12 minutes.'")
pp res.content!

# {
#   "category" => "performance",
#   "summary" => "Database latency spiked, causing 5% request timeouts for 12 minutes.",
#   "services" => ["Database"],
#   "timestamp" => "2024-06-05T10:42:00Z"
# }
```

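For one-off schemas the class can be skipped in favor of the fluent interface mentioned above. A minimal sketch; the `llm.schema` entry point and the `object`/`string`/`required` helpers are assumptions carried over from the builder used in earlier releases:

```ruby
#!/usr/bin/env ruby
require "llm"
require "pp"

llm = LLM.openai(key: ENV["KEY"])

# Build the schema programmatically instead of declaring a class
schema = llm.schema.object(
  category: llm.schema.string.required,
  summary: llm.schema.string.required
)

ctx = LLM::Context.new(llm, schema:)
res = ctx.talk("Summarize: 'Database latency spiked at 10:42 UTC.'")
pp res.content!
```
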
## Providers

llm.rb supports multiple LLM providers with a unified API.
All providers share the same context, tool, and concurrency interfaces, making
it easy to switch between cloud and local models:

- **OpenAI** (`LLM.openai`)
- **Anthropic** (`LLM.anthropic`)
- **Google** (`LLM.google`)
- **DeepSeek** (`LLM.deepseek`)
- **xAI** (`LLM.xai`)
- **zAI** (`LLM.zai`)
- **Ollama** (`LLM.ollama`)
- **Llama.cpp** (`LLM.llamacpp`)

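Each provider is instantiated through the corresponding singleton method on `LLM`. The snippet below mirrors the instantiation examples from the 4.7.0 README, where cloud providers take an API key and local providers accept `key: nil`:

```ruby
#!/usr/bin/env ruby
require "llm"

# Cloud providers
llm = LLM.openai(key: ENV["KEY"])
llm = LLM.anthropic(key: ENV["KEY"])
llm = LLM.deepseek(key: ENV["KEY"])
llm = LLM.zai(key: ENV["KEY"])

# Local providers
llm = LLM.ollama(key: nil)
llm = LLM.llamacpp(key: nil)
```
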
## Production

#### Ready for production

llm.rb is designed for production use from the ground up:

- **Thread-safe providers** - Share `LLM::Provider` instances across your application
- **Thread-local contexts** - Keep `LLM::Context` instances thread-local for state isolation
- **Cost tracking** - Know your spend before the bill arrives
- **Observability** - Built-in tracing with OpenTelemetry support
- **Persistence** - Save and restore contexts across processes
- **Performance** - Swap JSON adapters and enable HTTP connection pooling
- **Error handling** - Structured errors, not unpredictable exceptions

#### Tracing

llm.rb includes built-in tracers for local logging, OpenTelemetry, and
LangSmith. Assign a tracer to a provider, and all context requests and tool
calls made through that provider will be instrumented. Tracers are local to
the current fiber, so the same provider can use different tracers in different
concurrent tasks without interfering with each other.

Use the logger tracer when you want structured logs through Ruby's standard
library:

```ruby
#!/usr/bin/env ruby
require "llm"

llm = LLM.openai(key: ENV["KEY"])
llm.tracer = LLM::Tracer::Logger.new(llm, io: $stdout)

ctx = LLM::Context.new(llm)
ctx.talk("Hello")
```

Use the telemetry tracer when you want OpenTelemetry spans. This requires the
`opentelemetry-sdk` gem, and exporters such as OTLP can be added separately:

```ruby
#!/usr/bin/env ruby
require "llm"

llm = LLM.openai(key: ENV["KEY"])
llm.tracer = LLM::Tracer::Telemetry.new(llm)

ctx = LLM::Context.new(llm)
ctx.talk("Hello")
pp llm.tracer.spans
```

Use the LangSmith tracer when you want LangSmith-compatible metadata and trace
grouping on top of the telemetry tracer:

```ruby
#!/usr/bin/env ruby
require "llm"

llm = LLM.openai(key: ENV["KEY"])
llm.tracer = LLM::Tracer::Langsmith.new(
  llm,
  metadata: {env: "dev"},
  tags: ["chatbot"]
)

ctx = LLM::Context.new(llm)
ctx.talk("Hello")
```

#### Thread Safety

llm.rb uses Ruby's `Monitor` class to ensure thread safety at the provider
level, allowing you to share a single provider instance across multiple threads
while maintaining state isolation through thread-local contexts. This design
enables efficient resource sharing while preventing race conditions in
concurrent applications:

```ruby
#!/usr/bin/env ruby
require "llm"

# Thread-safe providers - create once, use everywhere
llm = LLM.openai(key: ENV["KEY"])

# Each thread should have its own context for state isolation
Thread.new do
  ctx = LLM::Context.new(llm) # Thread-local context
  ctx.talk("Hello from thread 1")
end

Thread.new do
  ctx = LLM::Context.new(llm) # Thread-local context
  ctx.talk("Hello from thread 2")
end
```

#### Performance Tuning

llm.rb's JSON adapter system lets you swap JSON libraries for better
performance in high-throughput applications. The library supports stdlib JSON,
Oj, and Yajl, with Oj typically offering the best performance. Additionally,
you can enable HTTP connection pooling using the optional `net-http-persistent`
gem to reduce connection overhead in production environments:

```ruby
#!/usr/bin/env ruby
require "llm"

# Swap JSON libraries for better performance
LLM.json = :oj # Use Oj for faster JSON parsing

# Enable HTTP connection pooling for high-throughput applications
llm = LLM.openai(key: ENV["KEY"]).persist! # Uses net-http-persistent when available
```

#### Model Registry

llm.rb includes a local model registry that provides metadata about model
capabilities, pricing, and limits without requiring API calls. The registry is
shipped with the gem and sourced from https://models.dev, giving you access to
up-to-date information about context windows, token costs, and supported
modalities for each provider:

```ruby
#!/usr/bin/env ruby
require "llm"

# Access model metadata, capabilities, and pricing
registry = LLM.registry_for(:openai)
model_info = registry.limit(model: "gpt-4.1")
puts "Context window: #{model_info.context} tokens"
puts "Cost: $#{model_info.cost.input}/1M input tokens"
```

## More Examples

#### Responses API

llm.rb also supports OpenAI's Responses API through `llm.responses` and
`ctx.respond`. This API can maintain response state server-side and can reduce
how much conversation state needs to be sent on each turn:

```ruby
#!/usr/bin/env ruby
require "llm"

llm = LLM.openai(key: ENV["KEY"])
ctx = LLM::Context.new(llm)

ctx.respond("Your task is to answer the user's questions", role: :developer)
res = ctx.respond("What is the capital of France?")
puts res.output_text
```

#### Context Persistence

Contexts can be serialized and restored across process boundaries. This makes
it possible to persist conversation state in a file, database, or queue and
resume work later:

```ruby
#!/usr/bin/env ruby
require "llm"

llm = LLM.openai(key: ENV["KEY"])
ctx = LLM::Context.new(llm)
ctx.talk("Hello")
ctx.talk("Remember that my favorite language is Ruby")
ctx.save(path: "context.json")

restored = LLM::Context.new(llm)
restored.restore(path: "context.json")
res = restored.talk("What is my favorite language?")
puts res.content
```

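The saved form is plain JSON, so the same state can live in a database column or a queue payload rather than a file. A minimal sketch, continuing the example above and assuming `restore` still accepts the `string:` argument offered by the session API in 4.7.0:

```ruby
# Read the serialized context from wherever it was stored
json = File.read("context.json")

# ...later, possibly in another process
restored = LLM::Context.new(llm)
restored.restore(string: json) # assumption: string-based restore, as in the 4.7.0 session API
restored.talk("What is my favorite language?")
```
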
#### Agents

Agents in llm.rb are reusable, preconfigured assistants that automatically
execute tool calls and maintain conversation state. Unlike contexts, which
require manual tool execution, agents automatically handle the tool call loop,
making them ideal for autonomous workflows where you want the LLM to
independently use available tools to accomplish tasks:

```ruby
#!/usr/bin/env ruby
require "llm"

class SystemAdmin < LLM::Agent
  model "gpt-4.1"
  instructions "You are a Linux system admin"
  tools Shell
  schema Result
end

llm = LLM.openai(key: ENV["KEY"])
agent = SystemAdmin.new(llm)
res = agent.talk("Run 'date'")
```

#### Cost Tracking

llm.rb provides built-in cost estimation that works without making additional
API calls. The cost tracking system uses the local model registry to calculate
estimated costs based on token usage, giving you visibility into spending
before bills arrive. This is particularly useful for monitoring usage in
production applications and setting budget alerts:

```ruby
#!/usr/bin/env ruby
require "llm"

llm = LLM.openai(key: ENV["KEY"])
ctx = LLM::Context.new(llm)
ctx.talk "Hello"
puts "Estimated cost so far: $#{ctx.cost}"
ctx.talk "Tell me a joke"
puts "Estimated cost so far: $#{ctx.cost}"
```

#### Multimodal Prompts

Contexts provide helpers for composing multimodal prompts from URLs, local
files, and provider-managed remote files. These tagged objects let providers
adapt the input into the format they expect:

```ruby
#!/usr/bin/env ruby
require "llm"

llm = LLM.openai(key: ENV["KEY"])
ctx = LLM::Context.new(llm)

res = ctx.talk ["Describe this image", ctx.image_url("https://example.com/cat.jpg")]
puts res.content
```

#### Audio Generation

llm.rb supports OpenAI's audio API for text-to-speech generation, allowing you
to create speech from text with configurable voices and output formats. The
audio API returns binary audio data that can be streamed directly to files or
other IO objects, enabling integration with multimedia applications:

```ruby
#!/usr/bin/env ruby
require "llm"

llm = LLM.openai(key: ENV["KEY"])
res = llm.audio.create_speech(input: "Hello world")
IO.copy_stream res.audio, File.join(Dir.home, "hello.mp3")
```

#### Image Generation

llm.rb provides access to OpenAI's DALL-E image generation API through a
unified interface. The API supports multiple response formats including
base64-encoded images and temporary URLs, with automatic handling of binary
data streaming for efficient file operations:

```ruby
#!/usr/bin/env ruby
require "llm"

llm = LLM.openai(key: ENV["KEY"])
res = llm.images.create(prompt: "a dog on a rocket to the moon")
IO.copy_stream res.images[0], File.join(Dir.home, "dogonrocket.png")
```

#### Embeddings

llm.rb's embedding API generates vector representations of text for semantic
search and retrieval-augmented generation (RAG) workflows. The API supports
batch processing of multiple inputs and returns normalized vectors suitable for
vector similarity operations, with consistent dimensionality across providers:

```ruby
#!/usr/bin/env ruby
require "llm"

llm = LLM.openai(key: ENV["KEY"])
res = llm.embed(["ruby", "rails", "rack"])
puts res.class
puts res.embeddings.size
puts res.embeddings[0].size

# LLM::Response
# 3
# 1536
```

## Real-World Example: Relay

See how these pieces come together in a complete application architecture with
[Relay](https://github.com/llmrb/relay), a production-ready LLM application
built on llm.rb that demonstrates:

- Context management across requests
- Tool composition and execution
- Concurrent workflows
- Cost tracking and observability
- Production deployment patterns

Watch the screencast:

[Screencast](https://www.youtube.com/watch?v=x1K4wMeO_QA)

## Installation

```bash
gem install llm.rb
```

## License