llm.rb 4.7.0 → 4.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. checksums.yaml +4 -4
  2. data/README.md +335 -587
  3. data/data/anthropic.json +770 -0
  4. data/data/deepseek.json +75 -0
  5. data/data/google.json +1050 -0
  6. data/data/openai.json +1421 -0
  7. data/data/xai.json +792 -0
  8. data/data/zai.json +330 -0
  9. data/lib/llm/agent.rb +42 -41
  10. data/lib/llm/bot.rb +1 -263
  11. data/lib/llm/buffer.rb +7 -0
  12. data/lib/llm/{session → context}/deserializer.rb +4 -3
  13. data/lib/llm/context.rb +292 -0
  14. data/lib/llm/cost.rb +26 -0
  15. data/lib/llm/error.rb +8 -0
  16. data/lib/llm/eventstream/parser.rb +0 -5
  17. data/lib/llm/function/array.rb +61 -0
  18. data/lib/llm/function/fiber_group.rb +91 -0
  19. data/lib/llm/function/task_group.rb +89 -0
  20. data/lib/llm/function/thread_group.rb +94 -0
  21. data/lib/llm/function.rb +75 -10
  22. data/lib/llm/mcp/command.rb +108 -0
  23. data/lib/llm/mcp/error.rb +31 -0
  24. data/lib/llm/mcp/pipe.rb +82 -0
  25. data/lib/llm/mcp/rpc.rb +118 -0
  26. data/lib/llm/mcp/transport/stdio.rb +85 -0
  27. data/lib/llm/mcp.rb +102 -0
  28. data/lib/llm/message.rb +13 -11
  29. data/lib/llm/model.rb +115 -0
  30. data/lib/llm/prompt.rb +17 -7
  31. data/lib/llm/provider.rb +60 -32
  32. data/lib/llm/providers/anthropic/error_handler.rb +1 -1
  33. data/lib/llm/providers/anthropic/files.rb +3 -3
  34. data/lib/llm/providers/anthropic/models.rb +1 -1
  35. data/lib/llm/providers/anthropic/request_adapter.rb +20 -3
  36. data/lib/llm/providers/anthropic/response_adapter/models.rb +13 -0
  37. data/lib/llm/providers/anthropic/response_adapter.rb +2 -0
  38. data/lib/llm/providers/anthropic.rb +21 -5
  39. data/lib/llm/providers/deepseek.rb +10 -3
  40. data/lib/llm/providers/{gemini → google}/audio.rb +6 -6
  41. data/lib/llm/providers/{gemini → google}/error_handler.rb +20 -5
  42. data/lib/llm/providers/{gemini → google}/files.rb +11 -11
  43. data/lib/llm/providers/{gemini → google}/images.rb +7 -7
  44. data/lib/llm/providers/{gemini → google}/models.rb +5 -5
  45. data/lib/llm/providers/{gemini → google}/request_adapter/completion.rb +7 -3
  46. data/lib/llm/providers/{gemini → google}/request_adapter.rb +1 -1
  47. data/lib/llm/providers/{gemini → google}/response_adapter/completion.rb +7 -7
  48. data/lib/llm/providers/{gemini → google}/response_adapter/embedding.rb +1 -1
  49. data/lib/llm/providers/{gemini → google}/response_adapter/file.rb +1 -1
  50. data/lib/llm/providers/{gemini → google}/response_adapter/files.rb +1 -1
  51. data/lib/llm/providers/{gemini → google}/response_adapter/image.rb +1 -1
  52. data/lib/llm/providers/google/response_adapter/models.rb +13 -0
  53. data/lib/llm/providers/{gemini → google}/response_adapter/web_search.rb +2 -2
  54. data/lib/llm/providers/{gemini → google}/response_adapter.rb +8 -8
  55. data/lib/llm/providers/{gemini → google}/stream_parser.rb +3 -3
  56. data/lib/llm/providers/{gemini.rb → google.rb} +41 -26
  57. data/lib/llm/providers/llamacpp.rb +10 -3
  58. data/lib/llm/providers/ollama/error_handler.rb +1 -1
  59. data/lib/llm/providers/ollama/models.rb +1 -1
  60. data/lib/llm/providers/ollama/response_adapter/models.rb +13 -0
  61. data/lib/llm/providers/ollama/response_adapter.rb +2 -0
  62. data/lib/llm/providers/ollama.rb +19 -4
  63. data/lib/llm/providers/openai/error_handler.rb +18 -3
  64. data/lib/llm/providers/openai/files.rb +3 -3
  65. data/lib/llm/providers/openai/images.rb +17 -11
  66. data/lib/llm/providers/openai/models.rb +1 -1
  67. data/lib/llm/providers/openai/response_adapter/completion.rb +9 -1
  68. data/lib/llm/providers/openai/response_adapter/models.rb +13 -0
  69. data/lib/llm/providers/openai/response_adapter/responds.rb +9 -1
  70. data/lib/llm/providers/openai/response_adapter.rb +2 -0
  71. data/lib/llm/providers/openai/responses.rb +16 -1
  72. data/lib/llm/providers/openai/stream_parser.rb +2 -0
  73. data/lib/llm/providers/openai.rb +28 -6
  74. data/lib/llm/providers/xai/images.rb +7 -6
  75. data/lib/llm/providers/xai.rb +10 -3
  76. data/lib/llm/providers/zai.rb +9 -2
  77. data/lib/llm/registry.rb +81 -0
  78. data/lib/llm/schema/enum.rb +16 -0
  79. data/lib/llm/schema/parser.rb +109 -0
  80. data/lib/llm/schema.rb +5 -0
  81. data/lib/llm/server_tool.rb +5 -5
  82. data/lib/llm/session.rb +10 -1
  83. data/lib/llm/tool/param.rb +1 -1
  84. data/lib/llm/tool.rb +86 -5
  85. data/lib/llm/tracer/langsmith.rb +144 -0
  86. data/lib/llm/tracer/logger.rb +9 -1
  87. data/lib/llm/tracer/null.rb +8 -0
  88. data/lib/llm/tracer/telemetry.rb +98 -78
  89. data/lib/llm/tracer.rb +108 -4
  90. data/lib/llm/usage.rb +5 -0
  91. data/lib/llm/version.rb +1 -1
  92. data/lib/llm.rb +40 -6
  93. data/llm.gemspec +45 -8
  94. metadata +87 -28
  95. data/lib/llm/providers/gemini/response_adapter/models.rb +0 -15
data/README.md CHANGED
@@ -4,409 +4,267 @@
  <p align="center">
  <a href="https://0x1eef.github.io/x/llm.rb?rebuild=1"><img src="https://img.shields.io/badge/docs-0x1eef.github.io-blue.svg" alt="RubyDoc"></a>
  <a href="https://opensource.org/license/0bsd"><img src="https://img.shields.io/badge/License-0BSD-orange.svg?" alt="License"></a>
- <a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-4.6.0-green.svg?" alt="Version"></a>
+ <a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-4.9.0-green.svg?" alt="Version"></a>
  </p>

  ## About

- llm.rb is a zero-dependency Ruby toolkit for Large Language Models that
- includes OpenAI, Gemini, Anthropic, xAI (Grok), zAI, DeepSeek, Ollama,
- and LlamaCpp. The toolkit includes full support for chat, streaming,
- tool calling, audio, images, files, and structured outputs.
+ llm.rb is a Ruby-centric toolkit for building real LLM-powered systems — where
+ LLMs are part of your architecture, not just API calls. It gives you explicit
+ control over contexts, tools, concurrency, and providers, so you can compose
+ reliable, production-ready workflows without hidden abstractions.

- And it is licensed under the [0BSD License](https://choosealicense.com/licenses/0bsd/) &ndash;
- one of the most permissive open source licenses, with minimal conditions for use,
- modification, and/or distribution. Attribution is appreciated, but not required
- by the license. Built with [good music](https://www.youtube.com/watch?v=SNvaqwTbn14)
- and a lot of ☕️.
+ Built for engineers who want to understand and control their LLM systems. No
+ frameworks, no hidden magic: just composable primitives for building real
+ applications, from scripts to full systems like [Relay](https://github.com/llmrb/relay).

- ## Quick start
+ Jump to [Quick start](#quick-start), discover its [capabilities](#capabilities), read about
+ its [architecture](#architecture--execution-model), or watch the
+ [Screencast](https://www.youtube.com/watch?v=x1K4wMeO_QA) for a deep dive into the design
+ and capabilities of llm.rb.

- #### REPL
+ ## What Makes It Different

- The [LLM::Session](https://0x1eef.github.io/x/llm.rb/LLM/Session.html) class provides
- a session with an LLM provider that maintains conversation history and context across
- multiple requests. The following example implements a simple REPL loop, and the response
- is streamed to the terminal in real-time as it arrives from the provider. The provider
- happens to be OpenAI in this case but it could be any other provider, and `$stdout`
- could be any object that implements the `#<<` method:
+ Most LLM libraries stop at requests and responses. <br>
+ llm.rb is built around the state and execution model that surrounds them:

- ```ruby
- #!/usr/bin/env ruby
- require "llm"
-
- llm = LLM.openai(key: ENV["KEY"])
- ses = LLM::Session.new(llm, stream: $stdout)
- loop do
-   print "> "
-   ses.talk(STDIN.gets || break)
-   puts
- end
- ```
-
- #### Schema
-
- The [LLM::Schema](https://0x1eef.github.io/x/llm.rb/LLM/Schema.html) class provides
- a simple DSL for describing the structure of a response that an LLM emits according
- to a JSON schema. The schema lets a client describe what JSON object an LLM should
- emit, and the LLM will abide by the schema to the best of its ability:
-
- ```ruby
- #!/usr/bin/env ruby
- require "llm"
- require "pp"
+ - **Contexts are central** <br>
+   They hold history, tools, schema, usage, cost, persistence, and execution state.
+ - **Tool execution is explicit** <br>
+   Run local, provider-native, and MCP tools sequentially or concurrently with threads, fibers, or async tasks.
+ - **One API across providers and capabilities** <br>
+   The same model covers chat, files, images, audio, embeddings, vector stores, and more.
+ - **Thread-safe where it matters** <br>
+   Providers are shareable, while contexts stay isolated and stateful.
+ - **Local metadata, fewer extra API calls** <br>
+   A built-in registry provides model capabilities, limits, pricing, and cost estimation.
+ - **Stdlib-only by default** <br>
+   llm.rb runs on the Ruby standard library by default, with providers, optional features, and the model registry loaded only when you use them.

- class Report < LLM::Schema
-   property :category, String, "Report category", required: true
-   property :summary, String, "Short summary", required: true
-   property :services, Array[String], "Impacted services", required: true
-   property :timestamp, String, "When it happened", optional: true
- end
+ ## Architecture & Execution Model

- llm = LLM.openai(key: ENV["KEY"])
- ses = LLM::Session.new(llm, schema: Report)
- res = ses.talk("Structure this report: 'Database latency spiked at 10:42 UTC, causing 5% request timeouts for 12 minutes.'")
- pp res.content!
+ llm.rb is built in layers, each providing explicit control:

- ##
- # {
- #   "category" => "Performance Incident",
- #   "summary" => "Database latency spiked, causing 5% request timeouts for 12 minutes.",
- #   "services" => ["Database"],
- #   "timestamp" => "2024-06-05T10:42:00Z"
- # }
  ```
-
- #### Tools
-
- The [LLM::Tool](https://0x1eef.github.io/x/llm.rb/LLM/Tool.html) class lets you
- define callable tools for the model. Each tool is described to the LLM as a function
- it can invoke to fetch information or perform an action. The model decides when to
- call tools based on the conversation; when it does, llm.rb runs the tool and sends
- the result back on the next request. The following example implements a simple tool
- that runs shell commands:
-
- ```ruby
- #!/usr/bin/env ruby
- require "llm"
-
- class System < LLM::Tool
-   name "system"
-   description "Run a shell command"
-   param :command, String, "Command to execute", required: true
-
-   def call(command:)
-     {success: system(command)}
-   end
- end
-
- llm = LLM.openai(key: ENV["KEY"])
- ses = LLM::Session.new(llm, tools: [System])
- ses.talk("Run `date`.")
- ses.talk(ses.functions.map(&:call)) # report return value to the LLM
+ ┌─────────────────────────────────────────┐
+ │ Your Application                        │
+ ├─────────────────────────────────────────┤
+ │ Contexts & Agents                       │ Stateful workflows
+ ├─────────────────────────────────────────┤
+ │ Tools & Functions                       │ Concurrent execution
+ ├─────────────────────────────────────────┤
+ │ Unified Provider API (OpenAI, etc.)     │ Provider abstraction
+ ├─────────────────────────────────────────┤
+ │ HTTP, JSON, Thread Safety               │ Infrastructure
+ └─────────────────────────────────────────┘
  ```

- #### Agents
-
- The [LLM::Agent](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html)
- class provides a class-level DSL for defining reusable, preconfigured
- assistants with defaults for model, tools, schema, and instructions.
- Instructions are injected only on the first request, and unlike
- [LLM::Session](https://0x1eef.github.io/x/llm.rb/LLM/Session.html),
- an [LLM::Agent](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html)
- will automatically call tools when needed:
+ ### Key Design Decisions
+
+ - **Thread-safe providers** - `LLM::Provider` instances are safe to share across threads
+ - **Thread-local contexts** - `LLM::Context` should generally be kept thread-local
+ - **Lazy loading** - Providers, optional features, and the model registry load on demand
+ - **JSON adapter system** - Swap JSON libraries (JSON/Oj/Yajl) for performance
+ - **Registry system** - Local metadata for model capabilities, limits, and pricing
+ - **Provider adaptation** - Normalizes differences between OpenAI, Anthropic, Google, and other providers
+ - **Structured tool execution** - Errors are captured and returned as data, not raised unpredictably
+ - **Function vs Tool APIs** - Choose between class-based tools and closure-based functions
+
+ ## Capabilities
+
+ llm.rb provides a complete set of primitives for building LLM-powered systems:
+
+ - **Chat & Contexts** — stateless and stateful interactions with persistence
+ - **Streaming** — real-time responses across providers
+ - **Tool Calling** — define and execute functions with automatic orchestration
+ - **Concurrent Execution** — threads, async tasks, and fibers
+ - **Agents** — reusable, preconfigured assistants with tool auto-execution
+ - **Structured Outputs** — JSON schema-based responses
+ - **MCP Support** — integrate external tool servers dynamically
+ - **Multimodal Inputs** — text, images, audio, documents, URLs
+ - **Audio** — text-to-speech, transcription, translation
+ - **Images** — generation and editing
+ - **Files API** — upload and reference files in prompts
+ - **Embeddings** — vector generation for search and RAG
+ - **Vector Stores** — OpenAI-based retrieval workflows
+ - **Cost Tracking** — estimate usage without API calls
+ - **Observability** — tracing, logging, telemetry
+ - **Model Registry** — local metadata for capabilities, limits, pricing
+
+ ## Quick Start
+
+ #### Concurrent Tools
+
+ llm.rb provides explicit concurrency control for tool execution. The
+ `wait(:thread)` method spawns each pending function in its own thread and waits
+ for all to complete. You can also use `:fiber` for cooperative multitasking or
+ `:task` for async/await patterns (requires the `async` gem). The context
+ automatically collects all results and reports them back to the LLM in a
+ single turn, maintaining conversation flow while parallelizing independent
+ operations:

  ```ruby
  #!/usr/bin/env ruby
  require "llm"

- class SystemAdmin < LLM::Agent
-   model "gpt-4.1"
-   instructions "You are a Linux system admin"
-   tools Shell
-   schema Result
- end
-
  llm = LLM.openai(key: ENV["KEY"])
- agent = SystemAdmin.new(llm)
- res = agent.talk("Run 'date'")
+ ctx = LLM::Context.new(llm, stream: $stdout, tools: [FetchWeather, FetchNews, FetchStock])
+
+ # Execute multiple independent tools concurrently
+ ctx.talk("Summarize the weather, headlines, and stock price.")
+ ctx.talk(ctx.functions.wait(:thread)) while ctx.functions.any?
134
117
 
135
- #### Prompts
118
+ #### MCP
136
119
 
137
- The [LLM::Prompt](https://0x1eef.github.io/x/llm.rb/LLM/Prompt.html)
138
- class represents a single request composed of multiple messages.
139
- It is useful when a single turn needs more than one message, for example:
140
- system instructions plus one or more user messages, or a replay of
141
- prior context:
120
+ llm.rb integrates with the Model Context Protocol (MCP) to dynamically discover
121
+ and use tools from external servers. This example starts a filesystem MCP
122
+ server over stdio and makes its tools available to a context, enabling the LLM
123
+ to interact with the local file system through a standardized interface:
142
124
 
143
125
  ```ruby
144
126
  #!/usr/bin/env ruby
145
127
  require "llm"
146
128
 
147
129
  llm = LLM.openai(key: ENV["KEY"])
148
- ses = LLM::Session.new(llm)
149
-
150
- prompt = ses.prompt do
151
- system "Be concise and show your reasoning briefly."
152
- user "If a train goes 60 mph for 1.5 hours, how far does it travel?"
153
- user "Now double the speed for the same time."
130
+ mcp = LLM.mcp(stdio: {argv: ["npx", "-y", "@modelcontextprotocol/server-filesystem", Dir.pwd]})
131
+
132
+ begin
133
+ mcp.start
134
+ ctx = LLM::Context.new(llm, stream: $stdout, tools: mcp.tools)
135
+ ctx.talk("List the directories in this project.")
136
+ ctx.talk(ctx.functions.call) while ctx.functions.any?
137
+ ensure
138
+ mcp.stop
154
139
  end
155
-
156
- ses.talk(prompt)
157
140
  ```
158
141
 
159
- But prompts are not session-scoped. [LLM::Prompt](https://0x1eef.github.io/x/llm.rb/LLM/Prompt.html)
160
- is a first-class object that you can build and pass around independently of a session.
161
- This enables patterns where you compose a prompt in one part of your code,
162
- and execute it through a session elsewhere:
142
+ #### Streaming Chat
143
+
144
+ This example demonstrates llm.rb's streaming support. The `stream: $stdout`
145
+ parameter tells the context to write responses incrementally as they arrive
146
+ from the LLM. The `Context` object manages the conversation history, and
147
+ `talk()` sends your input while automatically appending both your message and
148
+ the LLM's response to the context. Streams accept any object with `#<<`,
149
+ giving you flexibility to pipe output to files, network sockets, or custom
150
+ buffers:
163
151
 
164
152
  ```ruby
165
153
  #!/usr/bin/env ruby
166
154
  require "llm"
167
155
 
168
156
  llm = LLM.openai(key: ENV["KEY"])
169
- ses = LLM::Session.new(llm)
170
-
171
- prompt = LLM::Prompt.new(llm) do
172
- system "Be concise and show your reasoning briefly."
173
- user "If a train goes 60 mph for 1.5 hours, how far does it travel?"
174
- user "Now double the speed for the same time."
157
+ ctx = LLM::Context.new(llm, stream: $stdout)
158
+ loop do
159
+ print "> "
160
+ ctx.talk(STDIN.gets || break)
161
+ puts
175
162
  end
176
-
177
- ses.talk(prompt)
178
163
  ```
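+
+ Because a stream is any object with `#<<`, a custom sink is a small class away.
+ A minimal sketch (`Tee` is a hypothetical helper, not part of llm.rb):
+
+ ```ruby
+ # Fans each streamed chunk out to several destinations.
+ class Tee
+   def initialize(*sinks)
+     @sinks = sinks
+   end
+
+   def <<(chunk)
+     @sinks.each { |sink| sink << chunk }
+   end
+ end
+
+ ctx = LLM::Context.new(llm, stream: Tee.new($stdout, File.open("reply.log", "w")))
+ ```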

- #### Threads
+ #### Tool Calling

- llm.rb is designed for threaded environments with throughput in mind.
- Locks are used selectively, and localized state is preferred wherever
- possible. Blanket locking across every class would help guarantee
- correctness but it would also add contention, reduce throughput,
- and increase complexity.
-
- That's why we decided to optimize for both correctness and throughput
- instead. An important part of that design is guaranteeing that
- [LLM::Provider](https://0x1eef.github.io/x/llm.rb/LLM/Provider.html)
- is safe to share across threads. [LLM::Session](https://0x1eef.github.io/x/llm.rb/LLM/Session.html) and
- [LLM::Agent](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html) are
- stateful objects that should be kept local to a single thread. So the
- recommended pattern is to keep one session or agent per thread,
- and share a provider across multiple threads:
+ Tools in llm.rb can be defined as classes inheriting from `LLM::Tool` or as
+ closures using `LLM.function`. When the LLM requests a tool call, the context
+ stores `Function` objects in `ctx.functions`. The `call()` method executes all
+ pending functions and returns their results to the LLM. Tools support
+ structured parameters with JSON Schema validation and automatically adapt to
+ each provider's API format (OpenAI, Anthropic, Google, etc.):

  ```ruby
  #!/usr/bin/env ruby
  require "llm"

- llm = LLM.openai(key: ENV["KEY"]).persist!
- schema = llm.schema.object(answer: llm.schema.integer.required)
+ class System < LLM::Tool
+   name "system"
+   description "Run a shell command"
+   param :command, String, "Command to execute", required: true

- vals = 10.times.map do |x|
-   Thread.new do
-     ses = LLM::Session.new(llm, schema:)
-     res = ses.talk "#{x} + 5 = ?"
-     res.content!
+   def call(command:)
+     {success: system(command)}
    end
- end.map(&:value)
+ end

- vals.each { |val| puts val }
+ llm = LLM.openai(key: ENV["KEY"])
+ ctx = LLM::Context.new(llm, stream: $stdout, tools: [System])
+ ctx.talk("Run `date`.")
+ ctx.talk(ctx.functions.call) while ctx.functions.any?
214
193
 
215
- ## Features
216
-
217
- #### General
218
- - ✅ Unified API across providers
219
- - 📦 Zero runtime deps (stdlib-only)
220
- - 🧵 Thread-safe providers for multi-threaded workloads
221
- - 🧩 Pluggable JSON adapters (JSON, Oj, Yajl, etc)
222
- - 🧱 Builtin tracer API ([LLM::Tracer](https://0x1eef.github.io/x/llm.rb/LLM/Tracer.html))
223
-
224
- #### Optionals
194
+ #### Structured Outputs
225
195
 
226
- - ♻️ Optional persistent HTTP pool via net-http-persistent ([net-http-persistent](https://github.com/drbrain/net-http-persistent))
227
- - 📈 Optional telemetry support via OpenTelemetry ([opentelemetry-sdk](https://github.com/open-telemetry/opentelemetry-ruby))
228
- - 🪵 Optional logging support via Ruby's standard library ([ruby/logger](https://github.com/ruby/logger))
229
-
230
- #### Chat, Agents
231
- - 🧠 Stateless + stateful chat (completions + responses)
232
- - 💾 Save and restore sessions across processes
233
- - 🤖 Tool calling / function execution
234
- - 🔁 Agent tool-call auto-execution (bounded)
235
- - 🗂️ JSON Schema structured output
236
- - 📡 Streaming responses
237
-
238
- #### Media
239
- - 🗣️ TTS, transcription, translation
240
- - 🖼️ Image generation + editing
241
- - 📎 Files API + prompt-aware file inputs
242
- - 📦 Streaming multipart uploads (no full buffering)
243
- - 💡 Multimodal prompts (text, documents, audio, images, video, URLs)
244
-
245
- #### Embeddings
246
- - 🧮 Embeddings
247
- - 🧱 OpenAI vector stores (RAG)
248
-
249
- #### Miscellaneous
250
- - 📜 Models API
251
- - 🔧 OpenAI responses + moderations
252
-
253
- ## Matrix
254
-
255
- | Feature / Provider | OpenAI | Anthropic | Gemini | DeepSeek | xAI (Grok) | zAI | Ollama | LlamaCpp |
256
- |--------------------------------------|:------:|:---------:|:------:|:--------:|:----------:|:------:|:------:|:--------:|
257
- | **Chat Completions** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
258
- | **Streaming** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
259
- | **Tool Calling** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
260
- | **JSON Schema / Structured Output** | ✅ | ❌ | ✅ | ❌ | ✅ | ❌ | ✅* | ✅* |
261
- | **Embeddings** | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ |
262
- | **Multimodal Prompts** *(text, documents, audio, images, videos, URLs, etc)* | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ |
263
- | **Files API** | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |
264
- | **Models API** | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ |
265
- | **Audio (TTS / Transcribe / Translate)** | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |
266
- | **Image Generation & Editing** | ✅ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ |
267
- | **Local Model Support** | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ |
268
- | **Vector Stores (RAG)** | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
269
- | **Responses** | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
270
- | **Moderations** | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
271
-
272
- \* JSON Schema support in Ollama/LlamaCpp depends on the model, not the API.
273
-
274
-
275
- ## Examples
276
-
277
- ### Providers
278
-
279
- #### LLM::Provider
280
-
281
- All providers inherit from [LLM::Provider](https://0x1eef.github.io/x/llm.rb/LLM/Provider.html) &ndash;
282
- they share a common interface and set of functionality. Each provider can be instantiated
283
- using an API key (if required) and an optional set of configuration options via
284
- [the singleton methods of LLM](https://0x1eef.github.io/x/llm.rb/LLM.html). For example:
196
+ The `LLM::Schema` system lets you define JSON schemas that LLMs must follow.
197
+ Schemas can be defined as classes with `property` declarations or built
198
+ programmatically using a fluent interface. When you pass a schema to a context,
199
+ llm.rb automatically configures the provider's JSON mode and validates
200
+ responses against your schema. The `content!` method returns the parsed JSON
201
+ object, while errors are captured as structured data rather than raising
202
+ exceptions:
285
203
 
286
204
  ```ruby
287
205
  #!/usr/bin/env ruby
288
206
  require "llm"
207
+ require "pp"
289
208
 
290
- ##
291
- # remote providers
292
- llm = LLM.openai(key: "yourapikey")
293
- llm = LLM.gemini(key: "yourapikey")
294
- llm = LLM.anthropic(key: "yourapikey")
295
- llm = LLM.xai(key: "yourapikey")
296
- llm = LLM.zai(key: "yourapikey")
297
- llm = LLM.deepseek(key: "yourapikey")
298
-
299
- ##
300
- # local providers
301
- llm = LLM.ollama(key: nil)
302
- llm = LLM.llamacpp(key: nil)
303
- ```
304
-
305
- #### LLM::Response
306
-
307
- All provider methods that perform requests return an
308
- [LLM::Response](https://0x1eef.github.io/x/llm.rb/LLM/Response.html).
309
- If the HTTP response is JSON (`content-type: application/json`),
310
- `response.body` is parsed into an
311
- [LLM::Object](https://0x1eef.github.io/x/llm.rb/LLM/Object.html) for
312
- dot-access. For non-JSON responses, `response.body` is a raw string.
313
- It is also possible to access top-level keys directly on the response
314
- (eg: `res.object` instead of `res.body.object`):
315
-
316
- ```ruby
317
- #!/usr/bin/env ruby
318
- require "llm"
209
+ class Report < LLM::Schema
210
+ property :category, Enum["performance", "security", "outage"], "Report category", required: true
211
+ property :summary, String, "Short summary", required: true
212
+ property :services, Array[String], "Impacted services", required: true
213
+ property :timestamp, String, "When it happened", optional: true
214
+ end
319
215
 
320
216
  llm = LLM.openai(key: ENV["KEY"])
321
- res = llm.models.all
322
- puts res.object
323
- puts res.data.first.id
324
- ```
325
-
326
- #### Persistence
327
-
328
- The llm.rb library can maintain a process-wide connection pool
329
- for each provider that is instantiated. This feature can improve
330
- performance but it is optional, the implementation depends on
331
- [net-http-persistent](https://github.com/drbrain/net-http-persistent),
332
- and the gem should be installed separately:
333
-
334
- ```ruby
335
- #!/usr/bin/env ruby
336
- require "llm"
217
+ ctx = LLM::Context.new(llm, schema: Report)
218
+ res = ctx.talk("Structure this report: 'Database latency spiked at 10:42 UTC, causing 5% request timeouts for 12 minutes.'")
219
+ pp res.content!
337
220
 
338
- llm = LLM.openai(key: ENV["KEY"]).persist!
339
- res1 = llm.responses.create "message 1"
340
- res2 = llm.responses.create "message 2", previous_response_id: res1.response_id
341
- res3 = llm.responses.create "message 3", previous_response_id: res2.response_id
342
- puts res3.output_text
221
+ # {
222
+ # "category" => "performance",
223
+ # "summary" => "Database latency spiked, causing 5% request timeouts for 12 minutes.",
224
+ # "services" => ["Database"],
225
+ # "timestamp" => "2024-06-05T10:42:00Z"
226
+ # }
343
227
  ```
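+
+ The fluent interface covers the same ground without a class. A minimal
+ sketch, using the `llm.schema` builder that also appears elsewhere in this
+ diff:
+
+ ```ruby
+ schema = llm.schema.object(
+   category: llm.schema.string.required,
+   summary: llm.schema.string.required
+ )
+ ctx = LLM::Context.new(llm, schema: schema)
+ ```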

- #### Telemetry
+ ## Providers

- The llm.rb library includes telemetry support through its tracer API, and it
- can be used to trace LLM requests. It can be useful for debugging, monitoring,
- and observability. The primary use case in mind is integration with tools like
- [LangSmith](https://www.langsmith.com/).
+ llm.rb supports multiple LLM providers with a unified API.
+ All providers share the same context, tool, and concurrency interfaces, making
+ it easy to switch between cloud and local models, as the sketch after this
+ list shows:

- The telemetry implementation uses the [opentelemetry-sdk](https://github.com/open-telemetry/opentelemetry-ruby)
- and is based on the [gen-ai telemetry spec(s)](https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/).
- This feature is optional, disabled by default, and the [opentelemetry-sdk](https://github.com/open-telemetry/opentelemetry-ruby)
- gem should be installed separately. Please also note that llm.rb will take care of
- loading and configuring the [opentelemetry-sdk](https://github.com/open-telemetry/opentelemetry-ruby)
- library for you, and llm.rb configures an in-memory exporter that doesn't have
- external dependencies by default:
+ - **OpenAI** (`LLM.openai`)
+ - **Anthropic** (`LLM.anthropic`)
+ - **Google** (`LLM.google`)
+ - **DeepSeek** (`LLM.deepseek`)
+ - **xAI** (`LLM.xai`)
+ - **zAI** (`LLM.zai`)
+ - **Ollama** (`LLM.ollama`)
+ - **Llama.cpp** (`LLM.llamacpp`)
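+
+ A minimal sketch of provider construction; note that the `LLM.gemini` entry
+ point was renamed `LLM.google` in this release, and local providers do not
+ require a key:
+
+ ```ruby
+ require "llm"
+
+ # remote providers
+ llm = LLM.openai(key: ENV["KEY"])
+ llm = LLM.google(key: ENV["KEY"])
+
+ # local providers
+ llm = LLM.ollama(key: nil)
+ llm = LLM.llamacpp(key: nil)
+ ```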

- ```ruby
- #!/usr/bin/env ruby
- require "llm"
- require "pp"
-
- llm = LLM.openai(key: ENV["KEY"])
- llm.tracer = LLM::Tracer::Telemetry.new(llm)
-
- ses = LLM::Session.new(llm)
- ses.talk "Hello world!"
- ses.talk "Adios."
- ses.tracer.spans.each { |span| pp span }
- ```
-
- The llm.rb library also supports export through the OpenTelemetry Protocol (OTLP).
- OTLP is a standard protocol for exporting telemetry data, and it is supported by
- multiple observability tools. By default the export is batched in the background,
- and happens automatically but short lived scripts might need to
- [explicitly flush](https://0x1eef.github.io/x/llm.rb/LLM/Tracer/Telemetry#flush!-instance_method)
- the exporter before they exit &ndash; otherwise some telemetry data could be lost:
-
- ```ruby
- #!/usr/bin/env ruby
- require "llm"
- require "opentelemetry-exporter-otlp"
+ ## Production

- endpoint = "https://api.smith.langchain.com/otel/v1/traces"
- exporter = OpenTelemetry::Exporter::OTLP::Exporter.new(endpoint:)
+ #### Ready for production

- llm = LLM.openai(key: ENV["KEY"])
- llm.tracer = LLM::Tracer::Telemetry.new(llm, exporter:)
+ llm.rb is designed for production use from the ground up:

- ses = LLM::Session.new(llm)
- ses.talk "hello"
- ses.talk "how are you?"
+ - **Thread-safe providers** - Share `LLM::Provider` instances across your application
+ - **Thread-local contexts** - Keep `LLM::Context` instances thread-local for state isolation
+ - **Cost tracking** - Know your spend before the bill arrives
+ - **Observability** - Built-in tracing with OpenTelemetry support
+ - **Persistence** - Save and restore contexts across processes
+ - **Performance** - Swap JSON adapters and enable HTTP connection pooling
+ - **Error handling** - Structured errors, not unpredictable exceptions

- at_exit do
-   # Helpful for short-lived scripts, otherwise the exporter
-   # might not have time to flush pending telemetry data
-   ses.tracer.flush!
- end
- ```
+ #### Tracing

- #### Logger
+ llm.rb includes built-in tracers for local logging, OpenTelemetry, and
+ LangSmith. Assign a tracer to a provider and all context requests and tool
+ calls made through that provider will be instrumented. Tracers are local to
+ the current fiber, so the same provider can use different tracers in different
+ concurrent tasks without interfering with each other.

- The llm.rb library includes simple logging support through its
- tracer API, and Ruby's standard library ([ruby/logger](https://github.com/ruby/logger)).
- This feature is optional, disabled by default, and it can be useful for debugging and/or
- monitoring requests to LLM providers. The `path` or `io` options can be used to choose
- where logs are written to, and by default it is set to `$stdout`:
+ Use the logger tracer when you want structured logs through Ruby's standard
+ library:

  ```ruby
  #!/usr/bin/env ruby
@@ -415,339 +273,249 @@ require "llm"
  llm = LLM.openai(key: ENV["KEY"])
  llm.tracer = LLM::Tracer::Logger.new(llm, io: $stdout)

- ses = LLM::Session.new(llm)
- ses.talk "Hello world!"
- ses.talk "Adios."
+ ctx = LLM::Context.new(llm)
+ ctx.talk("Hello")
  ```

- #### Serialization
+ Use the telemetry tracer when you want OpenTelemetry spans. This requires the
+ `opentelemetry-sdk` gem, and exporters such as OTLP can be added separately:

- [LLM::Session](https://0x1eef.github.io/x/llm.rb/LLM/Session.html) can be
- serialized and deserialized across process boundaries and persisted to
- storage such as files, a `jsonb` column (PostgreSQL), or other backends
- through a JSON representation of the history encapsulated by
- [LLM::Session](https://0x1eef.github.io/x/llm.rb/LLM/Session.html)
- &ndash; inclusive of tool metadata as well:
-
- * Process 1
  ```ruby
  #!/usr/bin/env ruby
  require "llm"

  llm = LLM.openai(key: ENV["KEY"])
- ses = LLM::Session.new(llm)
- ses.talk "Howdy partner"
- ses.talk "I'll see you later"
- ses.save(path: "session.json")
- ```
- * Process 2
- ```ruby
- #!/usr/bin/env ruby
- require "llm"
- require "pp"
+ llm.tracer = LLM::Tracer::Telemetry.new(llm)

- llm = LLM.openai(key: ENV["KEY"])
- ses = LLM::Session.new(llm)
- ses.restore(path: "session.json")
- ses.talk "Howdy partner. I'm back"
- pp ses.messages
+ ctx = LLM::Context.new(llm)
+ ctx.talk("Hello")
+ pp llm.tracer.spans
  ```

- But how does it work without a file ? The [LLM::Session](https://0x1eef.github.io/x/llm.rb/LLM/Session.html)
- class implements `#to_json` and it can be used to obtain a JSON representation
- of a session that can be stored in a `jsonb` column in PostgreSQL, or any
- other storage backend. The session can then be restored from the JSON
- representation via the restore method and its `string` argument:
+ Use the LangSmith tracer when you want LangSmith-compatible metadata and trace
+ grouping on top of the telemetry tracer:

  ```ruby
  #!/usr/bin/env ruby
  require "llm"

  llm = LLM.openai(key: ENV["KEY"])
- ses1 = LLM::Session.new(llm)
- ses1.talk "Howdy partner"
- ses1.talk "I'll see you later"
-
- json = ses1.to_json
- ses2 = LLM::Session.new(llm)
- ses2.restore(string: json)
- ses2.talk "Howdy partner. I'm back"
- ```
+ llm.tracer = LLM::Tracer::Langsmith.new(
+   llm,
+   metadata: {env: "dev"},
+   tags: ["chatbot"]
+ )

- ### Tools
+ ctx = LLM::Context.new(llm)
+ ctx.talk("Hello")
+ ```

- #### LLM::Function
+ #### Thread Safety

- The following example demonstrates [LLM::Function](https://0x1eef.github.io/x/llm.rb/LLM/Function.html)
- and how it can define a local function (which happens to be a tool), and how
- a provider (such as OpenAI) can then detect when we should call the function.
- Its most notable feature is that it can act as a closure and has access to
- its surrounding scope, which can be useful in some situations:
+ llm.rb uses Ruby's `Monitor` class to ensure thread safety at the provider
+ level, allowing you to share a single provider instance across multiple threads
+ while maintaining state isolation through thread-local contexts. This design
+ enables efficient resource sharing while preventing race conditions in
+ concurrent applications:

  ```ruby
  #!/usr/bin/env ruby
  require "llm"

- llm = LLM.openai(key: ENV["KEY"])
- tool = LLM.function(:system) do |fn|
-   fn.description "Run a shell command"
-   fn.params do |schema|
-     schema.object(command: schema.string.required)
-   end
-   fn.define do |command:|
-     ro, wo = IO.pipe
-     re, we = IO.pipe
-     Process.wait Process.spawn(command, out: wo, err: we)
-     [wo,we].each(&:close)
-     {stderr: re.read, stdout: ro.read}
-   end
- end
-
- ses = LLM::Session.new(llm, tools: [tool])
- ses.talk "Your task is to run shell commands via a tool.", role: :user
-
- ses.talk "What is the current date?", role: :user
- ses.talk ses.functions.map(&:call) # report return value to the LLM
+ # Thread-safe providers - create once, use everywhere
+ llm = LLM.openai(key: ENV["KEY"])

- ses.talk "What operating system am I running?", role: :user
- ses.talk ses.functions.map(&:call) # report return value to the LLM
+ # Each thread should have its own context for state isolation
+ Thread.new do
+   ctx = LLM::Context.new(llm) # Thread-local context
+   ctx.talk("Hello from thread 1")
+ end

- ##
- # {stderr: "", stdout: "Thu May 1 10:01:02 UTC 2025"}
- # {stderr: "", stdout: "FreeBSD"}
+ Thread.new do
+   ctx = LLM::Context.new(llm) # Thread-local context
+   ctx.talk("Hello from thread 2")
+ end
  ```

- #### LLM::Tool
+ #### Performance Tuning

- The [LLM::Tool](https://0x1eef.github.io/x/llm.rb/LLM/Tool.html) class can be used
- to implement a [LLM::Function](https://0x1eef.github.io/x/llm.rb/LLM/Function.html)
- as a class. Under the hood, a subclass of [LLM::Tool](https://0x1eef.github.io/x/llm.rb/LLM/Tool.html)
- wraps an instance of [LLM::Function](https://0x1eef.github.io/x/llm.rb/LLM/Function.html)
- and delegates to it.
-
- The choice between [LLM::Function](https://0x1eef.github.io/x/llm.rb/LLM/Function.html)
- and [LLM::Tool](https://0x1eef.github.io/x/llm.rb/LLM/Tool.html) is often a matter of
- preference but each carry their own benefits. For example, [LLM::Function](https://0x1eef.github.io/x/llm.rb/LLM/Function.html)
- has the benefit of being a closure that has access to its surrounding context and
- sometimes that is useful:
+ llm.rb's JSON adapter system lets you swap JSON libraries for better
+ performance in high-throughput applications. The library supports stdlib JSON,
+ Oj, and Yajl, with Oj typically offering the best performance. Additionally,
+ you can enable HTTP connection pooling using the optional `net-http-persistent`
+ gem to reduce connection overhead in production environments:

  ```ruby
  #!/usr/bin/env ruby
  require "llm"

- class System < LLM::Tool
-   name "system"
-   description "Run a shell command"
-   param :command, String, "The command to execute", required: true
-
-   def call(command:)
-     ro, wo = IO.pipe
-     re, we = IO.pipe
-     Process.wait Process.spawn(command, out: wo, err: we)
-     [wo,we].each(&:close)
-     {stderr: re.read, stdout: ro.read}
-   end
- end
-
- llm = LLM.openai(key: ENV["KEY"])
- ses = LLM::Session.new(llm, tools: [System])
- ses.talk "Your task is to run shell commands via a tool.", role: :user
+ # Swap JSON libraries for better performance
+ LLM.json = :oj # Use Oj for faster JSON parsing

- ses.talk "What is the current date?", role: :user
- ses.talk ses.functions.map(&:call) # report return value to the LLM
-
- ses.talk "What operating system am I running?", role: :user
- ses.talk ses.functions.map(&:call) # report return value to the LLM
-
- ##
- # {stderr: "", stdout: "Thu May 1 10:01:02 UTC 2025"}
- # {stderr: "", stdout: "FreeBSD"}
+ # Enable HTTP connection pooling for high-throughput applications
+ llm = LLM.openai(key: ENV["KEY"]).persist! # Uses net-http-persistent when available
  ```

- ### Files
-
- #### Create
+ #### Model Registry

- The OpenAI and Gemini providers provide a Files API where a client can upload files
- that can be referenced from a prompt, and with other APIs as well. The following
- example uses the OpenAI provider to describe the contents of a PDF file after
- it has been uploaded. The file (a specialized instance of
- [LLM::Response](https://0x1eef.github.io/x/llm.rb/LLM/Response.html)
- ) is given as part of a prompt that is understood by llm.rb:
+ llm.rb includes a local model registry that provides metadata about model
+ capabilities, pricing, and limits without requiring API calls. The registry is
+ shipped with the gem and sourced from https://models.dev, giving you access to
+ up-to-date information about context windows, token costs, and supported
+ modalities for each provider:

  ```ruby
  #!/usr/bin/env ruby
  require "llm"
- require "pp"

- llm = LLM.openai(key: ENV["KEY"])
- ses = LLM::Session.new(llm)
- file = llm.files.create(file: "/tmp/llm-book.pdf")
- res = ses.talk ["Tell me about this file", file]
- pp res.content
+ # Access model metadata, capabilities, and pricing
+ registry = LLM.registry_for(:openai)
+ model_info = registry.limit(model: "gpt-4.1")
+ puts "Context window: #{model_info.context} tokens"
+ puts "Cost: $#{model_info.cost.input}/1M input tokens"
  ```

- ### Prompts
-
- #### Multimodal
+ ## More Examples

- LLMs are great with text, but many can also handle images, audio, video,
- and URLs. With llm.rb you pass those inputs by tagging them with one of
- the following methods. And for multipart prompts, we can pass an array
- where each element is a part of the input. See the example below for
- details, in the meantime here are the methods to know for multimodal
- inputs:
+ #### Responses API

- * `ses.image_url` for an image URL
- * `ses.local_file` for a local file
- * `ses.remote_file` for a file already uploaded via the provider's Files API
+ llm.rb also supports OpenAI's Responses API through `llm.responses` and
+ `ctx.respond`. This API can maintain response state server-side and can reduce
+ how much conversation state needs to be sent on each turn:

  ```ruby
  #!/usr/bin/env ruby
  require "llm"

  llm = LLM.openai(key: ENV["KEY"])
- ses = LLM::Session.new(llm)
- res = ses.talk ["Tell me about this image URL", ses.image_url(url)]
- res = ses.talk ["Tell me about this PDF", ses.remote_file(file)]
- res = ses.talk ["Tell me about this image", ses.local_file(path)]
- ```
+ ctx = LLM::Context.new(llm)

- ### Audio
+ ctx.respond("Your task is to answer the user's questions", role: :developer)
+ res = ctx.respond("What is the capital of France?")
+ puts res.output_text
+ ```

- #### Speech
+ #### Context Persistence

- Some but not all providers implement audio generation capabilities that
- can create speech from text, transcribe audio to text, or translate
- audio to text (usually English). The following example uses the OpenAI provider
- to create an audio file from a text prompt. The audio is then moved to
- `${HOME}/hello.mp3` as the final step:
+ Contexts can be serialized and restored across process boundaries. This makes
+ it possible to persist conversation state in a file, database, or queue and
+ resume work later:

  ```ruby
  #!/usr/bin/env ruby
  require "llm"

  llm = LLM.openai(key: ENV["KEY"])
- res = llm.audio.create_speech(input: "Hello world")
- IO.copy_stream res.audio, File.join(Dir.home, "hello.mp3")
+ ctx = LLM::Context.new(llm)
+ ctx.talk("Hello")
+ ctx.talk("Remember that my favorite language is Ruby")
+ ctx.save(path: "context.json")
+
+ restored = LLM::Context.new(llm)
+ restored.restore(path: "context.json")
+ res = restored.talk("What is my favorite language?")
+ puts res.content
  ```

- #### Transcribe
+ #### Agents

- The following example transcribes an audio file to text. The audio file
- (`${HOME}/hello.mp3`) was theoretically created in the previous example,
- and the result is printed to the console. The example uses the OpenAI
- provider to transcribe the audio file:
+ Agents in llm.rb are reusable, preconfigured assistants that automatically
+ execute tool calls and maintain conversation state. Unlike contexts, which
+ require manual tool execution, agents automatically handle the tool call loop,
+ making them ideal for autonomous workflows where you want the LLM to
+ independently use available tools to accomplish tasks:

  ```ruby
  #!/usr/bin/env ruby
  require "llm"

+ class SystemAdmin < LLM::Agent
+   model "gpt-4.1"
+   instructions "You are a Linux system admin"
+   tools Shell
+   schema Result
+ end
+
  llm = LLM.openai(key: ENV["KEY"])
- res = llm.audio.create_transcription(
-   file: File.join(Dir.home, "hello.mp3")
- )
- puts res.text # => "Hello world."
+ agent = SystemAdmin.new(llm)
+ res = agent.talk("Run 'date'")
  ```

- #### Translate
+ #### Cost Tracking

- The following example translates an audio file to text. In this example
- the audio file (`${HOME}/bomdia.mp3`) is theoretically in Portuguese,
- and it is translated to English. The example uses the OpenAI provider,
- and at the time of writing, it can only translate to English:
+ llm.rb provides built-in cost estimation that works without making additional
+ API calls. The cost tracking system uses the local model registry to calculate
+ estimated costs based on token usage, giving you visibility into spending
+ before bills arrive. This is particularly useful for monitoring usage in
+ production applications and setting budget alerts:

  ```ruby
  #!/usr/bin/env ruby
  require "llm"

  llm = LLM.openai(key: ENV["KEY"])
- res = llm.audio.create_translation(
-   file: File.join(Dir.home, "bomdia.mp3")
- )
- puts res.text # => "Good morning."
+ ctx = LLM::Context.new(llm)
+ ctx.talk "Hello"
+ puts "Estimated cost so far: $#{ctx.cost}"
+ ctx.talk "Tell me a joke"
+ puts "Estimated cost so far: $#{ctx.cost}"
  ```

- ### Images
+ #### Multimodal Prompts

- #### Create
-
- Some but not all LLM providers implement image generation capabilities that
- can create new images from a prompt, or edit an existing image with a
- prompt. The following example uses the OpenAI provider to create an
- image of a dog on a rocket to the moon. The image is then moved to
- `${HOME}/dogonrocket.png` as the final step:
+ Contexts provide helpers for composing multimodal prompts from URLs, local
+ files, and provider-managed remote files. These tagged objects let providers
+ adapt the input into the format they expect:

  ```ruby
  #!/usr/bin/env ruby
  require "llm"
- require "open-uri"
- require "fileutils"

  llm = LLM.openai(key: ENV["KEY"])
- res = llm.images.create(prompt: "a dog on a rocket to the moon")
- res.urls.each do |url|
-   FileUtils.mv OpenURI.open_uri(url).path,
-     File.join(Dir.home, "dogonrocket.png")
- end
+ ctx = LLM::Context.new(llm)
+
+ res = ctx.talk ["Describe this image", ctx.image_url("https://example.com/cat.jpg")]
+ puts res.content
  ```
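+
+ The other helpers follow the same pattern. A sketch, assuming a local PDF and
+ a `file` object previously uploaded through the provider's Files API:
+
+ ```ruby
+ res = ctx.talk ["Describe this document", ctx.local_file("report.pdf")]
+ res = ctx.talk ["Summarize this upload", ctx.remote_file(file)]
+ ```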

- #### Edit
+ #### Audio Generation

- The following example is focused on editing a local image with the aid
- of a prompt. The image (`/tmp/llm-logo.png`) is returned to us with a hat.
- The image is then moved to `${HOME}/logo-with-hat.png` as
- the final step:
+ llm.rb supports OpenAI's audio API for text-to-speech generation, allowing you
+ to create speech from text with configurable voices and output formats. The
+ audio API returns binary audio data that can be streamed directly to files or
+ other IO objects, enabling integration with multimedia applications:

  ```ruby
  #!/usr/bin/env ruby
  require "llm"
- require "open-uri"
- require "fileutils"

  llm = LLM.openai(key: ENV["KEY"])
- res = llm.images.edit(
-   image: "/tmp/llm-logo.png",
-   prompt: "add a hat to the logo",
- )
- res.urls.each do |url|
-   FileUtils.mv OpenURI.open_uri(url).path,
-     File.join(Dir.home, "logo-with-hat.png")
- end
+ res = llm.audio.create_speech(input: "Hello world")
+ IO.copy_stream res.audio, File.join(Dir.home, "hello.mp3")
  ```

- #### Variations
+ #### Image Generation

- The following example is focused on creating variations of a local image.
- The image (`/tmp/llm-logo.png`) is returned to us with five different variations.
- The images are then moved to `${HOME}/logo-variation0.png`, `${HOME}/logo-variation1.png`
- and so on as the final step:
+ llm.rb provides access to OpenAI's DALL-E image generation API through a
+ unified interface. The API supports multiple response formats including
+ base64-encoded images and temporary URLs, with automatic handling of binary
+ data streaming for efficient file operations:

  ```ruby
  #!/usr/bin/env ruby
  require "llm"
- require "open-uri"
- require "fileutils"

  llm = LLM.openai(key: ENV["KEY"])
- res = llm.images.create_variation(
-   image: "/tmp/llm-logo.png",
-   n: 5
- )
- res.urls.each.with_index do |url, index|
-   FileUtils.mv OpenURI.open_uri(url).path,
-     File.join(Dir.home, "logo-variation#{index}.png")
- end
+ res = llm.images.create(prompt: "a dog on a rocket to the moon")
+ IO.copy_stream res.images[0], File.join(Dir.home, "dogonrocket.png")
  ```

- ### Embeddings
-
- #### Text
+ #### Embeddings

- The
- [`LLM::Provider#embed`](https://0x1eef.github.io/x/llm.rb/LLM/Provider.html#embed-instance_method)
- method returns vector embeddings for one or more text inputs. A common
- use is semantic search (store vectors, then query for similar text):
+ llm.rb's embedding API generates vector representations of text for semantic
+ search and retrieval-augmented generation (RAG) workflows. The API supports
+ batch processing of multiple inputs and returns vectors suitable for
+ similarity operations, with dimensionality determined by the embedding model:

  ```ruby
  #!/usr/bin/env ruby
@@ -759,52 +527,32 @@ puts res.class
  puts res.embeddings.size
  puts res.embeddings[0].size

- ##
  # LLM::Response
  # 3
  # 1536
  ```

- ### Models
+ ## Real-World Example: Relay

- #### List
+ See how these pieces come together in a complete application architecture with
+ [Relay](https://github.com/llmrb/relay), a production-ready LLM application
+ built on llm.rb that demonstrates:

- Almost all LLM providers provide a models endpoint that allows a client to
- query the list of models that are available to use. The list is dynamic,
- maintained by LLM providers, and it is independent of a specific llm.rb
- release:
+ - Context management across requests
+ - Tool composition and execution
+ - Concurrent workflows
+ - Cost tracking and observability
+ - Production deployment patterns

- ```ruby
- #!/usr/bin/env ruby
- require "llm"
- require "pp"
-
- ##
- # List all models
- llm = LLM.openai(key: ENV["KEY"])
- llm.models.all.each do |model|
-   puts "model: #{model.id}"
- end
-
- ##
- # Select a model
- model = llm.models.all.find { |m| m.id == "gpt-3.5-turbo" }
- ses = LLM::Session.new(llm, model: model.id)
- res = ses.talk "Hello #{model.id} :)"
- pp res.content
- ```
+ Watch the screencast:

- ## Install
+ [![Watch the llm.rb screencast](https://img.youtube.com/vi/x1K4wMeO_QA/maxresdefault.jpg)](https://www.youtube.com/watch?v=x1K4wMeO_QA)

- llm.rb can be installed via rubygems.org:
+ ## Installation

- gem install llm.rb
-
- ## Sources
-
- * [GitHub.com](https://github.com/llmrb/llm.rb)
- * [GitLab.com](https://gitlab.com/llmrb/llm.rb)
- * [Codeberg.org](https://codeberg.org/llmrb/llm.rb)
+ ```bash
+ gem install llm.rb
+ ```

  ## License