ruby_llm 0.1.0.pre36 → 0.1.0.pre37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/docs.yml +53 -0
- data/.rspec_status +7 -35
- data/.rubocop.yml +7 -2
- data/.yardopts +12 -0
- data/Gemfile +27 -0
- data/bin/console +4 -4
- data/docs/.gitignore +7 -0
- data/docs/Gemfile +11 -0
- data/docs/_config.yml +43 -0
- data/docs/_data/navigation.yml +25 -0
- data/docs/guides/chat.md +206 -0
- data/docs/guides/embeddings.md +325 -0
- data/docs/guides/error-handling.md +301 -0
- data/docs/guides/getting-started.md +164 -0
- data/docs/guides/image-generation.md +274 -0
- data/docs/guides/index.md +45 -0
- data/docs/guides/rails.md +401 -0
- data/docs/guides/streaming.md +242 -0
- data/docs/guides/tools.md +247 -0
- data/docs/index.md +53 -0
- data/docs/installation.md +98 -0
- data/lib/ruby_llm/active_record/acts_as.rb +2 -2
- data/lib/ruby_llm/chat.rb +7 -7
- data/lib/ruby_llm/providers/anthropic/capabilities.rb +1 -2
- data/lib/ruby_llm/providers/anthropic/chat.rb +2 -3
- data/lib/ruby_llm/providers/deepseek/capabilities.rb +0 -1
- data/lib/ruby_llm/providers/gemini/capabilities.rb +1 -1
- data/lib/ruby_llm/providers/openai/capabilities.rb +7 -12
- data/lib/ruby_llm/providers/openai/embeddings.rb +1 -1
- data/lib/ruby_llm/version.rb +1 -1
- data/lib/tasks/models.rake +2 -2
- data/ruby_llm.gemspec +10 -32
- metadata +22 -296
data/docs/guides/rails.md
@@ -0,0 +1,401 @@
---
layout: default
title: Rails Integration
parent: Guides
nav_order: 5
permalink: /guides/rails
---

# Rails Integration

RubyLLM provides seamless integration with Rails through ActiveRecord models. This allows you to easily persist chats, messages, and tool calls in your database.

## Setup

### 1. Create Migrations

First, create the necessary tables in your database:

```ruby
# db/migrate/YYYYMMDDHHMMSS_create_chats.rb
class CreateChats < ActiveRecord::Migration[8.0]
  def change
    create_table :chats do |t|
      t.string :model_id
      t.timestamps
    end
  end
end

# db/migrate/YYYYMMDDHHMMSS_create_messages.rb
class CreateMessages < ActiveRecord::Migration[8.0]
  def change
    create_table :messages do |t|
      t.references :chat, null: false, foreign_key: true
      t.string :role
      t.text :content
      t.string :model_id
      t.integer :input_tokens
      t.integer :output_tokens
      t.references :tool_call
      t.timestamps
    end
  end
end

# db/migrate/YYYYMMDDHHMMSS_create_tool_calls.rb
class CreateToolCalls < ActiveRecord::Migration[8.0]
  def change
    create_table :tool_calls do |t|
      t.references :message, null: false, foreign_key: true
      t.string :tool_call_id, null: false
      t.string :name, null: false
      t.jsonb :arguments, default: {}
      t.timestamps
    end

    add_index :tool_calls, :tool_call_id
  end
end
```

Run the migrations:

```bash
rails db:migrate
```

### 2. Set Up Models

Create the model classes:

```ruby
# app/models/chat.rb
class Chat < ApplicationRecord
  acts_as_chat
end

# app/models/message.rb
class Message < ApplicationRecord
  acts_as_message
end

# app/models/tool_call.rb
class ToolCall < ApplicationRecord
  acts_as_tool_call
end
```

### 3. Configure RubyLLM

In an initializer (e.g., `config/initializers/ruby_llm.rb`):

```ruby
RubyLLM.configure do |config|
  config.openai_api_key = ENV['OPENAI_API_KEY']
  config.anthropic_api_key = ENV['ANTHROPIC_API_KEY']
  config.gemini_api_key = ENV['GEMINI_API_KEY']
  config.deepseek_api_key = ENV['DEEPSEEK_API_KEY']
end
```

## Basic Usage

Once your models are set up, you can use them like any other Rails model:

```ruby
# Create a new chat
chat = Chat.create!(model_id: 'gpt-4o-mini')

# Ask a question
chat.ask "What's the capital of France?"

# The response is automatically persisted
puts chat.messages.last.content

# Continue the conversation
chat.ask "Tell me more about that city"

# All messages are stored in the database
chat.messages.order(:created_at).each do |message|
  puts "#{message.role}: #{message.content}"
end
```

## Streaming Responses

You can stream responses while still persisting the final result:

```ruby
chat = Chat.create!(model_id: 'gpt-4o-mini')

chat.ask "Write a short poem about Ruby" do |chunk|
  # Stream content to the user
  ActionCable.server.broadcast "chat_#{chat.id}", { content: chunk.content }
end

# The complete message is saved in the database
puts chat.messages.last.content
```

## Using with Hotwire

RubyLLM's Rails integration works seamlessly with Hotwire for real-time updates:

```ruby
# app/models/chat.rb
class Chat < ApplicationRecord
  acts_as_chat

  # Add broadcast capabilities
  broadcasts_to ->(chat) { "chat_#{chat.id}" }
end
```

In your controller:

```ruby
# app/controllers/chats_controller.rb
class ChatsController < ApplicationController
  def show
    @chat = Chat.find(params[:id])
  end

  def ask
    @chat = Chat.find(params[:id])

    # Use a background job to avoid blocking
    ChatJob.perform_later(@chat.id, params[:message])

    # Let the user know we're working on it
    respond_to do |format|
      format.turbo_stream
      format.html { redirect_to @chat }
    end
  end
end
```

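The `ask` action above is a member route on chats, and the view later posts to `ask_chat_path`. The guide doesn't show `config/routes.rb`, but a minimal sketch of a matching entry might look like this:

```ruby
# config/routes.rb (illustrative; not shown in the original guide)
Rails.application.routes.draw do
  resources :chats, only: [:show, :create] do
    # POST /chats/:id/ask -> ChatsController#ask, which generates ask_chat_path
    member { post :ask }
  end
end
```
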
Create a background job:

```ruby
# app/jobs/chat_job.rb
class ChatJob < ApplicationJob
  queue_as :default

  def perform(chat_id, message)
    chat = Chat.find(chat_id)

    # Start with a "typing" indicator
    Turbo::StreamsChannel.broadcast_append_to(
      chat,
      target: "messages",
      partial: "messages/typing"
    )

    chat.ask(message) do |chunk|
      # Remove typing indicator after the first chunk arrives
      if chunk.content == chat.messages.last.to_llm.content[0...chunk.content.length]
        Turbo::StreamsChannel.broadcast_remove_to(
          chat,
          target: "typing"
        )
      end

      # Update the streaming message
      Turbo::StreamsChannel.broadcast_replace_to(
        chat,
        target: "assistant_message_#{chat.messages.last.id}",
        partial: "messages/message",
        locals: { message: chat.messages.last, content: chunk.content }
      )
    end
  end
end
```

In your views:

```erb
<!-- app/views/chats/show.html.erb -->
<%= turbo_stream_from @chat %>

<div id="messages">
  <%= render @chat.messages %>
</div>

<%= form_with(url: ask_chat_path(@chat), method: :post) do |f| %>
  <%= f.text_area :message %>
  <%= f.submit "Send" %>
<% end %>
```

## Using Tools

Tools work seamlessly with Rails integration:

```ruby
class Calculator < RubyLLM::Tool
  description "Performs arithmetic calculations"

  param :expression,
        type: :string,
        desc: "Math expression to evaluate"

  def execute(expression:)
    eval(expression).to_s
  rescue StandardError => e
    "Error: #{e.message}"
  end
end

# Add the tool to your chat
chat = Chat.create!(model_id: 'gpt-4o-mini')
chat.with_tool(Calculator)

# Ask a question that requires calculation
chat.ask "What's 123 * 456?"

# Tool calls are persisted
tool_call = chat.messages.find_by(role: 'assistant').tool_calls.first
puts "Tool: #{tool_call.name}"
puts "Arguments: #{tool_call.arguments}"
```

## Customizing Models

You can customize the behavior of your models:

```ruby
class Chat < ApplicationRecord
  acts_as_chat

  # Add custom behavior
  belongs_to :user
  has_many :tags

  # Add custom scopes
  scope :recent, -> { order(created_at: :desc).limit(10) }
  scope :by_model, ->(model_id) { where(model_id: model_id) }

  # Add custom methods
  def summarize
    self.ask "Please summarize our conversation so far."
  end

  def token_count
    messages.sum { |m| (m.input_tokens || 0) + (m.output_tokens || 0) }
  end
end
```

## Message Content Customization

You can customize how message content is stored or extracted:

```ruby
class Message < ApplicationRecord
  acts_as_message

  # Override content handling
  def extract_content
    # For example, compress or expand content
    JSON.parse(content) rescue content
  end
end
```

## Advanced Integration

### User Association

Associate chats with users:

```ruby
# Migration
add_reference :chats, :user, foreign_key: true

# Model
class Chat < ApplicationRecord
  acts_as_chat
  belongs_to :user
end

# Usage
user.chats.create!(model_id: 'gpt-4o-mini').ask("Hello!")
```

### Metadata and Tagging

Add metadata to chats:

```ruby
# Migration
add_column :chats, :metadata, :jsonb, default: {}

# Model
class Chat < ApplicationRecord
  acts_as_chat
end

# Usage
chat = Chat.create!(
  model_id: 'gpt-4o-mini',
  metadata: {
    purpose: 'customer_support',
    category: 'billing',
    priority: 'high'
  }
)
```

### Scoping and Filtering

Create scopes for easier querying:

```ruby
class Chat < ApplicationRecord
  acts_as_chat

  scope :using_gpt, -> { where("model_id LIKE ?", "gpt-%") }
  scope :using_claude, -> { where("model_id LIKE ?", "claude-%") }
  scope :recent, -> { order(created_at: :desc).limit(10) }
  scope :with_high_token_count, -> {
    joins(:messages)
      .group(:id)
      .having("SUM(messages.input_tokens + messages.output_tokens) > ?", 10000)
  }
end
```

## Performance Considerations

For high-volume applications:

1. **Background Processing**: Use background jobs for AI requests
2. **Connection Pooling**: Ensure your database connection pool is sized appropriately
3. **Pagination**: Use pagination when showing chat histories
4. **Archiving**: Consider archiving old chats to maintain performance

```ruby
# Example background job
class AskAiJob < ApplicationJob
  queue_as :ai_requests

  def perform(chat_id, message)
    chat = Chat.find(chat_id)
    chat.ask(message)
  end
end

# Usage
AskAiJob.perform_later(chat.id, "Tell me about Ruby")
```

## Next Steps

Now that you've integrated RubyLLM with Rails, you might want to explore:

- [Using Tools]({% link guides/tools.md %}) to add capabilities to your chats
- [Streaming Responses]({% link guides/streaming.md %}) for a better user experience
- [Error Handling]({% link guides/error-handling.md %}) to handle AI service issues gracefully
data/docs/guides/streaming.md
@@ -0,0 +1,242 @@
---
layout: default
title: Streaming
parent: Guides
nav_order: 4
permalink: /guides/streaming
---

# Streaming Responses

RubyLLM provides streaming capabilities that allow you to receive AI responses in real-time as they're being generated, rather than waiting for the complete response. This creates a more interactive experience and is especially useful for long responses or applications with real-time UI updates.

## Basic Streaming

To stream responses, simply provide a block to the `ask` method:

```ruby
chat = RubyLLM.chat

chat.ask "Write a short story about a programmer" do |chunk|
  # Each chunk contains a portion of the response
  print chunk.content
  $stdout.flush # Ensure content is displayed immediately
end
```

## Understanding Chunks

Each streamed chunk is an instance of `RubyLLM::Chunk` (which inherits from `RubyLLM::Message`) and provides:

```ruby
chunk.content       # The text fragment in this chunk
chunk.role          # Always :assistant for streamed chunks
chunk.model_id      # The model generating the response
chunk.input_tokens  # Input token count (usually only in the final chunk)
chunk.output_tokens # Output token count (usually only in the final chunk)
```

## Accumulated Response

Even when streaming, RubyLLM still returns the complete final message:

```ruby
final_message = chat.ask "Write a poem" do |chunk|
  print chunk.content
end

# You can use the final message as normal
puts "\nFinal message length: #{final_message.content.length}"
puts "Token usage: #{final_message.output_tokens} tokens"
```

## Web Application Integration

### Rails with ActionCable

```ruby
# In your controller
def ask
  @chat = Chat.find(params[:id])

  @chat.ask(params[:message]) do |chunk|
    ActionCable.server.broadcast(
      "chat_#{@chat.id}",
      { content: chunk.content }
    )
  end

  head :ok
end
```

```javascript
// In your JavaScript
const channel = consumer.subscriptions.create({ channel: "ChatChannel", id: chatId }, {
  received(data) {
    // Append incoming chunk to the display
    document.getElementById('response').innerHTML += data.content;
  }
});
```

### Rails with Turbo Streams

```ruby
class ChatJob < ApplicationJob
  queue_as :default

  def perform(chat_id, message)
    chat = Chat.find(chat_id)

    chat.ask(message) do |chunk|
      Turbo::StreamsChannel.broadcast_update_to(
        "chat_#{chat.id}",
        target: "response",
        html: chunk.content,
        append: true
      )
    end
  end
end
```

### Sinatra with Server-Sent Events (SSE)

```ruby
get '/chat/:id/ask' do
  content_type 'text/event-stream'

  chat = Chat.find(params[:id])

  # Stream the response back to the client as it arrives
  stream do |out|
    chat.ask(params[:message]) do |chunk|
      # Send chunk as SSE event
      out << "data: #{chunk.content}\n\n"
    end

    # Send completion signal
    out << "event: complete\ndata: {}\n\n"
  end
end
```

## Error Handling

Errors that occur during streaming need special handling:

```ruby
begin
  chat.ask("Tell me a story") do |chunk|
    print chunk.content
  end
rescue RubyLLM::Error => e
  puts "\nError during streaming: #{e.message}"
end
```

Common errors during streaming:

- `ServiceUnavailableError` - The AI service is temporarily unavailable
- `RateLimitError` - You've exceeded your API rate limit
- `BadRequestError` - There was a problem with your request parameters

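The transient errors above are often worth retrying, while a bad request should surface immediately. Here is a minimal sketch of per-class handling, assuming these classes are subclasses of `RubyLLM::Error` under the `RubyLLM` namespace:

```ruby
attempts = 0

begin
  chat.ask("Tell me a story") do |chunk|
    print chunk.content
  end
rescue RubyLLM::RateLimitError, RubyLLM::ServiceUnavailableError => e
  # Transient provider issues: retry a couple of times with backoff
  attempts += 1
  if attempts <= 2
    sleep 2**attempts
    retry
  end
  puts "\nGiving up after #{attempts} retries: #{e.message}"
rescue RubyLLM::BadRequestError => e
  # A malformed request won't succeed on retry, so surface it immediately
  puts "\nRequest was rejected: #{e.message}"
end
```
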
## Provider-Specific Considerations

### OpenAI

OpenAI's streaming implementation provides small, frequent chunks for a smooth experience.

### Anthropic

Claude models may return slightly larger chunks with potentially longer pauses between them.

### Google Gemini

Gemini streaming is highly responsive but may show slightly different chunking behavior.

## Streaming with Tools

When using tools, streaming works a bit differently:

```ruby
chat.with_tool(Calculator)
    .ask("What's 123 * 456?") do |chunk|
  # Tool call execution isn't streamed
  # You'll receive chunks after tool execution completes
  print chunk.content
end
```

The tool call execution introduces a pause in the streaming, as the model waits for the tool response before continuing.

## Performance Considerations

Streaming typically uses the same number of tokens as non-streaming responses but establishes longer-lived connections to the AI provider. Consider these best practices:

1. Set appropriate timeouts for streaming connections
2. Handle network interruptions gracefully
3. Consider background processing for long-running streams
4. Implement rate limiting to avoid overwhelming your servers

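A minimal sketch of the first three practices, combining a background job with a hard timeout from Ruby's standard library. The job class, channel name, and timeout value are illustrative, not part of RubyLLM:

```ruby
require 'timeout'

# Illustrative job: runs the long-lived stream off the request thread,
# enforces a hard timeout, and handles interruptions gracefully.
class StreamChatJob < ApplicationJob
  queue_as :default

  STREAM_TIMEOUT = 120 # seconds; tune for your longest expected response

  def perform(chat_id, message)
    chat = Chat.find(chat_id)

    Timeout.timeout(STREAM_TIMEOUT) do
      chat.ask(message) do |chunk|
        ActionCable.server.broadcast("chat_#{chat.id}", { content: chunk.content })
      end
    end
  rescue Timeout::Error, RubyLLM::Error => e
    # Stalled stream or provider error: tell the client instead of failing silently
    ActionCable.server.broadcast("chat_#{chat.id}", { error: e.message })
  end
end
```

Rate limiting (point 4) is usually enforced outside the job itself, for example at the queue, controller, or proxy layer.
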
## Tracking Token Usage

Token usage information is typically only available in the final chunk or completed message:

```ruby
total_tokens = 0

chat.ask("Write a detailed explanation of quantum computing") do |chunk|
  print chunk.content

  # Only count tokens in the final chunk
  if chunk.output_tokens
    total_tokens = chunk.input_tokens + chunk.output_tokens
  end
end

puts "\nTotal tokens: #{total_tokens}"
```

## Custom Processing of Streamed Content

You can process streamed content in real-time:

```ruby
accumulated_text = ""

chat.ask("Write a list of 10 fruits") do |chunk|
  new_content = chunk.content
  accumulated_text += new_content

  # Count fruits as they come in
  if new_content.include?("\n")
    fruit_count = accumulated_text.scan(/\d+\./).count
    print "\rFruits listed: #{fruit_count}/10"
  end
end
```

## Rails Integration

When using RubyLLM's Rails integration with `acts_as_chat`, streaming still works seamlessly:

```ruby
class Chat < ApplicationRecord
  acts_as_chat
end

chat = Chat.create!(model_id: 'gpt-4o-mini')

# Stream responses while persisting the final result
chat.ask("Tell me about Ruby") do |chunk|
  ActionCable.server.broadcast("chat_#{chat.id}", { content: chunk.content })
end

# The complete message is saved in the database
puts chat.messages.last.content
```

## Next Steps

Now that you understand streaming, you might want to explore:

- [Using Tools]({% link guides/tools.md %}) to add capabilities to your AI interactions
- [Rails Integration]({% link guides/rails.md %}) to persist conversations
- [Error Handling]({% link guides/error-handling.md %}) for reliable applications