ruby_llm 1.3.1 → 1.4.0

This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (47):
  1. checksums.yaml +4 -4
  2. data/README.md +13 -14
  3. data/lib/generators/ruby_llm/install/templates/INSTALL_INFO.md.tt +108 -0
  4. data/lib/generators/ruby_llm/install/templates/chat_model.rb.tt +3 -0
  5. data/lib/generators/ruby_llm/install/templates/create_chats_migration.rb.tt +8 -0
  6. data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +15 -0
  7. data/lib/generators/ruby_llm/install/templates/create_tool_calls_migration.rb.tt +14 -0
  8. data/lib/generators/ruby_llm/install/templates/initializer.rb.tt +6 -0
  9. data/lib/generators/ruby_llm/install/templates/message_model.rb.tt +3 -0
  10. data/lib/generators/ruby_llm/install/templates/tool_call_model.rb.tt +3 -0
  11. data/lib/generators/ruby_llm/install_generator.rb +121 -0
  12. data/lib/ruby_llm/active_record/acts_as.rb +23 -5
  13. data/lib/ruby_llm/aliases.json +20 -39
  14. data/lib/ruby_llm/attachment.rb +1 -1
  15. data/lib/ruby_llm/chat.rb +68 -15
  16. data/lib/ruby_llm/configuration.rb +2 -0
  17. data/lib/ruby_llm/error.rb +1 -0
  18. data/lib/ruby_llm/message.rb +3 -1
  19. data/lib/ruby_llm/models.json +7117 -7084
  20. data/lib/ruby_llm/models.rb +2 -1
  21. data/lib/ruby_llm/provider.rb +13 -7
  22. data/lib/ruby_llm/providers/anthropic/chat.rb +13 -12
  23. data/lib/ruby_llm/providers/anthropic/media.rb +2 -0
  24. data/lib/ruby_llm/providers/anthropic/tools.rb +23 -13
  25. data/lib/ruby_llm/providers/bedrock/chat.rb +4 -5
  26. data/lib/ruby_llm/providers/bedrock/media.rb +2 -0
  27. data/lib/ruby_llm/providers/bedrock/streaming/base.rb +2 -2
  28. data/lib/ruby_llm/providers/bedrock/streaming/prelude_handling.rb +4 -4
  29. data/lib/ruby_llm/providers/gemini/chat.rb +37 -2
  30. data/lib/ruby_llm/providers/gemini/embeddings.rb +4 -2
  31. data/lib/ruby_llm/providers/gemini/media.rb +2 -0
  32. data/lib/ruby_llm/providers/gpustack/chat.rb +17 -0
  33. data/lib/ruby_llm/providers/gpustack/models.rb +55 -0
  34. data/lib/ruby_llm/providers/gpustack.rb +36 -0
  35. data/lib/ruby_llm/providers/ollama/media.rb +2 -0
  36. data/lib/ruby_llm/providers/openai/chat.rb +17 -2
  37. data/lib/ruby_llm/providers/openai/embeddings.rb +4 -3
  38. data/lib/ruby_llm/providers/openai/media.rb +2 -0
  39. data/lib/ruby_llm/providers/openai/streaming.rb +14 -0
  40. data/lib/ruby_llm/railtie.rb +5 -0
  41. data/lib/ruby_llm/stream_accumulator.rb +3 -2
  42. data/lib/ruby_llm/streaming.rb +25 -7
  43. data/lib/ruby_llm/utils.rb +10 -0
  44. data/lib/ruby_llm/version.rb +1 -1
  45. data/lib/ruby_llm.rb +3 -0
  46. data/lib/tasks/models_docs.rake +3 -2
  47. metadata +15 -3
--- a/data/lib/ruby_llm/models.rb
+++ b/data/lib/ruby_llm/models.rb
@@ -92,7 +92,8 @@ module RubyLLM
           f.response :json, parser_options: { symbolize_names: true }
         end
         response = connection.get 'https://api.parsera.org/v1/llm-specs'
-        response.body.map { |data| Model::Info.new(data) }
+        models = response.body.map { |data| Model::Info.new(data) }
+        models.reject { |model| model.provider.nil? || model.id.nil? }
       end

       def merge_models(provider_models, parsera_models)
--- a/data/lib/ruby_llm/provider.rb
+++ b/data/lib/ruby_llm/provider.rb
@@ -10,14 +10,20 @@ module RubyLLM
     module Methods
       extend Streaming

-      def complete(messages, tools:, temperature:, model:, connection:, &)
+      def complete(messages, tools:, temperature:, model:, connection:, params: {}, schema: nil, &) # rubocop:disable Metrics/ParameterLists
        normalized_temperature = maybe_normalize_temperature(temperature, model)

-        payload = render_payload(messages,
-                                 tools: tools,
-                                 temperature: normalized_temperature,
-                                 model: model,
-                                 stream: block_given?)
+        payload = Utils.deep_merge(
+          params,
+          render_payload(
+            messages,
+            tools: tools,
+            temperature: normalized_temperature,
+            model: model,
+            stream: block_given?,
+            schema: schema
+          )
+        )

        if block_given?
          stream_response connection, payload, &
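
Note: `Utils.deep_merge` is new in this release (see `data/lib/ruby_llm/utils.rb +10` above). Its exact implementation isn't shown in this hunk, but a minimal sketch of the semantics the call site implies — recursive merging of nested hashes, with the rendered payload winning on scalar conflicts so user-supplied `params` can't clobber required fields — would be:

    # Hypothetical sketch; the real method lives in data/lib/ruby_llm/utils.rb.
    module Utils
      module_function

      def deep_merge(base, override)
        base.merge(override) do |_key, base_value, override_value|
          if base_value.is_a?(Hash) && override_value.is_a?(Hash)
            deep_merge(base_value, override_value)  # merge nested hashes key by key
          else
            override_value                          # scalars: the override wins
          end
        end
      end
    end

    Utils.deep_merge({ a: 1, opts: { x: 1 } }, { opts: { y: 2 } })
    # => { a: 1, opts: { x: 1, y: 2 } }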
@@ -34,7 +40,7 @@ module RubyLLM
       def embed(text, model:, connection:, dimensions:)
         payload = render_embedding_payload(text, model:, dimensions:)
         response = connection.post(embedding_url(model:), payload)
-        parse_embedding_response(response, model:)
+        parse_embedding_response(response, model:, text:)
       end

       def paint(prompt, model:, size:, connection:)
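
Note: `parse_embedding_response` now receives the original input `text`, so providers can tell a single-string request apart from a one-element array (see the Gemini and OpenAI embedding hunks below). Assuming the public `RubyLLM.embed` API, the intended return shapes would be:

    RubyLLM.embed('a string').vectors    # => [0.1, 0.2, ...]    one flat vector
    RubyLLM.embed(['a string']).vectors  # => [[0.1, 0.2, ...]]  array of vectors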
--- a/data/lib/ruby_llm/providers/anthropic/chat.rb
+++ b/data/lib/ruby_llm/providers/anthropic/chat.rb
@@ -11,12 +11,12 @@ module RubyLLM
           '/v1/messages'
         end

-        def render_payload(messages, tools:, temperature:, model:, stream: false)
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
           system_messages, chat_messages = separate_messages(messages)
           system_content = build_system_content(system_messages)

-          build_base_payload(chat_messages, temperature, model, stream).tap do |payload|
-            add_optional_fields(payload, system_content:, tools:)
+          build_base_payload(chat_messages, model, stream).tap do |payload|
+            add_optional_fields(payload, system_content:, tools:, temperature:)
           end
         end

@@ -32,22 +32,22 @@ module RubyLLM
             )
           end

-          system_messages.map { |msg| format_message(msg)[:content] }.join("\n\n")
+          system_messages.map(&:content).join("\n\n")
         end

-        def build_base_payload(chat_messages, temperature, model, stream)
+        def build_base_payload(chat_messages, model, stream)
           {
             model: model,
             messages: chat_messages.map { |msg| format_message(msg) },
-            temperature: temperature,
             stream: stream,
             max_tokens: RubyLLM.models.find(model)&.max_tokens || 4096
           }
         end

-        def add_optional_fields(payload, system_content:, tools:)
+        def add_optional_fields(payload, system_content:, tools:, temperature:)
           payload[:tools] = tools.values.map { |t| Tools.function_for(t) } if tools.any?
           payload[:system] = system_content unless system_content.empty?
+          payload[:temperature] = temperature unless temperature.nil?
         end

         def parse_completion_response(response)
@@ -55,9 +55,9 @@ module RubyLLM
           content_blocks = data['content'] || []

           text_content = extract_text_content(content_blocks)
-          tool_use = Tools.find_tool_use(content_blocks)
+          tool_use_blocks = Tools.find_tool_uses(content_blocks)

-          build_message(data, text_content, tool_use)
+          build_message(data, text_content, tool_use_blocks, response)
         end

         def extract_text_content(blocks)
@@ -65,14 +65,15 @@ module RubyLLM
           text_blocks.map { |c| c['text'] }.join
         end

-        def build_message(data, content, tool_use)
+        def build_message(data, content, tool_use_blocks, response)
           Message.new(
             role: :assistant,
             content: content,
-            tool_calls: Tools.parse_tool_calls(tool_use),
+            tool_calls: Tools.parse_tool_calls(tool_use_blocks),
             input_tokens: data.dig('usage', 'input_tokens'),
             output_tokens: data.dig('usage', 'output_tokens'),
-            model_id: data['model']
+            model_id: data['model'],
+            raw: response
           )
         end

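
Note: assistant messages now keep a handle on the raw provider response (the same `raw:` field is threaded through the Gemini, OpenAI, and streaming hunks below). Assuming `Message` exposes it as a reader (the `data/lib/ruby_llm/message.rb +3 -1` change above), a hedged usage sketch:

    message = RubyLLM.chat.ask 'Hello'
    # `raw` should be the underlying Faraday::Response, making HTTP
    # status and headers inspectable (reader name assumed):
    message.raw.status
    message.raw.headers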
--- a/data/lib/ruby_llm/providers/anthropic/media.rb
+++ b/data/lib/ruby_llm/providers/anthropic/media.rb
@@ -8,6 +8,8 @@ module RubyLLM
         module_function

         def format_content(content)
+          # Convert Hash/Array back to JSON string for API
+          return [format_text(content.to_json)] if content.is_a?(Hash) || content.is_a?(Array)
           return [format_text(content)] unless content.is_a?(Content)

           parts = []
--- a/data/lib/ruby_llm/providers/anthropic/tools.rb
+++ b/data/lib/ruby_llm/providers/anthropic/tools.rb
@@ -7,16 +7,18 @@ module RubyLLM
       module Tools
         module_function

-        def find_tool_use(blocks)
-          blocks.find { |c| c['type'] == 'tool_use' }
+        def find_tool_uses(blocks)
+          blocks.select { |c| c['type'] == 'tool_use' }
         end

         def format_tool_call(msg)
-          tool_call = msg.tool_calls.values.first
-
           content = []
+
           content << Media.format_text(msg.content) unless msg.content.nil? || msg.content.empty?
-          content << format_tool_use_block(tool_call)
+
+          msg.tool_calls.each_value do |tool_call|
+            content << format_tool_use_block(tool_call)
+          end

           {
             role: 'assistant',
@@ -68,16 +70,24 @@ module RubyLLM
           end
         end

-        def parse_tool_calls(content_block)
-          return nil unless content_block && content_block['type'] == 'tool_use'
+        def parse_tool_calls(content_blocks)
+          return nil if content_blocks.nil?

-          {
-            content_block['id'] => ToolCall.new(
-              id: content_block['id'],
-              name: content_block['name'],
-              arguments: content_block['input']
+          # Handle single content block (backward compatibility)
+          content_blocks = [content_blocks] unless content_blocks.is_a?(Array)
+
+          tool_calls = {}
+          content_blocks.each do |block|
+            next unless block && block['type'] == 'tool_use'
+
+            tool_calls[block['id']] = ToolCall.new(
+              id: block['id'],
+              name: block['name'],
+              arguments: block['input']
             )
-          }
+          end
+
+          tool_calls.empty? ? nil : tool_calls
         end

         def clean_parameters(parameters)
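
Note: with `find_tool_uses` plus the reworked `parse_tool_calls`, one assistant turn can now carry several parallel tool calls. The hash built above is keyed by tool-use id, e.g. (ids and names illustrative):

    {
      'toolu_01' => ToolCall.new(id: 'toolu_01', name: 'get_weather', arguments: { 'city' => 'Berlin' }),
      'toolu_02' => ToolCall.new(id: 'toolu_02', name: 'get_time', arguments: { 'zone' => 'CET' })
    }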
--- a/data/lib/ruby_llm/providers/bedrock/chat.rb
+++ b/data/lib/ruby_llm/providers/bedrock/chat.rb
@@ -39,23 +39,22 @@ module RubyLLM
           "model/#{@model_id}/invoke"
         end

-        def render_payload(messages, tools:, temperature:, model:, stream: false) # rubocop:disable Lint/UnusedMethodArgument
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists
           # Hold model_id in instance variable for use in completion_url and stream_url
           @model_id = model

           system_messages, chat_messages = Anthropic::Chat.separate_messages(messages)
           system_content = Anthropic::Chat.build_system_content(system_messages)

-          build_base_payload(chat_messages, temperature, model).tap do |payload|
-            Anthropic::Chat.add_optional_fields(payload, system_content:, tools:)
+          build_base_payload(chat_messages, model).tap do |payload|
+            Anthropic::Chat.add_optional_fields(payload, system_content:, tools:, temperature:)
           end
         end

-        def build_base_payload(chat_messages, temperature, model)
+        def build_base_payload(chat_messages, model)
           {
             anthropic_version: 'bedrock-2023-05-31',
             messages: chat_messages.map { |msg| format_message(msg) },
-            temperature: temperature,
             max_tokens: RubyLLM.models.find(model)&.max_tokens || 4096
           }
         end
--- a/data/lib/ruby_llm/providers/bedrock/media.rb
+++ b/data/lib/ruby_llm/providers/bedrock/media.rb
@@ -11,6 +11,8 @@ module RubyLLM
         module_function

         def format_content(content)
+          # Convert Hash/Array back to JSON string for API
+          return [Anthropic::Media.format_text(content.to_json)] if content.is_a?(Hash) || content.is_a?(Array)
           return [Anthropic::Media.format_text(content)] unless content.is_a?(Content)

           parts = []
--- a/data/lib/ruby_llm/providers/bedrock/streaming/base.rb
+++ b/data/lib/ruby_llm/providers/bedrock/streaming/base.rb
@@ -34,7 +34,7 @@ module RubyLLM
                                     payload:)
           accumulator = StreamAccumulator.new

-          connection.post stream_url, payload do |req|
+          response = connection.post stream_url, payload do |req|
             req.headers.merge! build_headers(signature.headers, streaming: block_given?)
             req.options.on_data = handle_stream do |chunk|
               accumulator.add chunk
@@ -42,7 +42,7 @@ module RubyLLM
             end
           end

-          accumulator.to_message
+          accumulator.to_message(response)
         end

         def handle_stream(&block)
--- a/data/lib/ruby_llm/providers/bedrock/streaming/prelude_handling.rb
+++ b/data/lib/ruby_llm/providers/bedrock/streaming/prelude_handling.rb
@@ -24,8 +24,8 @@ module RubyLLM
         end

         def read_prelude(chunk, offset)
-          total_length = chunk[offset...offset + 4].unpack1('N')
-          headers_length = chunk[offset + 4...offset + 8].unpack1('N')
+          total_length = chunk[offset...(offset + 4)].unpack1('N')
+          headers_length = chunk[(offset + 4)...(offset + 8)].unpack1('N')
           [total_length, headers_length]
         end

@@ -72,8 +72,8 @@ module RubyLLM

         def extract_potential_lengths(chunk, pos)
           [
-            chunk[pos...pos + 4].unpack1('N'),
-            chunk[pos + 4...pos + 8].unpack1('N')
+            chunk[pos...(pos + 4)].unpack1('N'),
+            chunk[(pos + 4)...(pos + 8)].unpack1('N')
           ]
         end

--- a/data/lib/ruby_llm/providers/gemini/chat.rb
+++ b/data/lib/ruby_llm/providers/gemini/chat.rb
@@ -11,7 +11,7 @@ module RubyLLM
           "models/#{@model}:generateContent"
         end

-        def render_payload(messages, tools:, temperature:, model:, stream: false) # rubocop:disable Lint/UnusedMethodArgument
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
           @model = model # Store model for completion_url/stream_url
           payload = {
             contents: format_messages(messages),
@@ -19,6 +19,12 @@ module RubyLLM
               temperature: temperature
             }
           }
+
+          if schema
+            payload[:generationConfig][:responseMimeType] = 'application/json'
+            payload[:generationConfig][:responseSchema] = convert_schema_to_gemini(schema)
+          end
+
           payload[:tools] = format_tools(tools) if tools.any?
           payload
         end
@@ -75,10 +81,39 @@ module RubyLLM
             tool_calls: tool_calls,
             input_tokens: data.dig('usageMetadata', 'promptTokenCount'),
             output_tokens: data.dig('usageMetadata', 'candidatesTokenCount'),
-            model_id: data['modelVersion'] || response.env.url.path.split('/')[3].split(':')[0]
+            model_id: data['modelVersion'] || response.env.url.path.split('/')[3].split(':')[0],
+            raw: response
           )
         end

+        def convert_schema_to_gemini(schema) # rubocop:disable Metrics/PerceivedComplexity
+          return nil unless schema
+
+          case schema[:type]
+          when 'object'
+            {
+              type: 'OBJECT',
+              properties: schema[:properties]&.transform_values { |prop| convert_schema_to_gemini(prop) } || {},
+              required: schema[:required] || []
+            }
+          when 'array'
+            {
+              type: 'ARRAY',
+              items: schema[:items] ? convert_schema_to_gemini(schema[:items]) : { type: 'STRING' }
+            }
+          when 'string'
+            result = { type: 'STRING' }
+            result[:enum] = schema[:enum] if schema[:enum]
+            result
+          when 'number', 'integer'
+            { type: 'NUMBER' }
+          when 'boolean'
+            { type: 'BOOLEAN' }
+          else
+            { type: 'STRING' }
+          end
+        end
+
         def extract_content(data)
           candidate = data.dig('candidates', 0)
           return '' unless candidate
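
Note: `convert_schema_to_gemini` maps a JSON Schema subset onto Gemini's `responseSchema` vocabulary — uppercase type names, with both `number` and `integer` collapsing to `NUMBER`. From the case statement above:

    convert_schema_to_gemini(
      type: 'object',
      properties: { name: { type: 'string' }, age: { type: 'integer' } },
      required: ['name']
    )
    # => { type: 'OBJECT',
    #      properties: { name: { type: 'STRING' }, age: { type: 'NUMBER' } },
    #      required: ['name'] }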
--- a/data/lib/ruby_llm/providers/gemini/embeddings.rb
+++ b/data/lib/ruby_llm/providers/gemini/embeddings.rb
@@ -15,9 +15,11 @@ module RubyLLM
           { requests: [text].flatten.map { |t| single_embedding_payload(t, model:, dimensions:) } }
         end

-        def parse_embedding_response(response, model:)
+        def parse_embedding_response(response, model:, text:)
           vectors = response.body['embeddings']&.map { |e| e['values'] }
-          vectors in [vectors]
+          # If we only got one embedding AND the input was a single string (not an array),
+          # return it as a single vector
+          vectors = vectors.first if vectors&.length == 1 && !text.is_a?(Array)

           Embedding.new(vectors:, model:, input_tokens: 0)
         end
--- a/data/lib/ruby_llm/providers/gemini/media.rb
+++ b/data/lib/ruby_llm/providers/gemini/media.rb
@@ -8,6 +8,8 @@ module RubyLLM
         module_function

         def format_content(content)
+          # Convert Hash/Array back to JSON string for API
+          return [format_text(content.to_json)] if content.is_a?(Hash) || content.is_a?(Array)
           return [format_text(content)] unless content.is_a?(Content)

           parts = []
--- /dev/null
+++ b/data/lib/ruby_llm/providers/gpustack/chat.rb
@@ -0,0 +1,17 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    module GPUStack
+      # Chat methods of the GPUStack API integration
+      module Chat
+        module_function
+
+        def format_role(role)
+          # GPUStack doesn't use the new OpenAI convention for system prompts
+          role.to_s
+        end
+      end
+    end
+  end
+end
--- /dev/null
+++ b/data/lib/ruby_llm/providers/gpustack/models.rb
@@ -0,0 +1,55 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    module GPUStack
+      # Models methods of the GPUStack API integration
+      module Models
+        module_function
+
+        def models_url
+          'models'
+        end
+
+        def parse_list_models_response(response, slug, _capabilities)
+          items = response.body['items'] || []
+          items.map do |model|
+            Model::Info.new(
+              id: model['name'],
+              created_at: model['created_at'] ? Time.parse(model['created_at']) : nil,
+              display_name: "#{model['source']}/#{model['name']}",
+              provider: slug,
+              type: determine_model_type(model),
+              metadata: {
+                description: model['description'],
+                source: model['source'],
+                huggingface_repo_id: model['huggingface_repo_id'],
+                ollama_library_model_name: model['ollama_library_model_name'],
+                backend: model['backend'],
+                meta: model['meta'],
+                categories: model['categories']
+              },
+              context_window: model.dig('meta', 'n_ctx'),
+              # Using context window as max tokens since it's not explicitly provided
+              max_tokens: model.dig('meta', 'n_ctx'),
+              supports_vision: model.dig('meta', 'support_vision') || false,
+              supports_functions: model.dig('meta', 'support_tool_calls') || false,
+              supports_json_mode: true, # Assuming all models support JSON mode
+              input_price_per_million: 0.0, # Price information not available in new format
+              output_price_per_million: 0.0 # Price information not available in new format
+            )
+          end
+        end
+
+        private
+
+        def determine_model_type(model)
+          return 'embedding' if model['categories']&.include?('embedding')
+          return 'chat' if model['categories']&.include?('llm')
+
+          'other'
+        end
+      end
+    end
+  end
+end
--- /dev/null
+++ b/data/lib/ruby_llm/providers/gpustack.rb
@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    # GPUStack API integration based on Ollama.
+    module GPUStack
+      extend OpenAI
+      extend GPUStack::Chat
+      extend GPUStack::Models
+
+      module_function
+
+      def api_base(config)
+        config.gpustack_api_base
+      end
+
+      def headers(config)
+        {
+          'Authorization' => "Bearer #{config.gpustack_api_key}"
+        }
+      end
+
+      def slug
+        'gpustack'
+      end
+
+      def local?
+        true
+      end
+
+      def configuration_requirements
+        %i[gpustack_api_base gpustack_api_key]
+      end
+    end
+  end
+end
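
Note: the new provider requires both `gpustack_api_base` and `gpustack_api_key`, which match the two accessors added to `data/lib/ruby_llm/configuration.rb` (+2). A hedged configuration sketch (URL, key, and model name are placeholders):

    RubyLLM.configure do |config|
      config.gpustack_api_base = 'http://localhost/v1'
      config.gpustack_api_key  = ENV['GPUSTACK_API_KEY']
    end

    chat = RubyLLM.chat(model: 'qwen2.5', provider: :gpustack)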
--- a/data/lib/ruby_llm/providers/ollama/media.rb
+++ b/data/lib/ruby_llm/providers/ollama/media.rb
@@ -10,6 +10,8 @@ module RubyLLM
         module_function

         def format_content(content)
+          # Convert Hash/Array back to JSON string for API
+          return content.to_json if content.is_a?(Hash) || content.is_a?(Array)
           return content unless content.is_a?(Content)

           parts = []
--- a/data/lib/ruby_llm/providers/openai/chat.rb
+++ b/data/lib/ruby_llm/providers/openai/chat.rb
@@ -11,7 +11,7 @@ module RubyLLM

         module_function

-        def render_payload(messages, tools:, temperature:, model:, stream: false)
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists
           payload = {
             model: model,
             messages: format_messages(messages),
@@ -26,6 +26,20 @@ module RubyLLM
             payload[:tool_choice] = 'auto'
           end

+          if schema
+            # Use strict mode from schema if specified, default to true
+            strict = schema[:strict] != false
+
+            payload[:response_format] = {
+              type: 'json_schema',
+              json_schema: {
+                name: 'response',
+                schema: schema,
+                strict: strict
+              }
+            }
+          end
+
           payload[:stream_options] = { include_usage: true } if stream
           payload
         end
@@ -45,7 +59,8 @@ module RubyLLM
             tool_calls: parse_tool_calls(message_data['tool_calls']),
             input_tokens: data['usage']['prompt_tokens'],
             output_tokens: data['usage']['completion_tokens'],
-            model_id: data['model']
+            model_id: data['model'],
+            raw: response
           )
         end

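
Note: together with the `schema:` plumbing through `Provider::Methods#complete` and each `render_payload` above, the `+68 -15` change to `data/lib/ruby_llm/chat.rb` plausibly surfaces structured output on the chat object itself. A hedged sketch, assuming the public method is `Chat#with_schema` (not shown in this diff):

    schema = {
      type: 'object',
      properties: { name: { type: 'string' }, age: { type: 'integer' } },
      required: %w[name age],
      additionalProperties: false
    }

    # `with_schema` is assumed; the schema ends up in response_format above.
    response = RubyLLM.chat(model: 'gpt-4o').with_schema(schema).ask('Extract: Jane is 42')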
--- a/data/lib/ruby_llm/providers/openai/embeddings.rb
+++ b/data/lib/ruby_llm/providers/openai/embeddings.rb
@@ -19,13 +19,14 @@ module RubyLLM
           }.compact
         end

-        def parse_embedding_response(response, model:)
+        def parse_embedding_response(response, model:, text:)
           data = response.body
           input_tokens = data.dig('usage', 'prompt_tokens') || 0
           vectors = data['data'].map { |d| d['embedding'] }

-          # If we only got one embedding, return it as a single vector
-          vectors in [vectors]
+          # If we only got one embedding AND the input was a single string (not an array),
+          # return it as a single vector
+          vectors = vectors.first if vectors.length == 1 && !text.is_a?(Array)

           Embedding.new(vectors:, model:, input_tokens:)
         end
--- a/data/lib/ruby_llm/providers/openai/media.rb
+++ b/data/lib/ruby_llm/providers/openai/media.rb
@@ -8,6 +8,8 @@ module RubyLLM
         module_function

         def format_content(content)
+          # Convert Hash/Array back to JSON string for API
+          return content.to_json if content.is_a?(Hash) || content.is_a?(Array)
           return content unless content.is_a?(Content)

           parts = []
--- a/data/lib/ruby_llm/providers/openai/streaming.rb
+++ b/data/lib/ruby_llm/providers/openai/streaming.rb
@@ -21,6 +21,20 @@ module RubyLLM
             output_tokens: data.dig('usage', 'completion_tokens')
           )
         end
+
+        def parse_streaming_error(data)
+          error_data = JSON.parse(data)
+          return unless error_data['error']
+
+          case error_data.dig('error', 'type')
+          when 'server_error'
+            [500, error_data['error']['message']]
+          when 'rate_limit_exceeded', 'insufficient_quota'
+            [429, error_data['error']['message']]
+          else
+            [400, error_data['error']['message']]
+          end
+        end
       end
     end
   end
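
Note: `parse_streaming_error` turns error payloads that arrive mid-stream into `[status, message]` pairs, which the reworked streaming layer (`data/lib/ruby_llm/streaming.rb +25 -7`) can route through the normal HTTP error handling. Straight from the case statement above:

    parse_streaming_error('{"error":{"type":"rate_limit_exceeded","message":"Slow down"}}')
    # => [429, "Slow down"]
    parse_streaming_error('{"error":{"type":"server_error","message":"boom"}}')
    # => [500, "boom"]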
--- a/data/lib/ruby_llm/railtie.rb
+++ b/data/lib/ruby_llm/railtie.rb
@@ -8,5 +8,10 @@ module RubyLLM
         include RubyLLM::ActiveRecord::ActsAs
       end
     end
+
+    # Register generators
+    generators do
+      require 'generators/ruby_llm/install_generator'
+    end
   end
 end
--- a/data/lib/ruby_llm/stream_accumulator.rb
+++ b/data/lib/ruby_llm/stream_accumulator.rb
@@ -29,14 +29,15 @@ module RubyLLM
       RubyLLM.logger.debug inspect
     end

-    def to_message
+    def to_message(response)
       Message.new(
         role: :assistant,
         content: content.empty? ? nil : content,
         model_id: model_id,
         tool_calls: tool_calls_from_stream,
         input_tokens: @input_tokens.positive? ? @input_tokens : nil,
-        output_tokens: @output_tokens.positive? ? @output_tokens : nil
+        output_tokens: @output_tokens.positive? ? @output_tokens : nil,
+        raw: response
       )
     end