raif 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +148 -4
- data/app/assets/builds/raif.css +26 -1
- data/app/assets/stylesheets/raif/loader.scss +27 -1
- data/app/models/raif/concerns/llm_response_parsing.rb +22 -16
- data/app/models/raif/concerns/llms/anthropic/tool_formatting.rb +56 -0
- data/app/models/raif/concerns/llms/{bedrock_claude → bedrock}/message_formatting.rb +4 -4
- data/app/models/raif/concerns/llms/bedrock/tool_formatting.rb +37 -0
- data/app/models/raif/concerns/llms/message_formatting.rb +7 -6
- data/app/models/raif/concerns/llms/open_ai/json_schema_validation.rb +138 -0
- data/app/models/raif/concerns/llms/{open_ai → open_ai_completions}/message_formatting.rb +1 -1
- data/app/models/raif/concerns/llms/open_ai_completions/tool_formatting.rb +26 -0
- data/app/models/raif/concerns/llms/open_ai_responses/message_formatting.rb +43 -0
- data/app/models/raif/concerns/llms/open_ai_responses/tool_formatting.rb +42 -0
- data/app/models/raif/conversation.rb +17 -4
- data/app/models/raif/conversation_entry.rb +18 -2
- data/app/models/raif/embedding_models/{bedrock_titan.rb → bedrock.rb} +2 -2
- data/app/models/raif/llm.rb +73 -7
- data/app/models/raif/llms/anthropic.rb +56 -36
- data/app/models/raif/llms/{bedrock_claude.rb → bedrock.rb} +62 -45
- data/app/models/raif/llms/open_ai_base.rb +66 -0
- data/app/models/raif/llms/open_ai_completions.rb +100 -0
- data/app/models/raif/llms/open_ai_responses.rb +144 -0
- data/app/models/raif/llms/open_router.rb +38 -43
- data/app/models/raif/model_completion.rb +2 -0
- data/app/models/raif/model_tool.rb +4 -0
- data/app/models/raif/model_tools/provider_managed/base.rb +9 -0
- data/app/models/raif/model_tools/provider_managed/code_execution.rb +5 -0
- data/app/models/raif/model_tools/provider_managed/image_generation.rb +5 -0
- data/app/models/raif/model_tools/provider_managed/web_search.rb +5 -0
- data/app/models/raif/streaming_responses/anthropic.rb +63 -0
- data/app/models/raif/streaming_responses/bedrock.rb +89 -0
- data/app/models/raif/streaming_responses/open_ai_completions.rb +76 -0
- data/app/models/raif/streaming_responses/open_ai_responses.rb +54 -0
- data/app/views/raif/admin/conversations/_conversation_entry.html.erb +48 -0
- data/app/views/raif/admin/conversations/show.html.erb +1 -1
- data/app/views/raif/admin/model_completions/_model_completion.html.erb +7 -0
- data/app/views/raif/admin/model_completions/index.html.erb +1 -0
- data/app/views/raif/admin/model_completions/show.html.erb +28 -0
- data/app/views/raif/conversation_entries/_citations.html.erb +9 -0
- data/app/views/raif/conversation_entries/_conversation_entry.html.erb +5 -1
- data/app/views/raif/conversation_entries/_message.html.erb +4 -0
- data/config/locales/admin.en.yml +2 -0
- data/config/locales/en.yml +22 -0
- data/db/migrate/20250224234252_create_raif_tables.rb +1 -1
- data/db/migrate/20250421202149_add_response_format_to_raif_conversations.rb +1 -1
- data/db/migrate/20250424200755_add_cost_columns_to_raif_model_completions.rb +1 -1
- data/db/migrate/20250424232946_add_created_at_indexes.rb +1 -1
- data/db/migrate/20250502155330_add_status_indexes_to_raif_tasks.rb +1 -1
- data/db/migrate/20250527213016_add_response_id_and_response_array_to_model_completions.rb +14 -0
- data/db/migrate/20250603140622_add_citations_to_raif_model_completions.rb +13 -0
- data/db/migrate/20250603202013_add_stream_response_to_raif_model_completions.rb +7 -0
- data/lib/generators/raif/conversation/templates/conversation.rb.tt +3 -3
- data/lib/generators/raif/install/templates/initializer.rb +14 -2
- data/lib/raif/configuration.rb +27 -5
- data/lib/raif/embedding_model_registry.rb +1 -1
- data/lib/raif/engine.rb +25 -9
- data/lib/raif/errors/streaming_error.rb +18 -0
- data/lib/raif/errors.rb +1 -0
- data/lib/raif/llm_registry.rb +157 -47
- data/lib/raif/migration_checker.rb +74 -0
- data/lib/raif/utils/html_fragment_processor.rb +169 -0
- data/lib/raif/utils.rb +1 -0
- data/lib/raif/version.rb +1 -1
- data/lib/raif.rb +2 -0
- metadata +45 -8
- data/app/models/raif/llms/open_ai.rb +0 -256

data/app/models/raif/llms/{bedrock_claude.rb → bedrock.rb}

@@ -1,34 +1,59 @@
 # frozen_string_literal: true
 
-class Raif::Llms::BedrockClaude < Raif::Llm
-  include Raif::Concerns::Llms::BedrockClaude::MessageFormatting
+class Raif::Llms::Bedrock < Raif::Llm
+  include Raif::Concerns::Llms::Bedrock::MessageFormatting
+  include Raif::Concerns::Llms::Bedrock::ToolFormatting
 
-  def perform_model_completion!(model_completion)
+  def perform_model_completion!(model_completion, &block)
     if Raif.config.aws_bedrock_model_name_prefix.present?
       model_completion.model_api_name = "#{Raif.config.aws_bedrock_model_name_prefix}.#{model_completion.model_api_name}"
     end
 
     params = build_request_parameters(model_completion)
-    resp = bedrock_client.converse(params)
 
+    if model_completion.stream_response?
+      bedrock_client.converse_stream(params) do |stream|
+        stream.on_error_event do |event|
+          raise Raif::Errors::StreamingError.new(
+            message: event.error_message,
+            type: event.event_type,
+            code: event.error_code,
+            event: event
+          )
+        end
+
+        handler = streaming_chunk_handler(model_completion, &block)
+        stream.on_event do |event|
+          handler.call(event)
+        end
+      end
+    else
+      response = bedrock_client.converse(params)
+      update_model_completion(model_completion, response)
+    end
+
+    model_completion
+  end
+
+  private
+
+  def bedrock_client
+    @bedrock_client ||= Aws::BedrockRuntime::Client.new(region: Raif.config.aws_bedrock_region)
+  end
+
+  def update_model_completion(model_completion, resp)
     model_completion.raw_response = if model_completion.response_format_json?
       extract_json_response(resp)
     else
       extract_text_response(resp)
     end
 
+    model_completion.response_array = resp.output.message.content
+    model_completion.response_tool_calls = extract_response_tool_calls(resp)
     model_completion.completion_tokens = resp.usage.output_tokens
     model_completion.prompt_tokens = resp.usage.input_tokens
     model_completion.total_tokens = resp.usage.total_tokens
     model_completion.save!
-
-    model_completion
-  end
-
-  protected
-
-  def bedrock_client
-    @bedrock_client ||= Aws::BedrockRuntime::Client.new(region: Raif.config.aws_bedrock_region)
   end
 
   def build_request_parameters(model_completion)
@@ -44,8 +69,10 @@ protected
 
     params[:system] = [{ text: model_completion.system_prompt }] if model_completion.system_prompt.present?
 
-    tools = build_tool_parameters(model_completion)
-    params[:tool_config] = tools if tools.present?
+    if supports_native_tool_use?
+      tools = build_tools_parameter(model_completion)
+      params[:tool_config] = tools unless tools.blank?
+    end
 
     params
   end
@@ -65,38 +92,7 @@ protected
     end
   end
 
-  def build_tool_parameters(model_completion)
-    tools = []
-
-    # If we're looking for a JSON response, add a tool to the request that the model can use to provide a JSON response
-    if model_completion.response_format_json? && model_completion.json_response_schema.present?
-      tools << {
-        name: "json_response",
-        description: "Generate a structured JSON response based on the provided schema.",
-        input_schema: { json: model_completion.json_response_schema }
-      }
-    end
-
-    # If we support native tool use and have tools available, add them to the request
-    if supports_native_tool_use? && model_completion.available_model_tools.any?
-      model_completion.available_model_tools_map.each do |_tool_name, tool|
-        tools << {
-          name: tool.tool_name,
-          description: tool.tool_description,
-          input_schema: { json: tool.tool_arguments_schema }
-        }
-      end
-    end
-
-    return if tools.blank?
-
-    {
-      tools: tools.map{|tool| { tool_spec: tool } }
-    }
-  end
-
   def extract_text_response(resp)
-    # Get the message from the response object
     message = resp.output.message
 
     # Find the first text content block
@@ -145,4 +141,25 @@ protected
     end
   end
 
+  def streaming_chunk_handler(model_completion, &block)
+    return unless model_completion.stream_response?
+
+    streaming_response = Raif::StreamingResponses::Bedrock.new
+    accumulated_delta = ""
+
+    proc do |event|
+      delta, finish_reason = streaming_response.process_streaming_event(event.class, event)
+      accumulated_delta += delta if delta.present?
+
+      if accumulated_delta.length >= Raif.config.streaming_update_chunk_size_threshold || finish_reason.present?
+        update_model_completion(model_completion, streaming_response.current_response)
+
+        if accumulated_delta.present?
+          block.call(model_completion, accumulated_delta, event)
+          accumulated_delta = ""
+        end
+      end
+    end
+  end
+
 end
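
The Bedrock adapter's streaming path yields accumulated text deltas to the caller's block. Below is a minimal usage sketch, assuming an instantiated Raif::Llm subclass (`llm`) and a prepared `model_completion` built elsewhere (both names are assumptions); the block arguments mirror the `block.call(model_completion, accumulated_delta, event)` invocation in `streaming_chunk_handler` above.

# Sketch: consume streaming updates from perform_model_completion!
llm.perform_model_completion!(model_completion) do |completion, delta, _event|
  # Invoked each time accumulated_delta reaches
  # Raif.config.streaming_update_chunk_size_threshold (or the stream finishes);
  # the partially updated completion record has already been persisted.
  print delta
end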
data/app/models/raif/llms/open_ai_base.rb (new file)

@@ -0,0 +1,66 @@
+# frozen_string_literal: true
+
+class Raif::Llms::OpenAiBase < Raif::Llm
+  include Raif::Concerns::Llms::OpenAi::JsonSchemaValidation
+
+  def perform_model_completion!(model_completion, &block)
+    if supports_temperature?
+      model_completion.temperature ||= default_temperature
+    else
+      Raif.logger.warn "Temperature is not supported for #{api_name}. Ignoring temperature parameter."
+      model_completion.temperature = nil
+    end
+
+    parameters = build_request_parameters(model_completion)
+
+    response = connection.post(api_path) do |req|
+      req.body = parameters
+      req.options.on_data = streaming_chunk_handler(model_completion, &block) if model_completion.stream_response?
+    end
+
+    unless model_completion.stream_response?
+      update_model_completion(model_completion, response.body)
+    end
+
+    model_completion
+  end
+
+  private
+
+  def connection
+    @connection ||= Faraday.new(url: "https://api.openai.com/v1") do |f|
+      f.headers["Authorization"] = "Bearer #{Raif.config.open_ai_api_key}"
+      f.request :json
+      f.response :json
+      f.response :raise_error
+    end
+  end
+
+  def format_system_prompt(model_completion)
+    formatted_system_prompt = model_completion.system_prompt.to_s.strip
+
+    # If the response format is JSON, we need to include "as json" in the system prompt.
+    # OpenAI requires this and will throw an error if it's not included.
+    if model_completion.response_format_json?
+      # Ensure system prompt ends with a period if not empty
+      if formatted_system_prompt.present? && !formatted_system_prompt.end_with?(".", "?", "!")
+        formatted_system_prompt += "."
+      end
+      formatted_system_prompt += " Return your response as JSON."
+      formatted_system_prompt.strip!
+    end
+
+    formatted_system_prompt
+  end
+
+  def supports_structured_outputs?
+    # Not all OpenAI models support structured outputs:
+    # https://platform.openai.com/docs/guides/structured-outputs?api-mode=chat#supported-models
+    provider_settings.key?(:supports_structured_outputs) ? provider_settings[:supports_structured_outputs] : true
+  end
+
+  def supports_temperature?
+    provider_settings.key?(:supports_temperature) ? provider_settings[:supports_temperature] : true
+  end
+
+end
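
OpenAiBase is a template class: it owns the Faraday connection, the temperature fallback, and system-prompt formatting, while each subclass supplies the endpoint and response handling. A sketch of that contract follows; the subclass name is hypothetical, and the method names are taken from the real subclasses shown below. The `streaming_chunk_handler` it references is presumably defined on Raif::Llm (the llm.rb changes are not shown in this excerpt).

# Hypothetical subclass illustrating the methods OpenAiBase expects:
class Raif::Llms::OpenAiExample < Raif::Llms::OpenAiBase
  private

  # Endpoint path appended to https://api.openai.com/v1
  def api_path
    "chat/completions"
  end

  # Accumulator class consumed by the shared streaming_chunk_handler
  def streaming_response_type
    Raif::StreamingResponses::OpenAiCompletions
  end

  # Provider-specific request body
  def build_request_parameters(model_completion)
    { model: api_name, messages: model_completion.messages }
  end

  # Persist the provider's response JSON onto the completion record
  def update_model_completion(model_completion, response_json)
    model_completion.update!(raw_response: response_json.dig("choices", 0, "message", "content"))
  end
end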
data/app/models/raif/llms/open_ai_completions.rb (new file)

@@ -0,0 +1,100 @@
+# frozen_string_literal: true
+
+class Raif::Llms::OpenAiCompletions < Raif::Llms::OpenAiBase
+  include Raif::Concerns::Llms::OpenAiCompletions::MessageFormatting
+  include Raif::Concerns::Llms::OpenAiCompletions::ToolFormatting
+
+  private
+
+  def api_path
+    "chat/completions"
+  end
+
+  def streaming_response_type
+    Raif::StreamingResponses::OpenAiCompletions
+  end
+
+  def update_model_completion(model_completion, response_json)
+    model_completion.update!(
+      response_id: response_json["id"],
+      response_tool_calls: extract_response_tool_calls(response_json),
+      raw_response: response_json.dig("choices", 0, "message", "content"),
+      response_array: response_json["choices"],
+      completion_tokens: response_json.dig("usage", "completion_tokens"),
+      prompt_tokens: response_json.dig("usage", "prompt_tokens"),
+      total_tokens: response_json.dig("usage", "total_tokens")
+    )
+  end
+
+  def extract_response_tool_calls(resp)
+    return if resp.dig("choices", 0, "message", "tool_calls").blank?
+
+    resp.dig("choices", 0, "message", "tool_calls").map do |tool_call|
+      {
+        "name" => tool_call["function"]["name"],
+        "arguments" => JSON.parse(tool_call["function"]["arguments"])
+      }
+    end
+  end
+
+  def build_request_parameters(model_completion)
+    formatted_system_prompt = format_system_prompt(model_completion)
+
+    messages = model_completion.messages
+    messages_with_system = if formatted_system_prompt.blank?
+      messages
+    else
+      [{ "role" => "system", "content" => formatted_system_prompt }] + messages
+    end
+
+    parameters = {
+      model: api_name,
+      messages: messages_with_system
+    }
+
+    if supports_temperature?
+      parameters[:temperature] = model_completion.temperature.to_f
+    end
+
+    # If the LLM supports native tool use and there are available tools, add them to the parameters
+    if supports_native_tool_use?
+      tools = build_tools_parameter(model_completion)
+      parameters[:tools] = tools unless tools.blank?
+    end
+
+    if model_completion.stream_response?
+      parameters[:stream] = true
+      # Ask for usage stats in the last chunk
+      parameters[:stream_options] = { include_usage: true }
+    end
+
+    # Add response format if needed
+    response_format = determine_response_format(model_completion)
+    parameters[:response_format] = response_format if response_format
+    model_completion.response_format_parameter = response_format[:type] if response_format
+
+    parameters
+  end
+
+  def determine_response_format(model_completion)
+    # Only configure response format for JSON outputs
+    return unless model_completion.response_format_json?
+
+    if model_completion.json_response_schema.present? && supports_structured_outputs?
+      validate_json_schema!(model_completion.json_response_schema)
+
+      {
+        type: "json_schema",
+        json_schema: {
+          name: "json_response_schema",
+          strict: true,
+          schema: model_completion.json_response_schema
+        }
+      }
+    else
+      # Default JSON mode for OpenAI models that don't support structured outputs or no schema is provided
+      { type: "json_object" }
+    end
+  end
+
+end
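
For a sense of what build_request_parameters produces, here is an illustrative request body for a streaming, JSON-format completion with no response schema; the model name, messages, and temperature are invented for the example.

# Illustrative parameters hash (assumed inputs):
parameters = {
  model: "gpt-4o",
  messages: [
    { "role" => "system", "content" => "You are a helpful assistant. Return your response as JSON." },
    { "role" => "user", "content" => "Summarize this article." }
  ],
  temperature: 0.7,
  stream: true,
  stream_options: { include_usage: true },
  response_format: { type: "json_object" }
}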
data/app/models/raif/llms/open_ai_responses.rb (new file)

@@ -0,0 +1,144 @@
+# frozen_string_literal: true
+
+class Raif::Llms::OpenAiResponses < Raif::Llms::OpenAiBase
+  include Raif::Concerns::Llms::OpenAiResponses::MessageFormatting
+  include Raif::Concerns::Llms::OpenAiResponses::ToolFormatting
+
+  private
+
+  def api_path
+    "responses"
+  end
+
+  def streaming_response_type
+    Raif::StreamingResponses::OpenAiResponses
+  end
+
+  def update_model_completion(model_completion, response_json)
+    model_completion.update!(
+      response_id: response_json["id"],
+      response_tool_calls: extract_response_tool_calls(response_json),
+      raw_response: extract_raw_response(response_json),
+      response_array: response_json["output"],
+      citations: extract_citations(response_json),
+      completion_tokens: response_json.dig("usage", "output_tokens"),
+      prompt_tokens: response_json.dig("usage", "input_tokens"),
+      total_tokens: response_json.dig("usage", "total_tokens")
+    )
+  end
+
+  def extract_response_tool_calls(resp)
+    return if resp["output"].blank?
+
+    tool_calls = []
+    resp["output"].each do |output_item|
+      next unless output_item["type"] == "function_call"
+
+      tool_calls << {
+        "name" => output_item["name"],
+        "arguments" => JSON.parse(output_item["arguments"])
+      }
+    end
+
+    tool_calls.any? ? tool_calls : nil
+  end
+
+  def extract_raw_response(resp)
+    text_outputs = []
+
+    output_messages = resp["output"]&.select{ |output_item| output_item["type"] == "message" }
+    output_messages&.each do |output_message|
+      output_message["content"].each do |content_item|
+        text_outputs << content_item["text"] if content_item["type"] == "output_text"
+      end
+    end
+
+    text_outputs.join("\n").presence
+  end
+
+  def extract_citations(resp)
+    return [] if resp["output"].blank?
+
+    citations = []
+
+    # Look through output messages for citations in annotations
+    output_messages = resp["output"].select{|output_item| output_item["type"] == "message" }
+    output_messages.each do |output_message|
+      next unless output_message["content"].present?
+
+      output_message["content"].each do |content_item|
+        next unless content_item["type"] == "output_text" && content_item["annotations"].present?
+
+        content_item["annotations"].each do |annotation|
+          next unless annotation["type"] == "url_citation"
+
+          citations << {
+            "url" => Raif::Utils::HtmlFragmentProcessor.strip_tracking_parameters(annotation["url"]),
+            "title" => annotation["title"]
+          }
+        end
+      end
+    end
+
+    citations.uniq{|citation| citation["url"] }
+  end
+
+  def build_request_parameters(model_completion)
+    parameters = {
+      model: api_name,
+      input: model_completion.messages,
+    }
+
+    if supports_temperature?
+      parameters[:temperature] = model_completion.temperature.to_f
+    end
+
+    parameters[:stream] = true if model_completion.stream_response?
+
+    # Add instructions (system prompt) if present
+    formatted_system_prompt = format_system_prompt(model_completion)
+    if formatted_system_prompt.present?
+      parameters[:instructions] = formatted_system_prompt
+    end
+
+    # Add max_output_tokens if specified
+    if model_completion.max_completion_tokens.present?
+      parameters[:max_output_tokens] = model_completion.max_completion_tokens
+    end
+
+    # If the LLM supports native tool use and there are available tools, add them to the parameters
+    if supports_native_tool_use?
+      tools = build_tools_parameter(model_completion)
+      parameters[:tools] = tools unless tools.blank?
+    end
+
+    # Add response format if needed. Default will be { "type": "text" }
+    response_format = determine_response_format(model_completion)
+    if response_format.present?
+      parameters[:text] = { format: response_format }
+      model_completion.response_format_parameter = response_format[:type]
+    end
+
+    parameters
+  end
+
+  def determine_response_format(model_completion)
+    # Only configure response format for JSON outputs
+    return unless model_completion.response_format_json?
+
+    if model_completion.json_response_schema.present? && supports_structured_outputs?
+      validate_json_schema!(model_completion.json_response_schema)
+
+      {
+        type: "json_schema",
+        name: "json_response_schema",
+        strict: true,
+        schema: model_completion.json_response_schema
+      }
+    else
+      # Default JSON mode for OpenAI models that don't support structured outputs or no schema is provided
+      { type: "json_object" }
+    end
+  end
+
+end
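
To make the citation extraction concrete, here is a sketch of the Responses API payload shape that extract_citations walks. The hash keys mirror the strings the method reads; the values are invented.

resp = {
  "output" => [{
    "type" => "message",
    "content" => [{
      "type" => "output_text",
      "text" => "Example answer citing one source.",
      "annotations" => [{
        "type" => "url_citation",
        "url" => "https://example.com/article?utm_source=example",
        "title" => "Example Article"
      }]
    }]
  }]
}

# Within the adapter, extract_citations(resp) would return:
# [{ "url" => "https://example.com/article", "title" => "Example Article" }]
# (assuming strip_tracking_parameters removes the utm_source parameter)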
data/app/models/raif/llms/open_router.rb

@@ -1,28 +1,27 @@
 # frozen_string_literal: true
 
 class Raif::Llms::OpenRouter < Raif::Llm
-  include Raif::Concerns::Llms::OpenAi::MessageFormatting
+  include Raif::Concerns::Llms::OpenAiCompletions::MessageFormatting
+  include Raif::Concerns::Llms::OpenAiCompletions::ToolFormatting
+  include Raif::Concerns::Llms::OpenAi::JsonSchemaValidation
 
-  def perform_model_completion!(model_completion)
+  def perform_model_completion!(model_completion, &block)
     model_completion.temperature ||= default_temperature
     parameters = build_request_parameters(model_completion)
     response = connection.post("chat/completions") do |req|
       req.body = parameters
+      req.options.on_data = streaming_chunk_handler(model_completion, &block) if model_completion.stream_response?
     end
 
-    response_json = response.body
-
-    model_completion.update!(
-      response_tool_calls: extract_response_tool_calls(response_json),
-      raw_response: response_json.dig("choices", 0, "message", "content"),
-      completion_tokens: response_json.dig("usage", "completion_tokens"),
-      prompt_tokens: response_json.dig("usage", "prompt_tokens"),
-      total_tokens: response_json.dig("usage", "total_tokens")
-    )
+    unless model_completion.stream_response?
+      update_model_completion(model_completion, response.body)
+    end
 
     model_completion
   end
 
+  private
+
   def connection
     @connection ||= Faraday.new(url: "https://openrouter.ai/api/v1") do |f|
       f.headers["Authorization"] = "Bearer #{Raif.config.open_router_api_key}"
@@ -34,7 +33,20 @@ class Raif::Llms::OpenRouter < Raif::Llm
     end
   end
 
-  protected
+  def streaming_response_type
+    Raif::StreamingResponses::OpenAiCompletions
+  end
+
+  def update_model_completion(model_completion, response_json)
+    model_completion.update!(
+      response_tool_calls: extract_response_tool_calls(response_json),
+      raw_response: response_json.dig("choices", 0, "message", "content"),
+      response_array: response_json["choices"],
+      completion_tokens: response_json.dig("usage", "completion_tokens"),
+      prompt_tokens: response_json.dig("usage", "prompt_tokens"),
+      total_tokens: response_json.dig("usage", "total_tokens")
+    )
+  end
 
   def build_request_parameters(model_completion)
     params = {
@@ -42,7 +54,6 @@ protected
       messages: model_completion.messages,
       temperature: model_completion.temperature.to_f,
       max_tokens: model_completion.max_completion_tokens || default_max_completion_tokens,
-      stream: false
     }
 
     # Add system message to the messages array if present
@@ -50,44 +61,28 @@ protected
       params[:messages].unshift({ "role" => "system", "content" => model_completion.system_prompt })
     end
 
-    if supports_native_tool_use? && model_completion.available_model_tools.any?
-      tools = []
-
-      model_completion.available_model_tools_map.each do |_tool_name, tool|
-        tools << {
-          type: "function",
-          function: {
-            name: tool.tool_name,
-            description: tool.tool_description,
-            parameters: tool.tool_arguments_schema
-          }
-        }
-      end
-
-      params[:tools] = tools
-    end
+    if supports_native_tool_use?
+      tools = build_tools_parameter(model_completion)
+      params[:tools] = tools unless tools.blank?
+    end
 
+    if model_completion.stream_response?
+      # Ask for usage stats in the last chunk
+      params[:stream] = true
+      params[:stream_options] = { include_usage: true }
     end
 
     params
   end
 
-  def extract_response_tool_calls(response_json)
-    tool_calls = response_json.dig("choices", 0, "message", "tool_calls")
-    return [] unless tool_calls.is_a?(Array)
-
-    tool_calls.map do |tool_call|
-      next unless tool_call["type"] == "function"
-
-      function = tool_call["function"]
-      next unless function.is_a?(Hash)
+  def extract_response_tool_calls(resp)
+    return if resp.dig("choices", 0, "message", "tool_calls").blank?
 
+    resp.dig("choices", 0, "message", "tool_calls").map do |tool_call|
      {
-        "
-        "
-        "function" => {
-          "name" => function["name"],
-          "arguments" => function["arguments"]
-        }
+        "name" => tool_call["function"]["name"],
+        "arguments" => JSON.parse(tool_call["function"]["arguments"])
      }
-    end
+    end
  end
 end
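
With the shared tool formatting concerns, OpenRouter now normalizes tool calls into the same name/arguments shape as the OpenAI adapters, with the arguments string parsed into a hash. A small sketch with an invented tool call:

require "json"

tool_call = {
  "function" => {
    "name" => "fetch_url",
    "arguments" => "{\"url\":\"https://example.com\"}"
  }
}

normalized = {
  "name" => tool_call["function"]["name"],
  "arguments" => JSON.parse(tool_call["function"]["arguments"])
}
# normalized => { "name" => "fetch_url", "arguments" => { "url" => "https://example.com" } }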
data/app/models/raif/model_completion.rb

@@ -16,6 +16,8 @@ class Raif::ModelCompletion < Raif::ApplicationRecord
 
   after_initialize -> { self.messages ||= [] }
   after_initialize -> { self.available_model_tools ||= [] }
+  after_initialize -> { self.response_array ||= [] }
+  after_initialize -> { self.citations ||= [] }
 
   def json_response_schema
     source.json_response_schema if source&.respond_to?(:json_response_schema)