raif 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/app/assets/builds/raif_admin.css +40 -2
- data/app/assets/builds/raif_admin_sprockets.js +2709 -0
- data/app/assets/javascript/raif/admin/copy_to_clipboard_controller.js +132 -0
- data/app/assets/javascript/raif/admin/cost_estimate_controller.js +80 -0
- data/app/assets/javascript/raif/admin/judge_config_controller.js +23 -0
- data/app/assets/javascript/raif/admin/select_all_checkboxes_controller.js +33 -0
- data/app/assets/javascript/raif/admin/sortable_table_controller.js +51 -0
- data/app/assets/javascript/raif/admin/table_search_controller.js +15 -0
- data/app/assets/javascript/raif/admin/tom_select_controller.js +33 -0
- data/app/assets/javascript/raif_admin.js +23 -0
- data/app/assets/javascript/raif_admin_sprockets.js +24 -0
- data/app/assets/stylesheets/raif_admin.scss +50 -1
- data/app/controllers/raif/admin/agents_controller.rb +27 -1
- data/app/controllers/raif/admin/configs_controller.rb +1 -0
- data/app/controllers/raif/admin/llms_controller.rb +27 -0
- data/app/controllers/raif/admin/model_completions_controller.rb +6 -0
- data/app/controllers/raif/admin/prompt_studio/agents_controller.rb +25 -0
- data/app/controllers/raif/admin/prompt_studio/base_controller.rb +32 -0
- data/app/controllers/raif/admin/prompt_studio/batch_runs_controller.rb +102 -0
- data/app/controllers/raif/admin/prompt_studio/conversations_controller.rb +25 -0
- data/app/controllers/raif/admin/prompt_studio/tasks_controller.rb +64 -0
- data/app/controllers/raif/admin/tasks_controller.rb +5 -0
- data/app/helpers/raif/application_helper.rb +40 -0
- data/app/jobs/raif/prompt_studio_batch_run_item_job.rb +11 -0
- data/app/jobs/raif/prompt_studio_batch_run_job.rb +15 -0
- data/app/jobs/raif/prompt_studio_task_run_job.rb +36 -0
- data/app/models/raif/agent.rb +36 -5
- data/app/models/raif/agents/native_tool_calling_agent.rb +101 -19
- data/app/models/raif/concerns/has_prompt_templates.rb +88 -0
- data/app/models/raif/concerns/has_runtime_duration.rb +41 -0
- data/app/models/raif/concerns/json_schema_definition.rb +16 -3
- data/app/models/raif/concerns/llm_prompt_caching.rb +20 -0
- data/app/models/raif/concerns/llms/anthropic/message_formatting.rb +6 -0
- data/app/models/raif/concerns/llms/anthropic/tool_formatting.rb +5 -1
- data/app/models/raif/concerns/llms/bedrock/message_formatting.rb +7 -0
- data/app/models/raif/concerns/llms/bedrock/tool_formatting.rb +4 -0
- data/app/models/raif/concerns/llms/google/message_formatting.rb +5 -2
- data/app/models/raif/concerns/llms/google/tool_formatting.rb +4 -0
- data/app/models/raif/concerns/llms/message_formatting.rb +30 -0
- data/app/models/raif/concerns/llms/open_ai_completions/response_tool_calls.rb +1 -1
- data/app/models/raif/concerns/llms/open_ai_completions/tool_formatting.rb +4 -0
- data/app/models/raif/concerns/llms/open_ai_responses/tool_formatting.rb +4 -0
- data/app/models/raif/concerns/provider_managed_tool_calls.rb +162 -0
- data/app/models/raif/conversation.rb +24 -3
- data/app/models/raif/conversation_entry.rb +6 -3
- data/app/models/raif/embedding_models/bedrock.rb +10 -1
- data/app/models/raif/embedding_models/google.rb +37 -0
- data/app/models/raif/evals/llm_judge.rb +70 -0
- data/{lib → app/models}/raif/evals/llm_judges/binary.rb +38 -0
- data/{lib → app/models}/raif/evals/llm_judges/comparative.rb +38 -0
- data/{lib → app/models}/raif/evals/llm_judges/scored.rb +38 -0
- data/{lib → app/models}/raif/evals/llm_judges/summarization.rb +38 -0
- data/app/models/raif/llm.rb +82 -7
- data/app/models/raif/llms/anthropic.rb +26 -4
- data/app/models/raif/llms/bedrock.rb +59 -5
- data/app/models/raif/llms/google.rb +28 -2
- data/app/models/raif/llms/open_ai_base.rb +4 -0
- data/app/models/raif/llms/open_ai_completions.rb +9 -2
- data/app/models/raif/llms/open_ai_responses.rb +9 -2
- data/app/models/raif/llms/open_router.rb +10 -3
- data/app/models/raif/model_completion.rb +75 -34
- data/app/models/raif/model_tool.rb +45 -3
- data/app/models/raif/model_tool_invocation.rb +31 -1
- data/app/models/raif/prompt_studio_batch_run.rb +155 -0
- data/app/models/raif/prompt_studio_batch_run_item.rb +220 -0
- data/app/models/raif/streaming_responses/bedrock.rb +60 -1
- data/app/models/raif/task.rb +30 -6
- data/app/views/layouts/raif/admin.html.erb +31 -1
- data/app/views/raif/admin/agents/_agent.html.erb +1 -0
- data/app/views/raif/admin/agents/index.html.erb +48 -0
- data/app/views/raif/admin/agents/show.html.erb +4 -0
- data/app/views/raif/admin/llms/index.html.erb +110 -0
- data/app/views/raif/admin/model_completions/_model_completion.html.erb +3 -7
- data/app/views/raif/admin/model_completions/index.html.erb +14 -1
- data/app/views/raif/admin/model_completions/show.html.erb +164 -55
- data/app/views/raif/admin/model_tool_invocations/index.html.erb +1 -1
- data/app/views/raif/admin/model_tool_invocations/show.html.erb +18 -0
- data/app/views/raif/admin/prompt_studio/agents/index.html.erb +56 -0
- data/app/views/raif/admin/prompt_studio/agents/show.html.erb +57 -0
- data/app/views/raif/admin/prompt_studio/batch_runs/_batch_run_item.html.erb +54 -0
- data/app/views/raif/admin/prompt_studio/batch_runs/_judge_config_fields.html.erb +76 -0
- data/app/views/raif/admin/prompt_studio/batch_runs/_judge_detail_modal.html.erb +27 -0
- data/app/views/raif/admin/prompt_studio/batch_runs/_modal.html.erb +35 -0
- data/app/views/raif/admin/prompt_studio/batch_runs/_progress.html.erb +78 -0
- data/app/views/raif/admin/prompt_studio/batch_runs/show.html.erb +49 -0
- data/app/views/raif/admin/prompt_studio/conversations/index.html.erb +48 -0
- data/app/views/raif/admin/prompt_studio/conversations/show.html.erb +36 -0
- data/app/views/raif/admin/prompt_studio/shared/_nav_tabs.html.erb +17 -0
- data/app/views/raif/admin/prompt_studio/shared/_prompt_comparison.html.erb +87 -0
- data/app/views/raif/admin/prompt_studio/shared/_type_filter.html.erb +54 -0
- data/app/views/raif/admin/prompt_studio/tasks/_task_result.html.erb +145 -0
- data/app/views/raif/admin/prompt_studio/tasks/_task_row.html.erb +12 -0
- data/app/views/raif/admin/prompt_studio/tasks/_task_type_filter.html.erb +58 -0
- data/app/views/raif/admin/prompt_studio/tasks/_tasks_table.html.erb +22 -0
- data/app/views/raif/admin/prompt_studio/tasks/index.html.erb +35 -0
- data/app/views/raif/admin/prompt_studio/tasks/show.html.erb +19 -0
- data/app/views/raif/admin/tasks/_task.html.erb +1 -0
- data/app/views/raif/admin/tasks/index.html.erb +17 -5
- data/app/views/raif/admin/tasks/show.html.erb +20 -0
- data/app/views/raif/conversation_entries/_message.html.erb +10 -6
- data/config/importmap.rb +8 -0
- data/config/locales/admin.en.yml +128 -0
- data/config/locales/en.yml +36 -2
- data/config/routes.rb +8 -0
- data/db/migrate/20260307000000_add_prompt_studio_run_to_raif_tasks.rb +7 -0
- data/db/migrate/20260308000000_create_raif_prompt_studio_batch_runs.rb +27 -0
- data/db/migrate/20260308000001_create_raif_prompt_studio_batch_run_items.rb +24 -0
- data/db/migrate/20260407000000_add_cache_token_columns_to_raif_model_completions.rb +8 -0
- data/lib/generators/raif/agent/agent_generator.rb +18 -0
- data/lib/generators/raif/agent/templates/agent.rb.tt +7 -5
- data/lib/generators/raif/agent/templates/system_prompt.erb.tt +3 -0
- data/lib/generators/raif/conversation/conversation_generator.rb +19 -1
- data/lib/generators/raif/conversation/templates/system_prompt.erb.tt +4 -0
- data/lib/generators/raif/install/templates/initializer.rb +68 -27
- data/lib/generators/raif/task/task_generator.rb +18 -0
- data/lib/generators/raif/task/templates/prompt.erb.tt +4 -0
- data/lib/generators/raif/task/templates/task.rb.tt +9 -8
- data/lib/raif/configuration.rb +10 -0
- data/lib/raif/embedding_model_registry.rb +8 -0
- data/lib/raif/engine.rb +16 -1
- data/lib/raif/errors/blank_response_error.rb +8 -0
- data/lib/raif/errors/prompt_template_error.rb +15 -0
- data/lib/raif/errors.rb +2 -0
- data/lib/raif/evals.rb +0 -6
- data/lib/raif/llm_registry.rb +230 -9
- data/lib/raif/prompt_studio_comparison_builder.rb +138 -0
- data/lib/raif/token_estimator.rb +28 -0
- data/lib/raif/version.rb +1 -1
- data/lib/raif.rb +2 -0
- data/spec/support/rspec_helpers.rb +7 -1
- data/spec/support/test_task.rb +9 -0
- data/spec/support/test_template_task.rb +41 -0
- metadata +65 -7
- data/lib/raif/evals/llm_judge.rb +0 -32
- /data/{lib → app/models}/raif/evals/scoring_rubric.rb +0 -0
data/lib/raif/llm_registry.rb
CHANGED
|
@@ -41,6 +41,13 @@ module Raif
|
|
|
41
41
|
|
|
42
42
|
def self.default_llms
|
|
43
43
|
open_ai_models = [
|
|
44
|
+
{
|
|
45
|
+
key: :open_ai_gpt_5_4,
|
|
46
|
+
api_name: "gpt-5.4",
|
|
47
|
+
input_token_cost: 2.5 / 1_000_000,
|
|
48
|
+
output_token_cost: 15.0 / 1_000_000,
|
|
49
|
+
model_provider_settings: { supports_temperature: false },
|
|
50
|
+
},
|
|
44
51
|
{
|
|
45
52
|
key: :open_ai_gpt_5_2,
|
|
46
53
|
api_name: "gpt-5.2",
|
|
@@ -48,6 +55,13 @@ module Raif
|
|
|
48
55
|
output_token_cost: 14.0 / 1_000_000,
|
|
49
56
|
model_provider_settings: { supports_temperature: false },
|
|
50
57
|
},
|
|
58
|
+
{
|
|
59
|
+
key: :open_ai_gpt_5_3,
|
|
60
|
+
api_name: "gpt-5.3",
|
|
61
|
+
input_token_cost: 1.75 / 1_000_000,
|
|
62
|
+
output_token_cost: 14.0 / 1_000_000,
|
|
63
|
+
model_provider_settings: { supports_temperature: false },
|
|
64
|
+
},
|
|
51
65
|
{
|
|
52
66
|
key: :open_ai_gpt_5_1,
|
|
53
67
|
api_name: "gpt-5.1",
|
|
@@ -62,6 +76,20 @@ module Raif
|
|
|
62
76
|
output_token_cost: 10.0 / 1_000_000,
|
|
63
77
|
model_provider_settings: { supports_temperature: false },
|
|
64
78
|
},
|
|
79
|
+
{
|
|
80
|
+
key: :open_ai_gpt_5_4_mini,
|
|
81
|
+
api_name: "gpt-5.4-mini",
|
|
82
|
+
input_token_cost: 0.75 / 1_000_000,
|
|
83
|
+
output_token_cost: 4.5 / 1_000_000,
|
|
84
|
+
model_provider_settings: { supports_temperature: false },
|
|
85
|
+
},
|
|
86
|
+
{
|
|
87
|
+
key: :open_ai_gpt_5_4_nano,
|
|
88
|
+
api_name: "gpt-5.4-nano",
|
|
89
|
+
input_token_cost: 0.20 / 1_000_000,
|
|
90
|
+
output_token_cost: 1.25 / 1_000_000,
|
|
91
|
+
model_provider_settings: { supports_temperature: false },
|
|
92
|
+
},
|
|
65
93
|
{
|
|
66
94
|
key: :open_ai_gpt_5_mini,
|
|
67
95
|
api_name: "gpt-5-mini",
|
|
@@ -123,8 +151,8 @@ module Raif
|
|
|
123
151
|
{
|
|
124
152
|
key: :open_ai_o1_mini,
|
|
125
153
|
api_name: "o1-mini",
|
|
126
|
-
input_token_cost: 1.
|
|
127
|
-
output_token_cost:
|
|
154
|
+
input_token_cost: 1.1 / 1_000_000,
|
|
155
|
+
output_token_cost: 4.4 / 1_000_000,
|
|
128
156
|
model_provider_settings: { supports_temperature: false },
|
|
129
157
|
},
|
|
130
158
|
{
|
|
@@ -164,7 +192,7 @@ module Raif
|
|
|
164
192
|
# o1-mini is not supported by the OpenAI Responses API.
|
|
165
193
|
open_ai_responses_models.delete_if{|model| model[:key] == :open_ai_o1_mini }
|
|
166
194
|
|
|
167
|
-
#
|
|
195
|
+
# These models are not supported by the OpenAI Completions API, but are supported by the OpenAI Responses API.
|
|
168
196
|
open_ai_responses_models << {
|
|
169
197
|
key: :open_ai_responses_o1_pro,
|
|
170
198
|
api_name: "o1-pro",
|
|
@@ -181,10 +209,79 @@ module Raif
|
|
|
181
209
|
model_provider_settings: { supports_temperature: false },
|
|
182
210
|
}
|
|
183
211
|
|
|
212
|
+
open_ai_responses_models << {
|
|
213
|
+
key: :open_ai_responses_gpt_5_pro,
|
|
214
|
+
api_name: "gpt-5-pro",
|
|
215
|
+
input_token_cost: 15.0 / 1_000_000,
|
|
216
|
+
output_token_cost: 120.0 / 1_000_000,
|
|
217
|
+
model_provider_settings: { supports_temperature: false },
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
open_ai_responses_models << {
|
|
221
|
+
key: :open_ai_responses_gpt_5_2_pro,
|
|
222
|
+
api_name: "gpt-5.2-pro",
|
|
223
|
+
input_token_cost: 21.0 / 1_000_000,
|
|
224
|
+
output_token_cost: 168.0 / 1_000_000,
|
|
225
|
+
model_provider_settings: { supports_temperature: false },
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
open_ai_responses_models << {
|
|
229
|
+
key: :open_ai_responses_gpt_5_4_pro,
|
|
230
|
+
api_name: "gpt-5.4-pro",
|
|
231
|
+
input_token_cost: 30.0 / 1_000_000,
|
|
232
|
+
output_token_cost: 180.0 / 1_000_000,
|
|
233
|
+
model_provider_settings: { supports_temperature: false, supports_structured_outputs: false },
|
|
234
|
+
}
|
|
235
|
+
|
|
184
236
|
{
|
|
185
237
|
Raif::Llms::OpenAiCompletions => open_ai_models,
|
|
186
238
|
Raif::Llms::OpenAiResponses => open_ai_responses_models,
|
|
187
239
|
Raif::Llms::Anthropic => [
|
|
240
|
+
{
|
|
241
|
+
key: :anthropic_claude_4_7_opus,
|
|
242
|
+
api_name: "claude-opus-4-7",
|
|
243
|
+
input_token_cost: 5.0 / 1_000_000,
|
|
244
|
+
output_token_cost: 25.0 / 1_000_000,
|
|
245
|
+
max_completion_tokens: 128_000,
|
|
246
|
+
model_provider_settings: { supports_temperature: false },
|
|
247
|
+
supported_provider_managed_tools: [
|
|
248
|
+
Raif::ModelTools::ProviderManaged::WebSearch,
|
|
249
|
+
Raif::ModelTools::ProviderManaged::CodeExecution
|
|
250
|
+
]
|
|
251
|
+
},
|
|
252
|
+
{
|
|
253
|
+
key: :anthropic_claude_4_6_opus,
|
|
254
|
+
api_name: "claude-opus-4-6",
|
|
255
|
+
input_token_cost: 5.0 / 1_000_000,
|
|
256
|
+
output_token_cost: 25.0 / 1_000_000,
|
|
257
|
+
max_completion_tokens: 128_000,
|
|
258
|
+
supported_provider_managed_tools: [
|
|
259
|
+
Raif::ModelTools::ProviderManaged::WebSearch,
|
|
260
|
+
Raif::ModelTools::ProviderManaged::CodeExecution
|
|
261
|
+
]
|
|
262
|
+
},
|
|
263
|
+
{
|
|
264
|
+
key: :anthropic_claude_4_6_sonnet,
|
|
265
|
+
api_name: "claude-sonnet-4-6",
|
|
266
|
+
input_token_cost: 3.0 / 1_000_000,
|
|
267
|
+
output_token_cost: 15.0 / 1_000_000,
|
|
268
|
+
max_completion_tokens: 64_000,
|
|
269
|
+
supported_provider_managed_tools: [
|
|
270
|
+
Raif::ModelTools::ProviderManaged::WebSearch,
|
|
271
|
+
Raif::ModelTools::ProviderManaged::CodeExecution
|
|
272
|
+
]
|
|
273
|
+
},
|
|
274
|
+
{
|
|
275
|
+
key: :anthropic_claude_4_5_opus,
|
|
276
|
+
api_name: "claude-opus-4-5",
|
|
277
|
+
input_token_cost: 5.0 / 1_000_000,
|
|
278
|
+
output_token_cost: 25.0 / 1_000_000,
|
|
279
|
+
max_completion_tokens: 64_000,
|
|
280
|
+
supported_provider_managed_tools: [
|
|
281
|
+
Raif::ModelTools::ProviderManaged::WebSearch,
|
|
282
|
+
Raif::ModelTools::ProviderManaged::CodeExecution
|
|
283
|
+
]
|
|
284
|
+
},
|
|
188
285
|
{
|
|
189
286
|
key: :anthropic_claude_4_5_sonnet,
|
|
190
287
|
api_name: "claude-sonnet-4-5",
|
|
@@ -223,7 +320,7 @@ module Raif
|
|
|
223
320
|
api_name: "claude-opus-4-20250514",
|
|
224
321
|
input_token_cost: 15.0 / 1_000_000,
|
|
225
322
|
output_token_cost: 75.0 / 1_000_000,
|
|
226
|
-
max_completion_tokens:
|
|
323
|
+
max_completion_tokens: 32_000,
|
|
227
324
|
supported_provider_managed_tools: [
|
|
228
325
|
Raif::ModelTools::ProviderManaged::WebSearch,
|
|
229
326
|
Raif::ModelTools::ProviderManaged::CodeExecution
|
|
@@ -234,7 +331,7 @@ module Raif
|
|
|
234
331
|
api_name: "claude-sonnet-4-20250514",
|
|
235
332
|
input_token_cost: 3.0 / 1_000_000,
|
|
236
333
|
output_token_cost: 15.0 / 1_000_000,
|
|
237
|
-
max_completion_tokens:
|
|
334
|
+
max_completion_tokens: 64_000,
|
|
238
335
|
supported_provider_managed_tools: [
|
|
239
336
|
Raif::ModelTools::ProviderManaged::WebSearch,
|
|
240
337
|
Raif::ModelTools::ProviderManaged::CodeExecution
|
|
@@ -282,6 +379,34 @@ module Raif
|
|
|
282
379
|
},
|
|
283
380
|
],
|
|
284
381
|
Raif::Llms::Bedrock => [
|
|
382
|
+
{
|
|
383
|
+
key: :bedrock_claude_4_7_opus,
|
|
384
|
+
api_name: "anthropic.claude-opus-4-7",
|
|
385
|
+
input_token_cost: 0.005 / 1000,
|
|
386
|
+
output_token_cost: 0.025 / 1000,
|
|
387
|
+
max_completion_tokens: 128_000
|
|
388
|
+
},
|
|
389
|
+
{
|
|
390
|
+
key: :bedrock_claude_4_6_opus,
|
|
391
|
+
api_name: "anthropic.claude-opus-4-6-v1",
|
|
392
|
+
input_token_cost: 0.005 / 1000,
|
|
393
|
+
output_token_cost: 0.025 / 1000,
|
|
394
|
+
max_completion_tokens: 128_000
|
|
395
|
+
},
|
|
396
|
+
{
|
|
397
|
+
key: :bedrock_claude_4_6_sonnet,
|
|
398
|
+
api_name: "anthropic.claude-sonnet-4-6",
|
|
399
|
+
input_token_cost: 0.003 / 1000,
|
|
400
|
+
output_token_cost: 0.015 / 1000,
|
|
401
|
+
max_completion_tokens: 64_000
|
|
402
|
+
},
|
|
403
|
+
{
|
|
404
|
+
key: :bedrock_claude_4_5_opus,
|
|
405
|
+
api_name: "anthropic.claude-opus-4-5-20251101-v1:0",
|
|
406
|
+
input_token_cost: 0.005 / 1000,
|
|
407
|
+
output_token_cost: 0.025 / 1000,
|
|
408
|
+
max_completion_tokens: 64_000
|
|
409
|
+
},
|
|
285
410
|
{
|
|
286
411
|
key: :bedrock_claude_4_5_sonnet,
|
|
287
412
|
api_name: "anthropic.claude-sonnet-4-5-20250929-v1:0",
|
|
@@ -308,14 +433,14 @@ module Raif
|
|
|
308
433
|
api_name: "anthropic.claude-sonnet-4-20250514-v1:0",
|
|
309
434
|
input_token_cost: 0.003 / 1000,
|
|
310
435
|
output_token_cost: 0.015 / 1000,
|
|
311
|
-
max_completion_tokens:
|
|
436
|
+
max_completion_tokens: 64_000
|
|
312
437
|
},
|
|
313
438
|
{
|
|
314
439
|
key: :bedrock_claude_4_opus,
|
|
315
440
|
api_name: "anthropic.claude-opus-4-20250514-v1:0",
|
|
316
441
|
input_token_cost: 0.015 / 1000,
|
|
317
442
|
output_token_cost: 0.075 / 1000,
|
|
318
|
-
max_completion_tokens:
|
|
443
|
+
max_completion_tokens: 32_000
|
|
319
444
|
},
|
|
320
445
|
{
|
|
321
446
|
key: :bedrock_claude_3_7_sonnet,
|
|
@@ -365,6 +490,34 @@ module Raif
|
|
|
365
490
|
input_token_cost: 0.0002625 / 1000,
|
|
366
491
|
output_token_cost: 0.0042 / 1000,
|
|
367
492
|
max_completion_tokens: 4096
|
|
493
|
+
},
|
|
494
|
+
{
|
|
495
|
+
key: :bedrock_deepseek_v3_2,
|
|
496
|
+
api_name: "deepseek.v3.2",
|
|
497
|
+
input_token_cost: 0.62 / 1_000_000,
|
|
498
|
+
output_token_cost: 1.85 / 1_000_000,
|
|
499
|
+
max_completion_tokens: 32_768
|
|
500
|
+
},
|
|
501
|
+
{
|
|
502
|
+
key: :bedrock_deepseek_r1,
|
|
503
|
+
api_name: "deepseek.r1-v1:0",
|
|
504
|
+
input_token_cost: 0.00135 / 1_000,
|
|
505
|
+
output_token_cost: 0.0054 / 1_000,
|
|
506
|
+
max_completion_tokens: 32_768
|
|
507
|
+
},
|
|
508
|
+
{
|
|
509
|
+
key: :bedrock_gpt_oss_120b,
|
|
510
|
+
api_name: "openai.gpt-oss-120b-1:0",
|
|
511
|
+
input_token_cost: 0.15 / 1_000_000,
|
|
512
|
+
output_token_cost: 0.60 / 1_000_000,
|
|
513
|
+
max_completion_tokens: 32_768
|
|
514
|
+
},
|
|
515
|
+
{
|
|
516
|
+
key: :bedrock_gpt_oss_20b,
|
|
517
|
+
api_name: "openai.gpt-oss-20b-1:0",
|
|
518
|
+
input_token_cost: 0.07 / 1_000_000,
|
|
519
|
+
output_token_cost: 0.30 / 1_000_000,
|
|
520
|
+
max_completion_tokens: 32_768
|
|
368
521
|
}
|
|
369
522
|
],
|
|
370
523
|
Raif::Llms::OpenRouter => [
|
|
@@ -377,8 +530,8 @@ module Raif
|
|
|
377
530
|
{
|
|
378
531
|
key: :open_router_deepseek_chat_v3,
|
|
379
532
|
api_name: "deepseek/deepseek-chat-v3-0324",
|
|
380
|
-
input_token_cost: 0.
|
|
381
|
-
output_token_cost:
|
|
533
|
+
input_token_cost: 0.20 / 1_000_000,
|
|
534
|
+
output_token_cost: 0.77 / 1_000_000,
|
|
382
535
|
},
|
|
383
536
|
{
|
|
384
537
|
key: :open_router_deepseek_v3_1,
|
|
@@ -386,6 +539,12 @@ module Raif
|
|
|
386
539
|
input_token_cost: 0.25 / 1_000_000,
|
|
387
540
|
output_token_cost: 1.0 / 1_000_000,
|
|
388
541
|
},
|
|
542
|
+
{
|
|
543
|
+
key: :open_router_deepseek_v3_2,
|
|
544
|
+
api_name: "deepseek/deepseek-v3.2",
|
|
545
|
+
input_token_cost: 0.26 / 1_000_000,
|
|
546
|
+
output_token_cost: 0.38 / 1_000_000,
|
|
547
|
+
},
|
|
389
548
|
{
|
|
390
549
|
key: :open_router_gemini_2_0_flash,
|
|
391
550
|
api_name: "google/gemini-2.0-flash-001",
|
|
@@ -410,6 +569,18 @@ module Raif
|
|
|
410
569
|
input_token_cost: 2.0 / 1_000_000,
|
|
411
570
|
output_token_cost: 12.0 / 1_000_000,
|
|
412
571
|
},
|
|
572
|
+
{
|
|
573
|
+
key: :open_router_gemini_3_1_pro_preview,
|
|
574
|
+
api_name: "google/gemini-3.1-pro-preview",
|
|
575
|
+
input_token_cost: 2.0 / 1_000_000,
|
|
576
|
+
output_token_cost: 12.0 / 1_000_000,
|
|
577
|
+
},
|
|
578
|
+
{
|
|
579
|
+
key: :open_router_gemini_3_1_flash_lite_preview,
|
|
580
|
+
api_name: "google/gemini-3.1-flash-lite-preview",
|
|
581
|
+
input_token_cost: 0.25 / 1_000_000,
|
|
582
|
+
output_token_cost: 1.5 / 1_000_000,
|
|
583
|
+
},
|
|
413
584
|
{
|
|
414
585
|
key: :open_router_grok_4,
|
|
415
586
|
api_name: "x-ai/grok-4",
|
|
@@ -428,6 +599,12 @@ module Raif
|
|
|
428
599
|
input_token_cost: 0.45 / 1_000_000,
|
|
429
600
|
output_token_cost: 2.35 / 1_000_000,
|
|
430
601
|
},
|
|
602
|
+
{
|
|
603
|
+
key: :open_router_kimi_k2_5,
|
|
604
|
+
api_name: "moonshotai/kimi-k2.5",
|
|
605
|
+
input_token_cost: 0.45 / 1_000_000,
|
|
606
|
+
output_token_cost: 2.20 / 1_000_000,
|
|
607
|
+
},
|
|
431
608
|
{
|
|
432
609
|
key: :open_router_llama_3_3_70b_instruct,
|
|
433
610
|
api_name: "meta-llama/llama-3.3-70b-instruct",
|
|
@@ -458,6 +635,18 @@ module Raif
|
|
|
458
635
|
input_token_cost: 0.255 / 1_000_000,
|
|
459
636
|
output_token_cost: 1.02 / 1_000_000,
|
|
460
637
|
},
|
|
638
|
+
{
|
|
639
|
+
key: :open_router_minimax_m2_1,
|
|
640
|
+
api_name: "minimax/minimax-m2.1",
|
|
641
|
+
input_token_cost: 0.27 / 1_000_000,
|
|
642
|
+
output_token_cost: 0.95 / 1_000_000,
|
|
643
|
+
},
|
|
644
|
+
{
|
|
645
|
+
key: :open_router_minimax_m2_5,
|
|
646
|
+
api_name: "minimax/minimax-m2.5",
|
|
647
|
+
input_token_cost: 0.27 / 1_000_000,
|
|
648
|
+
output_token_cost: 0.95 / 1_000_000,
|
|
649
|
+
},
|
|
461
650
|
{
|
|
462
651
|
key: :open_router_mistral_large_3_2512,
|
|
463
652
|
api_name: "mistralai/mistral-large-2512",
|
|
@@ -482,8 +671,40 @@ module Raif
|
|
|
482
671
|
input_token_cost: 0.05 / 1_000_000,
|
|
483
672
|
output_token_cost: 0.2 / 1_000_000,
|
|
484
673
|
},
|
|
674
|
+
{
|
|
675
|
+
key: :open_router_grok_4_20,
|
|
676
|
+
api_name: "x-ai/grok-4.20",
|
|
677
|
+
input_token_cost: 2.0 / 1_000_000,
|
|
678
|
+
output_token_cost: 6.0 / 1_000_000,
|
|
679
|
+
},
|
|
680
|
+
{
|
|
681
|
+
key: :open_router_google_gemma_4_31b_it,
|
|
682
|
+
api_name: "google/gemma-4-31b-it",
|
|
683
|
+
input_token_cost: 0.14 / 1_000_000,
|
|
684
|
+
output_token_cost: 0.40 / 1_000_000,
|
|
685
|
+
},
|
|
485
686
|
],
|
|
486
687
|
Raif::Llms::Google => [
|
|
688
|
+
{
|
|
689
|
+
key: :google_gemini_3_1_pro,
|
|
690
|
+
api_name: "gemini-3.1-pro-preview",
|
|
691
|
+
input_token_cost: 2.0 / 1_000_000,
|
|
692
|
+
output_token_cost: 12.0 / 1_000_000,
|
|
693
|
+
supported_provider_managed_tools: [
|
|
694
|
+
Raif::ModelTools::ProviderManaged::WebSearch,
|
|
695
|
+
Raif::ModelTools::ProviderManaged::CodeExecution
|
|
696
|
+
]
|
|
697
|
+
},
|
|
698
|
+
{
|
|
699
|
+
key: :google_gemini_3_1_flash_lite,
|
|
700
|
+
api_name: "gemini-3.1-flash-lite-preview",
|
|
701
|
+
input_token_cost: 0.25 / 1_000_000,
|
|
702
|
+
output_token_cost: 1.5 / 1_000_000,
|
|
703
|
+
supported_provider_managed_tools: [
|
|
704
|
+
Raif::ModelTools::ProviderManaged::WebSearch,
|
|
705
|
+
Raif::ModelTools::ProviderManaged::CodeExecution
|
|
706
|
+
]
|
|
707
|
+
},
|
|
487
708
|
{
|
|
488
709
|
key: :google_gemini_3_0_pro,
|
|
489
710
|
api_name: "gemini-3-pro-preview",
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Raif
  # Compares the prompt and system prompt stored on a record with the ones the
  # current code renders for that same record.
  #
  # The record must respond to +system_prompt+, +build_prompt+ and
  # +build_system_prompt+, and to +llm_model_key+ whenever a token estimate is
  # produced. +prompt+, +run_with+ and +raif_model_completion+ are optional and
  # are only consulted when the record responds to them.
  class PromptStudioComparisonBuilder
    # Attempts to rebuild the prompt from current code for a given record.
    # Returns a hash with the rendered prompts and any warnings.
    def self.build(record)
      new(record).build
    end

    def initialize(record)
      @record = record
    end

    # Returns a hash with these keys:
    #   :original_prompt / :original_system_prompt       - values read from the record
    #   :current_prompt / :current_system_prompt         - values rebuilt now (nil on failure)
    #   :prompt_changed / :system_prompt_changed         - see #changed?
    #   :has_stale_references                            - see #detect_stale_references
    #   :warnings                                        - array of warning strings
    #   :original_prompt_tokens / :original_prompt_token_cost
    #   :current_prompt_token_estimate / :current_prompt_cost_estimate
    #
    # The current prompts are rendered before :warnings is read, so any render
    # error message is already present in the returned warnings array.
    def build
      {
        original_prompt: original_prompt,
        original_system_prompt: original_system_prompt,
        current_prompt: current_prompt,
        current_system_prompt: current_system_prompt,
        prompt_changed: prompt_changed?,
        system_prompt_changed: system_prompt_changed?,
        has_stale_references: has_stale_references?,
        warnings: warnings,
        original_prompt_tokens: original_prompt_tokens,
        original_prompt_token_cost: original_prompt_token_cost,
        current_prompt_token_estimate: current_prompt_token_estimate,
        current_prompt_cost_estimate: current_prompt_cost_estimate
      }
    end

    private

    # Prompt stored on the record, or nil when the record has no +prompt+ method.
    def original_prompt
      @original_prompt ||= @record.respond_to?(:prompt) ? @record.prompt : nil
    end

    # System prompt stored on the record.
    def original_system_prompt
      @original_system_prompt ||= @record.system_prompt
    end

    # Prompt rendered by the record's current +build_prompt+.
    # Memoized with defined? so that a nil result is not rendered again.
    def current_prompt
      return @current_prompt if defined?(@current_prompt)

      @current_prompt = begin
        @record.build_prompt
      rescue NotImplementedError
        # NotImplementedError is not a StandardError, so it needs its own clause.
        # It is treated as "nothing to render" and produces no warning.
        nil
      rescue => e
        warnings << "Error rendering current prompt: #{e.message}"
        nil
      end
    end

    # System prompt rendered by the record's current +build_system_prompt+.
    # Same memoization and error handling as #current_prompt.
    def current_system_prompt
      return @current_system_prompt if defined?(@current_system_prompt)

      @current_system_prompt = begin
        @record.build_system_prompt
      rescue NotImplementedError
        nil
      rescue => e
        warnings << "Error rendering current system prompt: #{e.message}"
        nil
      end
    end

    # Mutable list of warning strings. It is seeded with the stale-reference
    # warning when applicable; render errors are appended to the same array.
    def warnings
      @warnings ||= [].tap do |w|
        w << I18n.t("raif.admin.prompt_studio.common.warning_stale_reference") if has_stale_references?
      end
    end

    # Memoized result of #detect_stale_references.
    def has_stale_references?
      return @has_stale_references if defined?(@has_stale_references)

      @has_stale_references = detect_stale_references
    end

    # True when any top-level +run_with+ value is a "gid://" string that
    # GlobalID::Locator cannot resolve (it returns nil or raises).
    def detect_stale_references
      return false unless @record.respond_to?(:run_with) && @record.run_with.present?

      @record.run_with.any? do |_key, value|
        next false unless value.is_a?(String) && value.start_with?("gid://")

        begin
          GlobalID::Locator.locate(value).nil?
        rescue StandardError
          true
        end
      end
    end

    # True only when both values are present and differ after stripping
    # surrounding whitespace. A missing side never counts as a change.
    def changed?(original, current)
      original.present? && current.present? && original.strip != current.strip
    end

    # Prompt token count recorded on the record's model completion, if any.
    def original_prompt_tokens
      return unless @record.respond_to?(:raif_model_completion)

      @record.raif_model_completion&.prompt_tokens
    end

    # Prompt token cost recorded on the record's model completion, if any.
    def original_prompt_token_cost
      return unless @record.respond_to?(:raif_model_completion)

      @record.raif_model_completion&.prompt_token_cost
    end

    # Estimated token count of the current system prompt plus prompt, or nil
    # when neither changed. Memoized because #build reads it directly and again
    # through #current_prompt_cost_estimate; without the memo the prompts would
    # be tokenized twice.
    def current_prompt_token_estimate
      return @current_prompt_token_estimate if defined?(@current_prompt_token_estimate)

      @current_prompt_token_estimate =
        if prompt_changed? || system_prompt_changed?
          Raif::TokenEstimator.estimate_tokens(current_system_prompt, current_prompt)
        end
    end

    # Estimated input cost of the current prompts: the token estimate times the
    # :input_token_cost of the record's LLM config. Returns nil when there is
    # no estimate, no model key, or no configured input cost.
    def current_prompt_cost_estimate
      token_estimate = current_prompt_token_estimate
      return unless token_estimate

      return unless @record.llm_model_key.present?

      llm_config = Raif.llm_config(@record.llm_model_key.to_sym)
      return unless llm_config&.dig(:input_token_cost)

      llm_config[:input_token_cost] * token_estimate
    end

    def prompt_changed?
      changed?(original_prompt, current_prompt)
    end

    def system_prompt_changed?
      changed?(original_system_prompt, current_system_prompt)
    end
  end
end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Raif
  # Token counting helper built on the optional tiktoken_ruby gem.
  class TokenEstimator
    class << self
      # Reports whether the ::Tiktoken constant is usable. When it is not yet
      # defined, "tiktoken_ruby" is required on demand; a LoadError from that
      # require yields false.
      def available?
        unless defined?(::Tiktoken)
          require "tiktoken_ruby"
          return !!defined?(::Tiktoken)
        end

        true
      rescue LoadError
        false
      end

      # Estimates the total token count for a prompt + system prompt combination.
      # nil entries are ignored. Counting always uses the encoding Tiktoken
      # returns for "gpt-4".
      # Returns nil if tiktoken_ruby is not installed.
      def estimate_tokens(*texts)
        return nil unless available?

        tokenizer = encoder_for_model("gpt-4")
        texts.compact.map { |text| tokenizer.encode(text).length }.sum
      end

      # Returns the Tiktoken encoder for +model+, caching one encoder per
      # model name on the class.
      def encoder_for_model(model)
        cache = (@encoders ||= {})
        cache[model] ||= ::Tiktoken.encoding_for_model(model)
      end
    end
  end
end
|
data/lib/raif/version.rb
CHANGED
data/lib/raif.rb
CHANGED
|
@@ -11,6 +11,8 @@ require "raif/embedding_model_registry"
|
|
|
11
11
|
require "raif/json_schema_builder"
|
|
12
12
|
require "raif/migration_checker"
|
|
13
13
|
require "raif/messages"
|
|
14
|
+
require "raif/prompt_studio_comparison_builder"
|
|
15
|
+
require "raif/token_estimator"
|
|
14
16
|
|
|
15
17
|
require "faraday"
|
|
16
18
|
require "event_stream_parser"
|
|
@@ -6,7 +6,7 @@ module Raif
|
|
|
6
6
|
def stubbed_llm(llm_model_key, source_instance, &block)
|
|
7
7
|
test_llm = Raif.llm(llm_model_key.to_sym)
|
|
8
8
|
|
|
9
|
-
allow(test_llm).to receive(:perform_model_completion!) do |model_completion|
|
|
9
|
+
allow(test_llm).to receive(:perform_model_completion!) do |model_completion, &streaming_block|
|
|
10
10
|
result = block.call(model_completion.messages, model_completion, source_instance)
|
|
11
11
|
model_completion.raw_response = result if result.is_a?(String)
|
|
12
12
|
model_completion.completion_tokens = rand(100..2000)
|
|
@@ -14,6 +14,12 @@ module Raif
|
|
|
14
14
|
model_completion.total_tokens = model_completion.completion_tokens + model_completion.prompt_tokens
|
|
15
15
|
model_completion.save!
|
|
16
16
|
|
|
17
|
+
if streaming_block && result.is_a?(String)
|
|
18
|
+
result.chars.each_slice(25) do |chunk|
|
|
19
|
+
streaming_block.call(model_completion, chunk.join, nil)
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
|
|
17
23
|
model_completion
|
|
18
24
|
end
|
|
19
25
|
|
data/spec/support/test_task.rb
CHANGED
|
@@ -30,6 +30,15 @@ class Raif::TestJsonTask < Raif::Task
|
|
|
30
30
|
end
|
|
31
31
|
end
|
|
32
32
|
|
|
33
|
+
# Test task that calls both prompt-caching class macros
# (enable_anthropic_prompt_caching and enable_bedrock_prompt_caching) and
# always renders the same fixed prompt.
class Raif::TestCachedTask < Raif::Task
  enable_anthropic_prompt_caching
  enable_bedrock_prompt_caching

  # Returns the fixed prompt string used by this fixture.
  def build_prompt
    "Tell me a joke"
  end
end
|
|
41
|
+
|
|
33
42
|
class Raif::TestHtmlTask < Raif::Task
|
|
34
43
|
llm_response_format :html
|
|
35
44
|
llm_response_allowed_tags %w[p b i u s a]
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Test task with a single run_with argument, :topic.
# NOTE(review): no build_prompt is defined here — presumably the prompt is
# rendered from a template file; confirm against the prompt-template concern.
class Raif::TestTemplateTask < Raif::Task
  run_with :topic

  # Defaults the topic to "pirates" when none has been assigned.
  after_initialize -> { self.topic ||= "pirates" }

  # Phrase built from the topic.
  def topic_description
    "the topic of #{topic}"
  end
end
|
|
12
|
+
|
|
13
|
+
# Test task with a single run_with argument, :persona, and a fixed prompt.
# NOTE(review): no build_system_prompt is defined here — presumably the system
# prompt is rendered from a template file; confirm.
class Raif::TestTemplateSystemPromptTask < Raif::Task
  run_with :persona

  # Defaults the persona to "comedian" when none has been assigned.
  after_initialize -> { self.persona ||= "comedian" }

  # Returns the fixed prompt string used by this fixture.
  def build_prompt
    "Tell me a joke"
  end
end
|
|
22
|
+
|
|
23
|
+
# Test conversation exposing a writable, in-memory +persona+ attribute
# (a plain instance variable set through attr_writer).
class Raif::TestTemplateConversation < Raif::Conversation
  attr_writer :persona

  # Returns the assigned persona, falling back to "helpful assistant".
  def persona
    @persona || "helpful assistant"
  end
end
|
|
30
|
+
|
|
31
|
+
# Test task with a single run_with argument, :topic.
# NOTE(review): the class name suggests its prompt template renders a partial;
# the template itself is not visible here — confirm.
class Raif::TestTemplateWithPartialTask < Raif::Task
  run_with :topic

  # Defaults the topic to "dogs" when none has been assigned.
  after_initialize -> { self.topic ||= "dogs" }
end
|
|
36
|
+
|
|
37
|
+
# Test agent with a single run_with argument, :agent_role.
# NOTE(review): no prompt-building methods are defined here — presumably they
# come from template files or the parent class; confirm.
class Raif::TestTemplateAgent < Raif::Agent
  run_with :agent_role

  # Defaults the agent role to "researcher" when none has been assigned.
  after_initialize -> { self.agent_role ||= "researcher" }
end
|