raif 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. checksums.yaml +4 -4
  2. data/README.md +2 -2
  3. data/app/assets/builds/raif_admin.css +40 -2
  4. data/app/assets/builds/raif_admin_sprockets.js +2709 -0
  5. data/app/assets/javascript/raif/admin/copy_to_clipboard_controller.js +132 -0
  6. data/app/assets/javascript/raif/admin/cost_estimate_controller.js +80 -0
  7. data/app/assets/javascript/raif/admin/judge_config_controller.js +23 -0
  8. data/app/assets/javascript/raif/admin/select_all_checkboxes_controller.js +33 -0
  9. data/app/assets/javascript/raif/admin/sortable_table_controller.js +51 -0
  10. data/app/assets/javascript/raif/admin/table_search_controller.js +15 -0
  11. data/app/assets/javascript/raif/admin/tom_select_controller.js +33 -0
  12. data/app/assets/javascript/raif_admin.js +23 -0
  13. data/app/assets/javascript/raif_admin_sprockets.js +24 -0
  14. data/app/assets/stylesheets/raif_admin.scss +50 -1
  15. data/app/controllers/raif/admin/agents_controller.rb +27 -1
  16. data/app/controllers/raif/admin/configs_controller.rb +1 -0
  17. data/app/controllers/raif/admin/llms_controller.rb +27 -0
  18. data/app/controllers/raif/admin/model_completions_controller.rb +6 -0
  19. data/app/controllers/raif/admin/prompt_studio/agents_controller.rb +25 -0
  20. data/app/controllers/raif/admin/prompt_studio/base_controller.rb +32 -0
  21. data/app/controllers/raif/admin/prompt_studio/batch_runs_controller.rb +102 -0
  22. data/app/controllers/raif/admin/prompt_studio/conversations_controller.rb +25 -0
  23. data/app/controllers/raif/admin/prompt_studio/tasks_controller.rb +64 -0
  24. data/app/controllers/raif/admin/tasks_controller.rb +5 -0
  25. data/app/helpers/raif/application_helper.rb +40 -0
  26. data/app/jobs/raif/prompt_studio_batch_run_item_job.rb +11 -0
  27. data/app/jobs/raif/prompt_studio_batch_run_job.rb +15 -0
  28. data/app/jobs/raif/prompt_studio_task_run_job.rb +36 -0
  29. data/app/models/raif/agent.rb +36 -5
  30. data/app/models/raif/agents/native_tool_calling_agent.rb +101 -19
  31. data/app/models/raif/concerns/has_prompt_templates.rb +88 -0
  32. data/app/models/raif/concerns/has_runtime_duration.rb +41 -0
  33. data/app/models/raif/concerns/json_schema_definition.rb +16 -3
  34. data/app/models/raif/concerns/llm_prompt_caching.rb +20 -0
  35. data/app/models/raif/concerns/llms/anthropic/message_formatting.rb +6 -0
  36. data/app/models/raif/concerns/llms/anthropic/tool_formatting.rb +5 -1
  37. data/app/models/raif/concerns/llms/bedrock/message_formatting.rb +7 -0
  38. data/app/models/raif/concerns/llms/bedrock/tool_formatting.rb +4 -0
  39. data/app/models/raif/concerns/llms/google/message_formatting.rb +5 -2
  40. data/app/models/raif/concerns/llms/google/tool_formatting.rb +4 -0
  41. data/app/models/raif/concerns/llms/message_formatting.rb +30 -0
  42. data/app/models/raif/concerns/llms/open_ai_completions/response_tool_calls.rb +1 -1
  43. data/app/models/raif/concerns/llms/open_ai_completions/tool_formatting.rb +4 -0
  44. data/app/models/raif/concerns/llms/open_ai_responses/tool_formatting.rb +4 -0
  45. data/app/models/raif/concerns/provider_managed_tool_calls.rb +162 -0
  46. data/app/models/raif/conversation.rb +24 -3
  47. data/app/models/raif/conversation_entry.rb +6 -3
  48. data/app/models/raif/embedding_models/bedrock.rb +10 -1
  49. data/app/models/raif/embedding_models/google.rb +37 -0
  50. data/app/models/raif/evals/llm_judge.rb +70 -0
  51. data/{lib → app/models}/raif/evals/llm_judges/binary.rb +38 -0
  52. data/{lib → app/models}/raif/evals/llm_judges/comparative.rb +38 -0
  53. data/{lib → app/models}/raif/evals/llm_judges/scored.rb +38 -0
  54. data/{lib → app/models}/raif/evals/llm_judges/summarization.rb +38 -0
  55. data/app/models/raif/llm.rb +82 -7
  56. data/app/models/raif/llms/anthropic.rb +26 -4
  57. data/app/models/raif/llms/bedrock.rb +59 -5
  58. data/app/models/raif/llms/google.rb +28 -2
  59. data/app/models/raif/llms/open_ai_base.rb +4 -0
  60. data/app/models/raif/llms/open_ai_completions.rb +9 -2
  61. data/app/models/raif/llms/open_ai_responses.rb +9 -2
  62. data/app/models/raif/llms/open_router.rb +10 -3
  63. data/app/models/raif/model_completion.rb +75 -34
  64. data/app/models/raif/model_tool.rb +45 -3
  65. data/app/models/raif/model_tool_invocation.rb +31 -1
  66. data/app/models/raif/prompt_studio_batch_run.rb +155 -0
  67. data/app/models/raif/prompt_studio_batch_run_item.rb +220 -0
  68. data/app/models/raif/streaming_responses/bedrock.rb +60 -1
  69. data/app/models/raif/task.rb +30 -6
  70. data/app/views/layouts/raif/admin.html.erb +31 -1
  71. data/app/views/raif/admin/agents/_agent.html.erb +1 -0
  72. data/app/views/raif/admin/agents/index.html.erb +48 -0
  73. data/app/views/raif/admin/agents/show.html.erb +4 -0
  74. data/app/views/raif/admin/llms/index.html.erb +110 -0
  75. data/app/views/raif/admin/model_completions/_model_completion.html.erb +3 -7
  76. data/app/views/raif/admin/model_completions/index.html.erb +14 -1
  77. data/app/views/raif/admin/model_completions/show.html.erb +164 -55
  78. data/app/views/raif/admin/model_tool_invocations/index.html.erb +1 -1
  79. data/app/views/raif/admin/model_tool_invocations/show.html.erb +18 -0
  80. data/app/views/raif/admin/prompt_studio/agents/index.html.erb +56 -0
  81. data/app/views/raif/admin/prompt_studio/agents/show.html.erb +57 -0
  82. data/app/views/raif/admin/prompt_studio/batch_runs/_batch_run_item.html.erb +54 -0
  83. data/app/views/raif/admin/prompt_studio/batch_runs/_judge_config_fields.html.erb +76 -0
  84. data/app/views/raif/admin/prompt_studio/batch_runs/_judge_detail_modal.html.erb +27 -0
  85. data/app/views/raif/admin/prompt_studio/batch_runs/_modal.html.erb +35 -0
  86. data/app/views/raif/admin/prompt_studio/batch_runs/_progress.html.erb +78 -0
  87. data/app/views/raif/admin/prompt_studio/batch_runs/show.html.erb +49 -0
  88. data/app/views/raif/admin/prompt_studio/conversations/index.html.erb +48 -0
  89. data/app/views/raif/admin/prompt_studio/conversations/show.html.erb +36 -0
  90. data/app/views/raif/admin/prompt_studio/shared/_nav_tabs.html.erb +17 -0
  91. data/app/views/raif/admin/prompt_studio/shared/_prompt_comparison.html.erb +87 -0
  92. data/app/views/raif/admin/prompt_studio/shared/_type_filter.html.erb +54 -0
  93. data/app/views/raif/admin/prompt_studio/tasks/_task_result.html.erb +145 -0
  94. data/app/views/raif/admin/prompt_studio/tasks/_task_row.html.erb +12 -0
  95. data/app/views/raif/admin/prompt_studio/tasks/_task_type_filter.html.erb +58 -0
  96. data/app/views/raif/admin/prompt_studio/tasks/_tasks_table.html.erb +22 -0
  97. data/app/views/raif/admin/prompt_studio/tasks/index.html.erb +35 -0
  98. data/app/views/raif/admin/prompt_studio/tasks/show.html.erb +19 -0
  99. data/app/views/raif/admin/tasks/_task.html.erb +1 -0
  100. data/app/views/raif/admin/tasks/index.html.erb +17 -5
  101. data/app/views/raif/admin/tasks/show.html.erb +20 -0
  102. data/app/views/raif/conversation_entries/_message.html.erb +10 -6
  103. data/config/importmap.rb +8 -0
  104. data/config/locales/admin.en.yml +128 -0
  105. data/config/locales/en.yml +36 -2
  106. data/config/routes.rb +8 -0
  107. data/db/migrate/20260307000000_add_prompt_studio_run_to_raif_tasks.rb +7 -0
  108. data/db/migrate/20260308000000_create_raif_prompt_studio_batch_runs.rb +27 -0
  109. data/db/migrate/20260308000001_create_raif_prompt_studio_batch_run_items.rb +24 -0
  110. data/db/migrate/20260407000000_add_cache_token_columns_to_raif_model_completions.rb +8 -0
  111. data/lib/generators/raif/agent/agent_generator.rb +18 -0
  112. data/lib/generators/raif/agent/templates/agent.rb.tt +7 -5
  113. data/lib/generators/raif/agent/templates/system_prompt.erb.tt +3 -0
  114. data/lib/generators/raif/conversation/conversation_generator.rb +19 -1
  115. data/lib/generators/raif/conversation/templates/system_prompt.erb.tt +4 -0
  116. data/lib/generators/raif/install/templates/initializer.rb +68 -27
  117. data/lib/generators/raif/task/task_generator.rb +18 -0
  118. data/lib/generators/raif/task/templates/prompt.erb.tt +4 -0
  119. data/lib/generators/raif/task/templates/task.rb.tt +9 -8
  120. data/lib/raif/configuration.rb +10 -0
  121. data/lib/raif/embedding_model_registry.rb +8 -0
  122. data/lib/raif/engine.rb +16 -1
  123. data/lib/raif/errors/blank_response_error.rb +8 -0
  124. data/lib/raif/errors/prompt_template_error.rb +15 -0
  125. data/lib/raif/errors.rb +2 -0
  126. data/lib/raif/evals.rb +0 -6
  127. data/lib/raif/llm_registry.rb +230 -9
  128. data/lib/raif/prompt_studio_comparison_builder.rb +138 -0
  129. data/lib/raif/token_estimator.rb +28 -0
  130. data/lib/raif/version.rb +1 -1
  131. data/lib/raif.rb +2 -0
  132. data/spec/support/rspec_helpers.rb +7 -1
  133. data/spec/support/test_task.rb +9 -0
  134. data/spec/support/test_template_task.rb +41 -0
  135. metadata +65 -7
  136. data/lib/raif/evals/llm_judge.rb +0 -32
  137. data/{lib → app/models}/raif/evals/scoring_rubric.rb +0 -0
@@ -41,6 +41,13 @@ module Raif
41
41
 
42
42
  def self.default_llms
43
43
  open_ai_models = [
44
+ {
45
+ key: :open_ai_gpt_5_4,
46
+ api_name: "gpt-5.4",
47
+ input_token_cost: 2.5 / 1_000_000,
48
+ output_token_cost: 15.0 / 1_000_000,
49
+ model_provider_settings: { supports_temperature: false },
50
+ },
44
51
  {
45
52
  key: :open_ai_gpt_5_2,
46
53
  api_name: "gpt-5.2",
@@ -48,6 +55,13 @@ module Raif
48
55
  output_token_cost: 14.0 / 1_000_000,
49
56
  model_provider_settings: { supports_temperature: false },
50
57
  },
58
+ {
59
+ key: :open_ai_gpt_5_3,
60
+ api_name: "gpt-5.3",
61
+ input_token_cost: 1.75 / 1_000_000,
62
+ output_token_cost: 14.0 / 1_000_000,
63
+ model_provider_settings: { supports_temperature: false },
64
+ },
51
65
  {
52
66
  key: :open_ai_gpt_5_1,
53
67
  api_name: "gpt-5.1",
@@ -62,6 +76,20 @@ module Raif
62
76
  output_token_cost: 10.0 / 1_000_000,
63
77
  model_provider_settings: { supports_temperature: false },
64
78
  },
79
+ {
80
+ key: :open_ai_gpt_5_4_mini,
81
+ api_name: "gpt-5.4-mini",
82
+ input_token_cost: 0.75 / 1_000_000,
83
+ output_token_cost: 4.5 / 1_000_000,
84
+ model_provider_settings: { supports_temperature: false },
85
+ },
86
+ {
87
+ key: :open_ai_gpt_5_4_nano,
88
+ api_name: "gpt-5.4-nano",
89
+ input_token_cost: 0.20 / 1_000_000,
90
+ output_token_cost: 1.25 / 1_000_000,
91
+ model_provider_settings: { supports_temperature: false },
92
+ },
65
93
  {
66
94
  key: :open_ai_gpt_5_mini,
67
95
  api_name: "gpt-5-mini",
@@ -123,8 +151,8 @@ module Raif
123
151
  {
124
152
  key: :open_ai_o1_mini,
125
153
  api_name: "o1-mini",
126
- input_token_cost: 1.5 / 1_000_000,
127
- output_token_cost: 6.0 / 1_000_000,
154
+ input_token_cost: 1.1 / 1_000_000,
155
+ output_token_cost: 4.4 / 1_000_000,
128
156
  model_provider_settings: { supports_temperature: false },
129
157
  },
130
158
  {
@@ -164,7 +192,7 @@ module Raif
164
192
  # o1-mini is not supported by the OpenAI Responses API.
165
193
  open_ai_responses_models.delete_if{|model| model[:key] == :open_ai_o1_mini }
166
194
 
167
- # o1-pro and o3-pro are not supported by the OpenAI Completions API, but it is supported by the OpenAI Responses API.
195
+ # These models are not supported by the OpenAI Completions API, but are supported by the OpenAI Responses API.
168
196
  open_ai_responses_models << {
169
197
  key: :open_ai_responses_o1_pro,
170
198
  api_name: "o1-pro",
@@ -181,10 +209,79 @@ module Raif
181
209
  model_provider_settings: { supports_temperature: false },
182
210
  }
183
211
 
212
+ open_ai_responses_models << {
213
+ key: :open_ai_responses_gpt_5_pro,
214
+ api_name: "gpt-5-pro",
215
+ input_token_cost: 15.0 / 1_000_000,
216
+ output_token_cost: 120.0 / 1_000_000,
217
+ model_provider_settings: { supports_temperature: false },
218
+ }
219
+
220
+ open_ai_responses_models << {
221
+ key: :open_ai_responses_gpt_5_2_pro,
222
+ api_name: "gpt-5.2-pro",
223
+ input_token_cost: 21.0 / 1_000_000,
224
+ output_token_cost: 168.0 / 1_000_000,
225
+ model_provider_settings: { supports_temperature: false },
226
+ }
227
+
228
+ open_ai_responses_models << {
229
+ key: :open_ai_responses_gpt_5_4_pro,
230
+ api_name: "gpt-5.4-pro",
231
+ input_token_cost: 30.0 / 1_000_000,
232
+ output_token_cost: 180.0 / 1_000_000,
233
+ model_provider_settings: { supports_temperature: false, supports_structured_outputs: false },
234
+ }
235
+
184
236
  {
185
237
  Raif::Llms::OpenAiCompletions => open_ai_models,
186
238
  Raif::Llms::OpenAiResponses => open_ai_responses_models,
187
239
  Raif::Llms::Anthropic => [
240
+ {
241
+ key: :anthropic_claude_4_7_opus,
242
+ api_name: "claude-opus-4-7",
243
+ input_token_cost: 5.0 / 1_000_000,
244
+ output_token_cost: 25.0 / 1_000_000,
245
+ max_completion_tokens: 128_000,
246
+ model_provider_settings: { supports_temperature: false },
247
+ supported_provider_managed_tools: [
248
+ Raif::ModelTools::ProviderManaged::WebSearch,
249
+ Raif::ModelTools::ProviderManaged::CodeExecution
250
+ ]
251
+ },
252
+ {
253
+ key: :anthropic_claude_4_6_opus,
254
+ api_name: "claude-opus-4-6",
255
+ input_token_cost: 5.0 / 1_000_000,
256
+ output_token_cost: 25.0 / 1_000_000,
257
+ max_completion_tokens: 128_000,
258
+ supported_provider_managed_tools: [
259
+ Raif::ModelTools::ProviderManaged::WebSearch,
260
+ Raif::ModelTools::ProviderManaged::CodeExecution
261
+ ]
262
+ },
263
+ {
264
+ key: :anthropic_claude_4_6_sonnet,
265
+ api_name: "claude-sonnet-4-6",
266
+ input_token_cost: 3.0 / 1_000_000,
267
+ output_token_cost: 15.0 / 1_000_000,
268
+ max_completion_tokens: 64_000,
269
+ supported_provider_managed_tools: [
270
+ Raif::ModelTools::ProviderManaged::WebSearch,
271
+ Raif::ModelTools::ProviderManaged::CodeExecution
272
+ ]
273
+ },
274
+ {
275
+ key: :anthropic_claude_4_5_opus,
276
+ api_name: "claude-opus-4-5",
277
+ input_token_cost: 5.0 / 1_000_000,
278
+ output_token_cost: 25.0 / 1_000_000,
279
+ max_completion_tokens: 64_000,
280
+ supported_provider_managed_tools: [
281
+ Raif::ModelTools::ProviderManaged::WebSearch,
282
+ Raif::ModelTools::ProviderManaged::CodeExecution
283
+ ]
284
+ },
188
285
  {
189
286
  key: :anthropic_claude_4_5_sonnet,
190
287
  api_name: "claude-sonnet-4-5",
@@ -223,7 +320,7 @@ module Raif
223
320
  api_name: "claude-opus-4-20250514",
224
321
  input_token_cost: 15.0 / 1_000_000,
225
322
  output_token_cost: 75.0 / 1_000_000,
226
- max_completion_tokens: 8192,
323
+ max_completion_tokens: 32_000,
227
324
  supported_provider_managed_tools: [
228
325
  Raif::ModelTools::ProviderManaged::WebSearch,
229
326
  Raif::ModelTools::ProviderManaged::CodeExecution
@@ -234,7 +331,7 @@ module Raif
234
331
  api_name: "claude-sonnet-4-20250514",
235
332
  input_token_cost: 3.0 / 1_000_000,
236
333
  output_token_cost: 15.0 / 1_000_000,
237
- max_completion_tokens: 8192,
334
+ max_completion_tokens: 64_000,
238
335
  supported_provider_managed_tools: [
239
336
  Raif::ModelTools::ProviderManaged::WebSearch,
240
337
  Raif::ModelTools::ProviderManaged::CodeExecution
@@ -282,6 +379,34 @@ module Raif
282
379
  },
283
380
  ],
284
381
  Raif::Llms::Bedrock => [
382
+ {
383
+ key: :bedrock_claude_4_7_opus,
384
+ api_name: "anthropic.claude-opus-4-7",
385
+ input_token_cost: 0.005 / 1000,
386
+ output_token_cost: 0.025 / 1000,
387
+ max_completion_tokens: 128_000
388
+ },
389
+ {
390
+ key: :bedrock_claude_4_6_opus,
391
+ api_name: "anthropic.claude-opus-4-6-v1",
392
+ input_token_cost: 0.005 / 1000,
393
+ output_token_cost: 0.025 / 1000,
394
+ max_completion_tokens: 128_000
395
+ },
396
+ {
397
+ key: :bedrock_claude_4_6_sonnet,
398
+ api_name: "anthropic.claude-sonnet-4-6",
399
+ input_token_cost: 0.003 / 1000,
400
+ output_token_cost: 0.015 / 1000,
401
+ max_completion_tokens: 64_000
402
+ },
403
+ {
404
+ key: :bedrock_claude_4_5_opus,
405
+ api_name: "anthropic.claude-opus-4-5-20251101-v1:0",
406
+ input_token_cost: 0.005 / 1000,
407
+ output_token_cost: 0.025 / 1000,
408
+ max_completion_tokens: 64_000
409
+ },
285
410
  {
286
411
  key: :bedrock_claude_4_5_sonnet,
287
412
  api_name: "anthropic.claude-sonnet-4-5-20250929-v1:0",
@@ -308,14 +433,14 @@ module Raif
308
433
  api_name: "anthropic.claude-sonnet-4-20250514-v1:0",
309
434
  input_token_cost: 0.003 / 1000,
310
435
  output_token_cost: 0.015 / 1000,
311
- max_completion_tokens: 8192
436
+ max_completion_tokens: 64_000
312
437
  },
313
438
  {
314
439
  key: :bedrock_claude_4_opus,
315
440
  api_name: "anthropic.claude-opus-4-20250514-v1:0",
316
441
  input_token_cost: 0.015 / 1000,
317
442
  output_token_cost: 0.075 / 1000,
318
- max_completion_tokens: 8192
443
+ max_completion_tokens: 32_000
319
444
  },
320
445
  {
321
446
  key: :bedrock_claude_3_7_sonnet,
@@ -365,6 +490,34 @@ module Raif
365
490
  input_token_cost: 0.0002625 / 1000,
366
491
  output_token_cost: 0.0042 / 1000,
367
492
  max_completion_tokens: 4096
493
+ },
494
+ {
495
+ key: :bedrock_deepseek_v3_2,
496
+ api_name: "deepseek.v3.2",
497
+ input_token_cost: 0.62 / 1_000_000,
498
+ output_token_cost: 1.85 / 1_000_000,
499
+ max_completion_tokens: 32_768
500
+ },
501
+ {
502
+ key: :bedrock_deepseek_r1,
503
+ api_name: "deepseek.r1-v1:0",
504
+ input_token_cost: 0.00135 / 1_000,
505
+ output_token_cost: 0.0054 / 1_000,
506
+ max_completion_tokens: 32_768
507
+ },
508
+ {
509
+ key: :bedrock_gpt_oss_120b,
510
+ api_name: "openai.gpt-oss-120b-1:0",
511
+ input_token_cost: 0.15 / 1_000_000,
512
+ output_token_cost: 0.60 / 1_000_000,
513
+ max_completion_tokens: 32_768
514
+ },
515
+ {
516
+ key: :bedrock_gpt_oss_20b,
517
+ api_name: "openai.gpt-oss-20b-1:0",
518
+ input_token_cost: 0.07 / 1_000_000,
519
+ output_token_cost: 0.30 / 1_000_000,
520
+ max_completion_tokens: 32_768
368
521
  }
369
522
  ],
370
523
  Raif::Llms::OpenRouter => [
@@ -377,8 +530,8 @@ module Raif
377
530
  {
378
531
  key: :open_router_deepseek_chat_v3,
379
532
  api_name: "deepseek/deepseek-chat-v3-0324",
380
- input_token_cost: 0.27 / 1_000_000,
381
- output_token_cost: 1.1 / 1_000_000,
533
+ input_token_cost: 0.20 / 1_000_000,
534
+ output_token_cost: 0.77 / 1_000_000,
382
535
  },
383
536
  {
384
537
  key: :open_router_deepseek_v3_1,
@@ -386,6 +539,12 @@ module Raif
386
539
  input_token_cost: 0.25 / 1_000_000,
387
540
  output_token_cost: 1.0 / 1_000_000,
388
541
  },
542
+ {
543
+ key: :open_router_deepseek_v3_2,
544
+ api_name: "deepseek/deepseek-v3.2",
545
+ input_token_cost: 0.26 / 1_000_000,
546
+ output_token_cost: 0.38 / 1_000_000,
547
+ },
389
548
  {
390
549
  key: :open_router_gemini_2_0_flash,
391
550
  api_name: "google/gemini-2.0-flash-001",
@@ -410,6 +569,18 @@ module Raif
410
569
  input_token_cost: 2.0 / 1_000_000,
411
570
  output_token_cost: 12.0 / 1_000_000,
412
571
  },
572
+ {
573
+ key: :open_router_gemini_3_1_pro_preview,
574
+ api_name: "google/gemini-3.1-pro-preview",
575
+ input_token_cost: 2.0 / 1_000_000,
576
+ output_token_cost: 12.0 / 1_000_000,
577
+ },
578
+ {
579
+ key: :open_router_gemini_3_1_flash_lite_preview,
580
+ api_name: "google/gemini-3.1-flash-lite-preview",
581
+ input_token_cost: 0.25 / 1_000_000,
582
+ output_token_cost: 1.5 / 1_000_000,
583
+ },
413
584
  {
414
585
  key: :open_router_grok_4,
415
586
  api_name: "x-ai/grok-4",
@@ -428,6 +599,12 @@ module Raif
428
599
  input_token_cost: 0.45 / 1_000_000,
429
600
  output_token_cost: 2.35 / 1_000_000,
430
601
  },
602
+ {
603
+ key: :open_router_kimi_k2_5,
604
+ api_name: "moonshotai/kimi-k2.5",
605
+ input_token_cost: 0.45 / 1_000_000,
606
+ output_token_cost: 2.20 / 1_000_000,
607
+ },
431
608
  {
432
609
  key: :open_router_llama_3_3_70b_instruct,
433
610
  api_name: "meta-llama/llama-3.3-70b-instruct",
@@ -458,6 +635,18 @@ module Raif
458
635
  input_token_cost: 0.255 / 1_000_000,
459
636
  output_token_cost: 1.02 / 1_000_000,
460
637
  },
638
+ {
639
+ key: :open_router_minimax_m2_1,
640
+ api_name: "minimax/minimax-m2.1",
641
+ input_token_cost: 0.27 / 1_000_000,
642
+ output_token_cost: 0.95 / 1_000_000,
643
+ },
644
+ {
645
+ key: :open_router_minimax_m2_5,
646
+ api_name: "minimax/minimax-m2.5",
647
+ input_token_cost: 0.27 / 1_000_000,
648
+ output_token_cost: 0.95 / 1_000_000,
649
+ },
461
650
  {
462
651
  key: :open_router_mistral_large_3_2512,
463
652
  api_name: "mistralai/mistral-large-2512",
@@ -482,8 +671,40 @@ module Raif
482
671
  input_token_cost: 0.05 / 1_000_000,
483
672
  output_token_cost: 0.2 / 1_000_000,
484
673
  },
674
+ {
675
+ key: :open_router_grok_4_20,
676
+ api_name: "x-ai/grok-4.20",
677
+ input_token_cost: 2.0 / 1_000_000,
678
+ output_token_cost: 6.0 / 1_000_000,
679
+ },
680
+ {
681
+ key: :open_router_google_gemma_4_31b_it,
682
+ api_name: "google/gemma-4-31b-it",
683
+ input_token_cost: 0.14 / 1_000_000,
684
+ output_token_cost: 0.40 / 1_000_000,
685
+ },
485
686
  ],
486
687
  Raif::Llms::Google => [
688
+ {
689
+ key: :google_gemini_3_1_pro,
690
+ api_name: "gemini-3.1-pro-preview",
691
+ input_token_cost: 2.0 / 1_000_000,
692
+ output_token_cost: 12.0 / 1_000_000,
693
+ supported_provider_managed_tools: [
694
+ Raif::ModelTools::ProviderManaged::WebSearch,
695
+ Raif::ModelTools::ProviderManaged::CodeExecution
696
+ ]
697
+ },
698
+ {
699
+ key: :google_gemini_3_1_flash_lite,
700
+ api_name: "gemini-3.1-flash-lite-preview",
701
+ input_token_cost: 0.25 / 1_000_000,
702
+ output_token_cost: 1.5 / 1_000_000,
703
+ supported_provider_managed_tools: [
704
+ Raif::ModelTools::ProviderManaged::WebSearch,
705
+ Raif::ModelTools::ProviderManaged::CodeExecution
706
+ ]
707
+ },
487
708
  {
488
709
  key: :google_gemini_3_0_pro,
489
710
  api_name: "gemini-3-pro-preview",
@@ -0,0 +1,138 @@
1
+ # frozen_string_literal: true
2
+
3
module Raif
  # Compares the prompts stored on a record with the prompts that the current
  # code renders for that same record.
  #
  # The record is expected to respond to +system_prompt+ and +llm_model_key+,
  # and normally to +build_prompt+ / +build_system_prompt+. The +prompt+,
  # +run_with+ and +raif_model_completion+ readers are optional: they are only
  # used when the record responds to them.
  class PromptStudioComparisonBuilder
    # Attempts to rebuild the prompt from current code for a given record.
    # Returns a hash with the rendered prompts and any warnings.
    #
    # @param record [Object] the record whose prompts should be compared
    # @return [Hash] see {#build}
    def self.build(record)
      new(record).build
    end

    # @param record [Object] the record whose prompts should be compared
    def initialize(record)
      @record = record
    end

    # Assembles the comparison.
    #
    # The current prompts are listed before +warnings+ on purpose: rendering
    # them may append an error message to the warnings array, and the hash
    # values are evaluated in order.
    #
    # @return [Hash] with the keys +:original_prompt+,
    #   +:original_system_prompt+, +:current_prompt+,
    #   +:current_system_prompt+, +:prompt_changed+,
    #   +:system_prompt_changed+, +:has_stale_references+, +:warnings+,
    #   +:original_prompt_tokens+, +:original_prompt_token_cost+,
    #   +:current_prompt_token_estimate+ and +:current_prompt_cost_estimate+
    def build
      {
        original_prompt: original_prompt,
        original_system_prompt: original_system_prompt,
        current_prompt: current_prompt,
        current_system_prompt: current_system_prompt,
        prompt_changed: prompt_changed?,
        system_prompt_changed: system_prompt_changed?,
        has_stale_references: has_stale_references?,
        warnings: warnings,
        original_prompt_tokens: original_prompt_tokens,
        original_prompt_token_cost: original_prompt_token_cost,
        current_prompt_token_estimate: current_prompt_token_estimate,
        current_prompt_cost_estimate: current_prompt_cost_estimate
      }
    end

    private

    # The prompt stored on the record, or nil when the record has no +prompt+ reader.
    def original_prompt
      @original_prompt ||= @record.respond_to?(:prompt) ? @record.prompt : nil
    end

    # The system prompt stored on the record.
    def original_system_prompt
      @original_system_prompt ||= @record.system_prompt
    end

    # The prompt as rendered by the current code.
    #
    # NotImplementedError is rescued explicitly because it is not a
    # StandardError and would slip past the bare +rescue+; it yields nil
    # without a warning. Any other StandardError yields nil and is reported
    # through +warnings+.
    def current_prompt
      return @current_prompt if defined?(@current_prompt)

      @current_prompt = begin
        @record.build_prompt
      rescue NotImplementedError
        nil
      rescue => e
        warnings << "Error rendering current prompt: #{e.message}"
        nil
      end
    end

    # The system prompt as rendered by the current code. Error handling
    # mirrors +current_prompt+.
    def current_system_prompt
      return @current_system_prompt if defined?(@current_system_prompt)

      @current_system_prompt = begin
        @record.build_system_prompt
      rescue NotImplementedError
        nil
      rescue => e
        warnings << "Error rendering current system prompt: #{e.message}"
        nil
      end
    end

    # Mutable, memoized list of warning messages. It is seeded with the
    # stale-reference warning when applicable; render errors are appended later.
    def warnings
      @warnings ||= [].tap do |w|
        w << I18n.t("raif.admin.prompt_studio.common.warning_stale_reference") if has_stale_references?
      end
    end

    # Memoized result of +detect_stale_references+ (+defined?+ is used so a
    # false result is cached too).
    def has_stale_references?
      return @has_stale_references if defined?(@has_stale_references)

      @has_stale_references = detect_stale_references
    end

    # True when any "gid://" string value in the record's +run_with+ can no
    # longer be located (the locator returns nil or raises).
    def detect_stale_references
      return false unless @record.respond_to?(:run_with) && @record.run_with.present?

      @record.run_with.any? do |_key, value|
        next false unless value.is_a?(String) && value.start_with?("gid://")

        begin
          GlobalID::Locator.locate(value).nil?
        rescue StandardError
          true
        end
      end
    end

    # Two prompts count as changed only when both are present and they differ
    # after stripping surrounding whitespace.
    def changed?(original, current)
      original.present? && current.present? && original.strip != current.strip
    end

    # Prompt token count recorded on the record's model completion, if any.
    def original_prompt_tokens
      return unless @record.respond_to?(:raif_model_completion)

      @record.raif_model_completion&.prompt_tokens
    end

    # Prompt token cost recorded on the record's model completion, if any.
    def original_prompt_token_cost
      return unless @record.respond_to?(:raif_model_completion)

      @record.raif_model_completion&.prompt_token_cost
    end

    # Estimated token count of the current prompts, or nil when nothing
    # changed or the estimator returns nil.
    #
    # Memoized: the value is read both by +build+ and by
    # +current_prompt_cost_estimate+, and computing it tokenizes both prompts.
    def current_prompt_token_estimate
      return @current_prompt_token_estimate if defined?(@current_prompt_token_estimate)

      @current_prompt_token_estimate =
        if prompt_changed? || system_prompt_changed?
          Raif::TokenEstimator.estimate_tokens(current_system_prompt, current_prompt)
        end
    end

    # Estimated input cost of the current prompts: the configured
    # +:input_token_cost+ of the record's LLM multiplied by the token
    # estimate. Returns nil when any of those pieces is missing.
    def current_prompt_cost_estimate
      token_estimate = current_prompt_token_estimate
      return unless token_estimate
      return unless @record.llm_model_key.present?

      input_token_cost = Raif.llm_config(@record.llm_model_key.to_sym)&.dig(:input_token_cost)
      return unless input_token_cost

      input_token_cost * token_estimate
    end

    def prompt_changed?
      changed?(original_prompt, current_prompt)
    end

    def system_prompt_changed?
      changed?(original_system_prompt, current_system_prompt)
    end
  end
end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Raif
4
+ class TokenEstimator
5
+ def self.available?
6
+ return true if defined?(::Tiktoken)
7
+
8
+ require "tiktoken_ruby"
9
+ !!defined?(::Tiktoken)
10
+ rescue LoadError
11
+ false
12
+ end
13
+
14
+ # Estimates the total token count for a prompt + system prompt combination.
15
+ # Returns nil if tiktoken_ruby is not installed.
16
+ def self.estimate_tokens(*texts)
17
+ return unless available?
18
+
19
+ encoder = encoder_for_model("gpt-4")
20
+ texts.compact.sum { |text| encoder.encode(text).length }
21
+ end
22
+
23
+ def self.encoder_for_model(model)
24
+ @encoders ||= {}
25
+ @encoders[model] ||= ::Tiktoken.encoding_for_model(model)
26
+ end
27
+ end
28
+ end
data/lib/raif/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Raif
4
- VERSION = "1.4.0"
4
+ VERSION = "1.5.0"
5
5
  end
data/lib/raif.rb CHANGED
@@ -11,6 +11,8 @@ require "raif/embedding_model_registry"
11
11
  require "raif/json_schema_builder"
12
12
  require "raif/migration_checker"
13
13
  require "raif/messages"
14
+ require "raif/prompt_studio_comparison_builder"
15
+ require "raif/token_estimator"
14
16
 
15
17
  require "faraday"
16
18
  require "event_stream_parser"
@@ -6,7 +6,7 @@ module Raif
6
6
  def stubbed_llm(llm_model_key, source_instance, &block)
7
7
  test_llm = Raif.llm(llm_model_key.to_sym)
8
8
 
9
- allow(test_llm).to receive(:perform_model_completion!) do |model_completion|
9
+ allow(test_llm).to receive(:perform_model_completion!) do |model_completion, &streaming_block|
10
10
  result = block.call(model_completion.messages, model_completion, source_instance)
11
11
  model_completion.raw_response = result if result.is_a?(String)
12
12
  model_completion.completion_tokens = rand(100..2000)
@@ -14,6 +14,12 @@ module Raif
14
14
  model_completion.total_tokens = model_completion.completion_tokens + model_completion.prompt_tokens
15
15
  model_completion.save!
16
16
 
17
+ if streaming_block && result.is_a?(String)
18
+ result.chars.each_slice(25) do |chunk|
19
+ streaming_block.call(model_completion, chunk.join, nil)
20
+ end
21
+ end
22
+
17
23
  model_completion
18
24
  end
19
25
 
@@ -30,6 +30,15 @@ class Raif::TestJsonTask < Raif::Task
30
30
  end
31
31
  end
32
32
 
33
# Spec-support task that switches on both the Anthropic and the Bedrock
# prompt-caching macros and always asks for a joke.
class Raif::TestCachedTask < Raif::Task
  enable_anthropic_prompt_caching
  enable_bedrock_prompt_caching

  # @return [String] the fixed prompt used by this task
  def build_prompt
    "Tell me a joke"
  end
end
41
+
33
42
  class Raif::TestHtmlTask < Raif::Task
34
43
  llm_response_format :html
35
44
  llm_response_allowed_tags %w[p b i u s a]
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
# Spec-support task that declares +topic+ through +run_with+ and defines no
# +build_prompt+ of its own (presumably the prompt comes from a template;
# confirm against the prompt-template concern).
class Raif::TestTemplateTask < Raif::Task
  run_with :topic

  # Fall back to "pirates" when no topic was supplied.
  after_initialize { self.topic ||= "pirates" }

  # @return [String] a phrase describing the topic
  def topic_description
    "the topic of #{topic}"
  end
end
12
+
13
# Spec-support task with a hard-coded prompt and a +persona+ declared through
# +run_with+ (presumably consumed by a system-prompt template; confirm).
class Raif::TestTemplateSystemPromptTask < Raif::Task
  run_with :persona

  # Fall back to "comedian" when no persona was supplied.
  after_initialize { self.persona ||= "comedian" }

  # @return [String] the fixed prompt used by this task
  def build_prompt
    "Tell me a joke"
  end
end
22
+
23
# Spec-support conversation exposing an overridable +persona+.
class Raif::TestTemplateConversation < Raif::Conversation
  attr_writer :persona

  # @return [Object] the assigned persona, or "helpful assistant" when none
  #   (or a falsy one) has been set
  def persona
    return @persona if @persona

    "helpful assistant"
  end
end
30
+
31
# Spec-support task that declares +topic+ through +run_with+ and defaults it
# to "dogs" (the name suggests its template renders a partial; confirm).
class Raif::TestTemplateWithPartialTask < Raif::Task
  run_with :topic

  # Fall back to "dogs" when no topic was supplied.
  after_initialize { self.topic ||= "dogs" }
end
36
+
37
# Spec-support agent that declares +agent_role+ through +run_with+ and
# defaults it to "researcher".
class Raif::TestTemplateAgent < Raif::Agent
  run_with :agent_role

  # Fall back to "researcher" when no role was supplied.
  after_initialize { self.agent_role ||= "researcher" }
end
+ end