raif 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. checksums.yaml +4 -4
  2. data/README.md +2 -2
  3. data/app/assets/builds/raif_admin.css +40 -2
  4. data/app/assets/builds/raif_admin_sprockets.js +2709 -0
  5. data/app/assets/javascript/raif/admin/copy_to_clipboard_controller.js +132 -0
  6. data/app/assets/javascript/raif/admin/cost_estimate_controller.js +80 -0
  7. data/app/assets/javascript/raif/admin/judge_config_controller.js +23 -0
  8. data/app/assets/javascript/raif/admin/select_all_checkboxes_controller.js +33 -0
  9. data/app/assets/javascript/raif/admin/sortable_table_controller.js +51 -0
  10. data/app/assets/javascript/raif/admin/table_search_controller.js +15 -0
  11. data/app/assets/javascript/raif/admin/tom_select_controller.js +33 -0
  12. data/app/assets/javascript/raif_admin.js +23 -0
  13. data/app/assets/javascript/raif_admin_sprockets.js +24 -0
  14. data/app/assets/stylesheets/raif_admin.scss +50 -1
  15. data/app/controllers/raif/admin/agents_controller.rb +27 -1
  16. data/app/controllers/raif/admin/configs_controller.rb +1 -0
  17. data/app/controllers/raif/admin/llms_controller.rb +27 -0
  18. data/app/controllers/raif/admin/model_completions_controller.rb +6 -0
  19. data/app/controllers/raif/admin/prompt_studio/agents_controller.rb +25 -0
  20. data/app/controllers/raif/admin/prompt_studio/base_controller.rb +32 -0
  21. data/app/controllers/raif/admin/prompt_studio/batch_runs_controller.rb +102 -0
  22. data/app/controllers/raif/admin/prompt_studio/conversations_controller.rb +25 -0
  23. data/app/controllers/raif/admin/prompt_studio/tasks_controller.rb +64 -0
  24. data/app/controllers/raif/admin/tasks_controller.rb +5 -0
  25. data/app/helpers/raif/application_helper.rb +40 -0
  26. data/app/jobs/raif/prompt_studio_batch_run_item_job.rb +11 -0
  27. data/app/jobs/raif/prompt_studio_batch_run_job.rb +15 -0
  28. data/app/jobs/raif/prompt_studio_task_run_job.rb +36 -0
  29. data/app/models/raif/agent.rb +36 -5
  30. data/app/models/raif/agents/native_tool_calling_agent.rb +101 -19
  31. data/app/models/raif/concerns/has_prompt_templates.rb +88 -0
  32. data/app/models/raif/concerns/has_runtime_duration.rb +41 -0
  33. data/app/models/raif/concerns/json_schema_definition.rb +16 -3
  34. data/app/models/raif/concerns/llm_prompt_caching.rb +20 -0
  35. data/app/models/raif/concerns/llms/anthropic/message_formatting.rb +6 -0
  36. data/app/models/raif/concerns/llms/anthropic/tool_formatting.rb +5 -1
  37. data/app/models/raif/concerns/llms/bedrock/message_formatting.rb +7 -0
  38. data/app/models/raif/concerns/llms/bedrock/tool_formatting.rb +4 -0
  39. data/app/models/raif/concerns/llms/google/message_formatting.rb +5 -2
  40. data/app/models/raif/concerns/llms/google/tool_formatting.rb +4 -0
  41. data/app/models/raif/concerns/llms/message_formatting.rb +30 -0
  42. data/app/models/raif/concerns/llms/open_ai_completions/response_tool_calls.rb +1 -1
  43. data/app/models/raif/concerns/llms/open_ai_completions/tool_formatting.rb +4 -0
  44. data/app/models/raif/concerns/llms/open_ai_responses/tool_formatting.rb +4 -0
  45. data/app/models/raif/concerns/provider_managed_tool_calls.rb +162 -0
  46. data/app/models/raif/conversation.rb +24 -3
  47. data/app/models/raif/conversation_entry.rb +6 -3
  48. data/app/models/raif/embedding_models/bedrock.rb +10 -1
  49. data/app/models/raif/embedding_models/google.rb +37 -0
  50. data/app/models/raif/evals/llm_judge.rb +70 -0
  51. data/{lib → app/models}/raif/evals/llm_judges/binary.rb +38 -0
  52. data/{lib → app/models}/raif/evals/llm_judges/comparative.rb +38 -0
  53. data/{lib → app/models}/raif/evals/llm_judges/scored.rb +38 -0
  54. data/{lib → app/models}/raif/evals/llm_judges/summarization.rb +38 -0
  55. data/app/models/raif/llm.rb +82 -7
  56. data/app/models/raif/llms/anthropic.rb +26 -4
  57. data/app/models/raif/llms/bedrock.rb +59 -5
  58. data/app/models/raif/llms/google.rb +28 -2
  59. data/app/models/raif/llms/open_ai_base.rb +4 -0
  60. data/app/models/raif/llms/open_ai_completions.rb +9 -2
  61. data/app/models/raif/llms/open_ai_responses.rb +9 -2
  62. data/app/models/raif/llms/open_router.rb +10 -3
  63. data/app/models/raif/model_completion.rb +75 -34
  64. data/app/models/raif/model_tool.rb +45 -3
  65. data/app/models/raif/model_tool_invocation.rb +31 -1
  66. data/app/models/raif/prompt_studio_batch_run.rb +155 -0
  67. data/app/models/raif/prompt_studio_batch_run_item.rb +220 -0
  68. data/app/models/raif/streaming_responses/bedrock.rb +60 -1
  69. data/app/models/raif/task.rb +30 -6
  70. data/app/views/layouts/raif/admin.html.erb +31 -1
  71. data/app/views/raif/admin/agents/_agent.html.erb +1 -0
  72. data/app/views/raif/admin/agents/index.html.erb +48 -0
  73. data/app/views/raif/admin/agents/show.html.erb +4 -0
  74. data/app/views/raif/admin/llms/index.html.erb +110 -0
  75. data/app/views/raif/admin/model_completions/_model_completion.html.erb +3 -7
  76. data/app/views/raif/admin/model_completions/index.html.erb +14 -1
  77. data/app/views/raif/admin/model_completions/show.html.erb +164 -55
  78. data/app/views/raif/admin/model_tool_invocations/index.html.erb +1 -1
  79. data/app/views/raif/admin/model_tool_invocations/show.html.erb +18 -0
  80. data/app/views/raif/admin/prompt_studio/agents/index.html.erb +56 -0
  81. data/app/views/raif/admin/prompt_studio/agents/show.html.erb +57 -0
  82. data/app/views/raif/admin/prompt_studio/batch_runs/_batch_run_item.html.erb +54 -0
  83. data/app/views/raif/admin/prompt_studio/batch_runs/_judge_config_fields.html.erb +76 -0
  84. data/app/views/raif/admin/prompt_studio/batch_runs/_judge_detail_modal.html.erb +27 -0
  85. data/app/views/raif/admin/prompt_studio/batch_runs/_modal.html.erb +35 -0
  86. data/app/views/raif/admin/prompt_studio/batch_runs/_progress.html.erb +78 -0
  87. data/app/views/raif/admin/prompt_studio/batch_runs/show.html.erb +49 -0
  88. data/app/views/raif/admin/prompt_studio/conversations/index.html.erb +48 -0
  89. data/app/views/raif/admin/prompt_studio/conversations/show.html.erb +36 -0
  90. data/app/views/raif/admin/prompt_studio/shared/_nav_tabs.html.erb +17 -0
  91. data/app/views/raif/admin/prompt_studio/shared/_prompt_comparison.html.erb +87 -0
  92. data/app/views/raif/admin/prompt_studio/shared/_type_filter.html.erb +54 -0
  93. data/app/views/raif/admin/prompt_studio/tasks/_task_result.html.erb +145 -0
  94. data/app/views/raif/admin/prompt_studio/tasks/_task_row.html.erb +12 -0
  95. data/app/views/raif/admin/prompt_studio/tasks/_task_type_filter.html.erb +58 -0
  96. data/app/views/raif/admin/prompt_studio/tasks/_tasks_table.html.erb +22 -0
  97. data/app/views/raif/admin/prompt_studio/tasks/index.html.erb +35 -0
  98. data/app/views/raif/admin/prompt_studio/tasks/show.html.erb +19 -0
  99. data/app/views/raif/admin/tasks/_task.html.erb +1 -0
  100. data/app/views/raif/admin/tasks/index.html.erb +17 -5
  101. data/app/views/raif/admin/tasks/show.html.erb +20 -0
  102. data/app/views/raif/conversation_entries/_message.html.erb +10 -6
  103. data/config/importmap.rb +8 -0
  104. data/config/locales/admin.en.yml +128 -0
  105. data/config/locales/en.yml +36 -2
  106. data/config/routes.rb +8 -0
  107. data/db/migrate/20260307000000_add_prompt_studio_run_to_raif_tasks.rb +7 -0
  108. data/db/migrate/20260308000000_create_raif_prompt_studio_batch_runs.rb +27 -0
  109. data/db/migrate/20260308000001_create_raif_prompt_studio_batch_run_items.rb +24 -0
  110. data/db/migrate/20260407000000_add_cache_token_columns_to_raif_model_completions.rb +8 -0
  111. data/lib/generators/raif/agent/agent_generator.rb +18 -0
  112. data/lib/generators/raif/agent/templates/agent.rb.tt +7 -5
  113. data/lib/generators/raif/agent/templates/system_prompt.erb.tt +3 -0
  114. data/lib/generators/raif/conversation/conversation_generator.rb +19 -1
  115. data/lib/generators/raif/conversation/templates/system_prompt.erb.tt +4 -0
  116. data/lib/generators/raif/install/templates/initializer.rb +68 -27
  117. data/lib/generators/raif/task/task_generator.rb +18 -0
  118. data/lib/generators/raif/task/templates/prompt.erb.tt +4 -0
  119. data/lib/generators/raif/task/templates/task.rb.tt +9 -8
  120. data/lib/raif/configuration.rb +10 -0
  121. data/lib/raif/embedding_model_registry.rb +8 -0
  122. data/lib/raif/engine.rb +16 -1
  123. data/lib/raif/errors/blank_response_error.rb +8 -0
  124. data/lib/raif/errors/prompt_template_error.rb +15 -0
  125. data/lib/raif/errors.rb +2 -0
  126. data/lib/raif/evals.rb +0 -6
  127. data/lib/raif/llm_registry.rb +230 -9
  128. data/lib/raif/prompt_studio_comparison_builder.rb +138 -0
  129. data/lib/raif/token_estimator.rb +28 -0
  130. data/lib/raif/version.rb +1 -1
  131. data/lib/raif.rb +2 -0
  132. data/spec/support/rspec_helpers.rb +7 -1
  133. data/spec/support/test_task.rb +9 -0
  134. data/spec/support/test_template_task.rb +41 -0
  135. metadata +65 -7
  136. data/lib/raif/evals/llm_judge.rb +0 -32
  137. data/{lib → app/models}/raif/evals/scoring_rubric.rb +0 -0
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: raif
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Roesch
@@ -152,11 +152,21 @@ files:
152
152
  - Rakefile
153
153
  - app/assets/builds/raif.css
154
154
  - app/assets/builds/raif_admin.css
155
+ - app/assets/builds/raif_admin_sprockets.js
155
156
  - app/assets/config/raif_manifest.js
156
157
  - app/assets/images/raif-logo-white.svg
157
158
  - app/assets/javascript/raif.js
159
+ - app/assets/javascript/raif/admin/copy_to_clipboard_controller.js
160
+ - app/assets/javascript/raif/admin/cost_estimate_controller.js
161
+ - app/assets/javascript/raif/admin/judge_config_controller.js
162
+ - app/assets/javascript/raif/admin/select_all_checkboxes_controller.js
163
+ - app/assets/javascript/raif/admin/sortable_table_controller.js
164
+ - app/assets/javascript/raif/admin/table_search_controller.js
165
+ - app/assets/javascript/raif/admin/tom_select_controller.js
158
166
  - app/assets/javascript/raif/controllers/conversations_controller.js
159
167
  - app/assets/javascript/raif/stream_actions/raif_scroll_to_bottom.js
168
+ - app/assets/javascript/raif_admin.js
169
+ - app/assets/javascript/raif_admin_sprockets.js
160
170
  - app/assets/stylesheets/raif.scss
161
171
  - app/assets/stylesheets/raif/admin/conversation.scss
162
172
  - app/assets/stylesheets/raif/admin/stats.scss
@@ -167,8 +177,14 @@ files:
167
177
  - app/controllers/raif/admin/application_controller.rb
168
178
  - app/controllers/raif/admin/configs_controller.rb
169
179
  - app/controllers/raif/admin/conversations_controller.rb
180
+ - app/controllers/raif/admin/llms_controller.rb
170
181
  - app/controllers/raif/admin/model_completions_controller.rb
171
182
  - app/controllers/raif/admin/model_tool_invocations_controller.rb
183
+ - app/controllers/raif/admin/prompt_studio/agents_controller.rb
184
+ - app/controllers/raif/admin/prompt_studio/base_controller.rb
185
+ - app/controllers/raif/admin/prompt_studio/batch_runs_controller.rb
186
+ - app/controllers/raif/admin/prompt_studio/conversations_controller.rb
187
+ - app/controllers/raif/admin/prompt_studio/tasks_controller.rb
172
188
  - app/controllers/raif/admin/stats/model_tool_invocations_controller.rb
173
189
  - app/controllers/raif/admin/stats/tasks_controller.rb
174
190
  - app/controllers/raif/admin/stats_controller.rb
@@ -180,6 +196,9 @@ files:
180
196
  - app/helpers/raif/shared/conversations_helper.rb
181
197
  - app/jobs/raif/application_job.rb
182
198
  - app/jobs/raif/conversation_entry_job.rb
199
+ - app/jobs/raif/prompt_studio_batch_run_item_job.rb
200
+ - app/jobs/raif/prompt_studio_batch_run_job.rb
201
+ - app/jobs/raif/prompt_studio_task_run_job.rb
183
202
  - app/models/raif/admin/task_stat.rb
184
203
  - app/models/raif/agent.rb
185
204
  - app/models/raif/agents/native_tool_calling_agent.rb
@@ -188,9 +207,12 @@ files:
188
207
  - app/models/raif/concerns/boolean_timestamp.rb
189
208
  - app/models/raif/concerns/has_available_model_tools.rb
190
209
  - app/models/raif/concerns/has_llm.rb
210
+ - app/models/raif/concerns/has_prompt_templates.rb
191
211
  - app/models/raif/concerns/has_requested_language.rb
212
+ - app/models/raif/concerns/has_runtime_duration.rb
192
213
  - app/models/raif/concerns/invokes_model_tools.rb
193
214
  - app/models/raif/concerns/json_schema_definition.rb
215
+ - app/models/raif/concerns/llm_prompt_caching.rb
194
216
  - app/models/raif/concerns/llm_response_parsing.rb
195
217
  - app/models/raif/concerns/llm_temperature.rb
196
218
  - app/models/raif/concerns/llms/anthropic/message_formatting.rb
@@ -210,12 +232,20 @@ files:
210
232
  - app/models/raif/concerns/llms/open_ai_responses/message_formatting.rb
211
233
  - app/models/raif/concerns/llms/open_ai_responses/response_tool_calls.rb
212
234
  - app/models/raif/concerns/llms/open_ai_responses/tool_formatting.rb
235
+ - app/models/raif/concerns/provider_managed_tool_calls.rb
213
236
  - app/models/raif/concerns/run_with.rb
214
237
  - app/models/raif/conversation.rb
215
238
  - app/models/raif/conversation_entry.rb
216
239
  - app/models/raif/embedding_model.rb
217
240
  - app/models/raif/embedding_models/bedrock.rb
241
+ - app/models/raif/embedding_models/google.rb
218
242
  - app/models/raif/embedding_models/open_ai.rb
243
+ - app/models/raif/evals/llm_judge.rb
244
+ - app/models/raif/evals/llm_judges/binary.rb
245
+ - app/models/raif/evals/llm_judges/comparative.rb
246
+ - app/models/raif/evals/llm_judges/scored.rb
247
+ - app/models/raif/evals/llm_judges/summarization.rb
248
+ - app/models/raif/evals/scoring_rubric.rb
219
249
  - app/models/raif/llm.rb
220
250
  - app/models/raif/llms/anthropic.rb
221
251
  - app/models/raif/llms/bedrock.rb
@@ -236,6 +266,8 @@ files:
236
266
  - app/models/raif/model_tools/provider_managed/image_generation.rb
237
267
  - app/models/raif/model_tools/provider_managed/web_search.rb
238
268
  - app/models/raif/model_tools/wikipedia_search.rb
269
+ - app/models/raif/prompt_studio_batch_run.rb
270
+ - app/models/raif/prompt_studio_batch_run_item.rb
239
271
  - app/models/raif/streaming_responses/anthropic.rb
240
272
  - app/models/raif/streaming_responses/bedrock.rb
241
273
  - app/models/raif/streaming_responses/google.rb
@@ -253,6 +285,7 @@ files:
253
285
  - app/views/raif/admin/conversations/_conversation_entry.html.erb
254
286
  - app/views/raif/admin/conversations/index.html.erb
255
287
  - app/views/raif/admin/conversations/show.html.erb
288
+ - app/views/raif/admin/llms/index.html.erb
256
289
  - app/views/raif/admin/model_completions/_model_completion.html.erb
257
290
  - app/views/raif/admin/model_completions/index.html.erb
258
291
  - app/views/raif/admin/model_completions/show.html.erb
@@ -261,6 +294,25 @@ files:
261
294
  - app/views/raif/admin/model_tool_invocations/show.html.erb
262
295
  - app/views/raif/admin/model_tools/_list.html.erb
263
296
  - app/views/raif/admin/model_tools/_model_tool.html.erb
297
+ - app/views/raif/admin/prompt_studio/agents/index.html.erb
298
+ - app/views/raif/admin/prompt_studio/agents/show.html.erb
299
+ - app/views/raif/admin/prompt_studio/batch_runs/_batch_run_item.html.erb
300
+ - app/views/raif/admin/prompt_studio/batch_runs/_judge_config_fields.html.erb
301
+ - app/views/raif/admin/prompt_studio/batch_runs/_judge_detail_modal.html.erb
302
+ - app/views/raif/admin/prompt_studio/batch_runs/_modal.html.erb
303
+ - app/views/raif/admin/prompt_studio/batch_runs/_progress.html.erb
304
+ - app/views/raif/admin/prompt_studio/batch_runs/show.html.erb
305
+ - app/views/raif/admin/prompt_studio/conversations/index.html.erb
306
+ - app/views/raif/admin/prompt_studio/conversations/show.html.erb
307
+ - app/views/raif/admin/prompt_studio/shared/_nav_tabs.html.erb
308
+ - app/views/raif/admin/prompt_studio/shared/_prompt_comparison.html.erb
309
+ - app/views/raif/admin/prompt_studio/shared/_type_filter.html.erb
310
+ - app/views/raif/admin/prompt_studio/tasks/_task_result.html.erb
311
+ - app/views/raif/admin/prompt_studio/tasks/_task_row.html.erb
312
+ - app/views/raif/admin/prompt_studio/tasks/_task_type_filter.html.erb
313
+ - app/views/raif/admin/prompt_studio/tasks/_tasks_table.html.erb
314
+ - app/views/raif/admin/prompt_studio/tasks/index.html.erb
315
+ - app/views/raif/admin/prompt_studio/tasks/show.html.erb
264
316
  - app/views/raif/admin/stats/_stats_tile.html.erb
265
317
  - app/views/raif/admin/stats/index.html.erb
266
318
  - app/views/raif/admin/stats/model_tool_invocations/index.html.erb
@@ -314,16 +366,22 @@ files:
314
366
  - db/migrate/20260119000000_add_failure_tracking_to_raif_model_completions.rb
315
367
  - db/migrate/20260119000001_add_completed_at_to_raif_model_completions.rb
316
368
  - db/migrate/20260119000002_add_started_at_to_raif_model_completions.rb
369
+ - db/migrate/20260307000000_add_prompt_studio_run_to_raif_tasks.rb
370
+ - db/migrate/20260308000000_create_raif_prompt_studio_batch_runs.rb
371
+ - db/migrate/20260308000001_create_raif_prompt_studio_batch_run_items.rb
372
+ - db/migrate/20260407000000_add_cache_token_columns_to_raif_model_completions.rb
317
373
  - exe/raif
318
374
  - lib/generators/raif/agent/agent_generator.rb
319
375
  - lib/generators/raif/agent/templates/agent.rb.tt
320
376
  - lib/generators/raif/agent/templates/agent_eval_set.rb.tt
321
377
  - lib/generators/raif/agent/templates/application_agent.rb.tt
378
+ - lib/generators/raif/agent/templates/system_prompt.erb.tt
322
379
  - lib/generators/raif/base_generator.rb
323
380
  - lib/generators/raif/conversation/conversation_generator.rb
324
381
  - lib/generators/raif/conversation/templates/application_conversation.rb.tt
325
382
  - lib/generators/raif/conversation/templates/conversation.rb.tt
326
383
  - lib/generators/raif/conversation/templates/conversation_eval_set.rb.tt
384
+ - lib/generators/raif/conversation/templates/system_prompt.erb.tt
327
385
  - lib/generators/raif/eval_set/eval_set_generator.rb
328
386
  - lib/generators/raif/eval_set/templates/eval_set.rb.tt
329
387
  - lib/generators/raif/evals/setup/setup_generator.rb
@@ -334,6 +392,7 @@ files:
334
392
  - lib/generators/raif/model_tool/templates/model_tool_invocation_partial.html.erb.tt
335
393
  - lib/generators/raif/task/task_generator.rb
336
394
  - lib/generators/raif/task/templates/application_task.rb.tt
395
+ - lib/generators/raif/task/templates/prompt.erb.tt
337
396
  - lib/generators/raif/task/templates/task.rb.tt
338
397
  - lib/generators/raif/task/templates/task_eval_set.rb.tt
339
398
  - lib/generators/raif/views_generator.rb
@@ -347,6 +406,7 @@ files:
347
406
  - lib/raif/engine.rb
348
407
  - lib/raif/errors.rb
349
408
  - lib/raif/errors/action_not_authorized_error.rb
409
+ - lib/raif/errors/blank_response_error.rb
350
410
  - lib/raif/errors/instance_dependent_schema_error.rb
351
411
  - lib/raif/errors/invalid_config_error.rb
352
412
  - lib/raif/errors/invalid_conversation_type_error.rb
@@ -354,6 +414,7 @@ files:
354
414
  - lib/raif/errors/invalid_model_image_input_error.rb
355
415
  - lib/raif/errors/invalid_user_tool_type_error.rb
356
416
  - lib/raif/errors/open_ai/json_schema_error.rb
417
+ - lib/raif/errors/prompt_template_error.rb
357
418
  - lib/raif/errors/streaming_error.rb
358
419
  - lib/raif/errors/unsupported_feature_error.rb
359
420
  - lib/raif/evals.rb
@@ -362,19 +423,15 @@ files:
362
423
  - lib/raif/evals/eval_sets/expectations.rb
363
424
  - lib/raif/evals/eval_sets/llm_judge_expectations.rb
364
425
  - lib/raif/evals/expectation_result.rb
365
- - lib/raif/evals/llm_judge.rb
366
- - lib/raif/evals/llm_judges/binary.rb
367
- - lib/raif/evals/llm_judges/comparative.rb
368
- - lib/raif/evals/llm_judges/scored.rb
369
- - lib/raif/evals/llm_judges/summarization.rb
370
426
  - lib/raif/evals/run.rb
371
- - lib/raif/evals/scoring_rubric.rb
372
427
  - lib/raif/json_schema_builder.rb
373
428
  - lib/raif/languages.rb
374
429
  - lib/raif/llm_registry.rb
375
430
  - lib/raif/messages.rb
376
431
  - lib/raif/migration_checker.rb
432
+ - lib/raif/prompt_studio_comparison_builder.rb
377
433
  - lib/raif/rspec.rb
434
+ - lib/raif/token_estimator.rb
378
435
  - lib/raif/utils.rb
379
436
  - lib/raif/utils/colors.rb
380
437
  - lib/raif/utils/html_fragment_processor.rb
@@ -391,6 +448,7 @@ files:
391
448
  - spec/support/test_llm.rb
392
449
  - spec/support/test_model_tool.rb
393
450
  - spec/support/test_task.rb
451
+ - spec/support/test_template_task.rb
394
452
  homepage: https://github.com/cultivatelabs/raif
395
453
  licenses:
396
454
  - MIT
@@ -1,32 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Raif
4
- module Evals
5
- class LlmJudge < Raif::Task
6
- # Set default temperature for consistent judging
7
- llm_temperature 0.0
8
-
9
- # Default to JSON response format for structured output
10
- llm_response_format :json
11
-
12
- run_with :content_to_judge # the content to judge
13
- run_with :additional_context # additional context to be provided to the judge
14
-
15
- def default_llm_model_key
16
- Raif.config.evals_default_llm_judge_model_key || super
17
- end
18
-
19
- def judgment_reasoning
20
- parsed_response["reasoning"] if completed?
21
- end
22
-
23
- def judgment_confidence
24
- parsed_response["confidence"] if completed?
25
- end
26
-
27
- def low_confidence?
28
- judgment_confidence && judgment_confidence < 0.5
29
- end
30
- end
31
- end
32
- end
File without changes