raif 1.2.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +29 -935
- data/app/assets/builds/raif_admin.css +5 -1
- data/app/assets/images/raif-logo-white.svg +8 -0
- data/app/assets/stylesheets/raif_admin.scss +4 -0
- data/app/jobs/raif/conversation_entry_job.rb +1 -1
- data/app/models/raif/agents/re_act_step.rb +1 -2
- data/app/models/raif/concerns/has_llm.rb +1 -1
- data/app/models/raif/concerns/task_run_args.rb +62 -0
- data/app/models/raif/conversation.rb +8 -0
- data/app/models/raif/conversation_entry.rb +6 -9
- data/app/models/raif/llm.rb +1 -1
- data/app/models/raif/llms/open_router.rb +47 -4
- data/app/models/raif/task.rb +22 -9
- data/app/views/layouts/raif/admin.html.erb +3 -1
- data/app/views/raif/conversation_entries/_form.html.erb +1 -1
- data/app/views/raif/conversations/_full_conversation.html.erb +3 -6
- data/app/views/raif/conversations/_initial_chat_message.html.erb +5 -0
- data/config/locales/en.yml +8 -0
- data/db/migrate/20250804013843_add_task_run_args_to_raif_tasks.rb +13 -0
- data/db/migrate/20250811171150_make_raif_task_creator_optional.rb +8 -0
- data/exe/raif +7 -0
- data/lib/generators/raif/agent/agent_generator.rb +22 -7
- data/lib/generators/raif/agent/templates/agent.rb.tt +20 -24
- data/lib/generators/raif/agent/templates/agent_eval_set.rb.tt +48 -0
- data/lib/generators/raif/agent/templates/application_agent.rb.tt +0 -2
- data/lib/generators/raif/base_generator.rb +19 -0
- data/lib/generators/raif/conversation/conversation_generator.rb +21 -2
- data/lib/generators/raif/conversation/templates/application_conversation.rb.tt +0 -2
- data/lib/generators/raif/conversation/templates/conversation.rb.tt +29 -33
- data/lib/generators/raif/conversation/templates/conversation_eval_set.rb.tt +70 -0
- data/lib/generators/raif/eval_set/eval_set_generator.rb +28 -0
- data/lib/generators/raif/eval_set/templates/eval_set.rb.tt +21 -0
- data/lib/generators/raif/evals/setup/setup_generator.rb +47 -0
- data/lib/generators/raif/install/install_generator.rb +15 -0
- data/lib/generators/raif/install/templates/initializer.rb +14 -3
- data/lib/generators/raif/model_tool/model_tool_generator.rb +5 -2
- data/lib/generators/raif/model_tool/templates/model_tool.rb.tt +78 -76
- data/lib/generators/raif/model_tool/templates/model_tool_invocation_partial.html.erb.tt +10 -0
- data/lib/generators/raif/task/task_generator.rb +22 -3
- data/lib/generators/raif/task/templates/application_task.rb.tt +0 -2
- data/lib/generators/raif/task/templates/task.rb.tt +55 -59
- data/lib/generators/raif/task/templates/task_eval_set.rb.tt +54 -0
- data/lib/raif/cli/base.rb +39 -0
- data/lib/raif/cli/evals.rb +47 -0
- data/lib/raif/cli/evals_setup.rb +27 -0
- data/lib/raif/cli.rb +67 -0
- data/lib/raif/configuration.rb +23 -9
- data/lib/raif/engine.rb +2 -1
- data/lib/raif/evals/eval.rb +30 -0
- data/lib/raif/evals/eval_set.rb +111 -0
- data/lib/raif/evals/eval_sets/expectations.rb +53 -0
- data/lib/raif/evals/eval_sets/llm_judge_expectations.rb +255 -0
- data/lib/raif/evals/expectation_result.rb +39 -0
- data/lib/raif/evals/llm_judge.rb +32 -0
- data/lib/raif/evals/llm_judges/binary.rb +94 -0
- data/lib/raif/evals/llm_judges/comparative.rb +89 -0
- data/lib/raif/evals/llm_judges/scored.rb +63 -0
- data/lib/raif/evals/llm_judges/summarization.rb +166 -0
- data/lib/raif/evals/run.rb +201 -0
- data/lib/raif/evals/scoring_rubric.rb +174 -0
- data/lib/raif/evals.rb +26 -0
- data/lib/raif/llm_registry.rb +33 -0
- data/lib/raif/migration_checker.rb +3 -3
- data/lib/raif/utils/colors.rb +23 -0
- data/lib/raif/utils.rb +1 -0
- data/lib/raif/version.rb +1 -1
- data/lib/raif.rb +4 -0
- data/spec/support/current_temperature_test_tool.rb +34 -0
- data/spec/support/test_conversation.rb +1 -1
- metadata +37 -3
@@ -0,0 +1,174 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Raif
|
4
|
+
module Evals
|
5
|
+
# ScoringRubric provides a standardized way to define evaluation criteria with
|
6
|
+
# multiple scoring levels. Each level can define either a score range or a single
|
7
|
+
# score value, along with descriptive text explaining what qualifies for that score.
|
8
|
+
#
|
9
|
+
# @example Creating a custom rubric
|
10
|
+
# rubric = ScoringRubric.new(
|
11
|
+
# name: :technical_accuracy,
|
12
|
+
# description: "Evaluates technical correctness and precision",
|
13
|
+
# levels: [
|
14
|
+
# { score_range: (9..10), description: "Technically perfect with no errors" },
|
15
|
+
# { score_range: (7..8), description: "Mostly correct with minor technical issues" },
|
16
|
+
# { score_range: (5..6), description: "Generally correct but some technical problems" },
|
17
|
+
# { score_range: (3..4), description: "Significant technical errors present" },
|
18
|
+
# { score_range: (0..2), description: "Technically incorrect or misleading" }
|
19
|
+
# ]
|
20
|
+
# )
|
21
|
+
#
|
22
|
+
# @example Integer scoring levels
|
23
|
+
# rubric = ScoringRubric.new(
|
24
|
+
# name: :technical_accuracy,
|
25
|
+
# description: "Evaluates technical correctness and precision",
|
26
|
+
# levels: [
|
27
|
+
# { score: 5, description: "Technically perfect with no errors" },
|
28
|
+
# { score: 4, description: "Mostly correct with minor technical issues" },
|
29
|
+
# { score: 3, description: "Generally correct but some technical problems" },
|
30
|
+
# { score: 2, description: "Significant technical errors present" },
|
31
|
+
# { score: 1, description: "Mostly incorrect or misleading" },
|
32
|
+
# { score: 0, description: "Completely incorrect or misleading" }
|
33
|
+
# ]
|
34
|
+
# )
|
35
|
+
#
|
36
|
+
# @example Using built-in rubrics
|
37
|
+
# accuracy_rubric = ScoringRubric.accuracy
|
38
|
+
# helpfulness_rubric = ScoringRubric.helpfulness
|
39
|
+
# clarity_rubric = ScoringRubric.clarity
|
40
|
+
#
|
41
|
+
class ScoringRubric
  # @return [Symbol] The rubric's identifier name
  attr_reader :name
  # @return [String] Human-readable description of what this rubric evaluates
  attr_reader :description
  # @return [Array<Hash>] Array of scoring level definitions
  attr_reader :levels

  # Creates a new ScoringRubric with the specified criteria.
  #
  # The arguments are stored as given; levels are only inspected when
  # {#to_prompt} is called.
  #
  # @param name [Symbol] Identifier for this rubric (e.g., :accuracy, :helpfulness)
  # @param description [String] Human-readable description of what this rubric evaluates
  # @param levels [Array<Hash>] Array of scoring level definitions. Each level must contain
  #   either :score (Integer) or :score_range (Range), plus :description (String)
  def initialize(name:, description:, levels:)
    @name = name
    @description = description
    @levels = levels
  end

  # Converts the rubric into a formatted string suitable for LLM prompts.
  #
  # The output is the rubric description, a blank line, a "Scoring levels:"
  # heading, and one "- <score>: <description>" line per level in the order the
  # levels were given. Leading/trailing whitespace is stripped from the result.
  #
  # @return [String] Formatted rubric text ready for inclusion in prompts
  #
  # @example Output format
  #   "Evaluates factual correctness and precision
  #
  #   Scoring levels:
  #   - 9-10: Completely accurate with no errors
  #   - 7-8: Mostly accurate with minor imprecisions
  #   - 5-6: Generally accurate but some notable errors"
  #
  # @raise [ArgumentError] If a level doesn't contain :score or :score_range
  def to_prompt
    # Collect the lines and join once instead of re-allocating the prompt with `+=`.
    level_lines = levels.map do |level|
      "- #{score_label(level)}: #{level[:description]}\n"
    end

    "#{description}\n\nScoring levels:\n#{level_lines.join}".strip
  end

  class << self
    # Creates a rubric for evaluating factual accuracy and correctness.
    #
    # This rubric focuses on whether information is factually correct,
    # precise, and free from errors or misconceptions.
    #
    # @return [ScoringRubric] Pre-configured accuracy rubric (1-5 scale)
    #
    # @example
    #   rubric = ScoringRubric.accuracy
    #   expect_llm_judge_score(response, scoring_rubric: rubric, min_passing_score: 4)
    def accuracy
      new(
        name: :accuracy,
        description: "Evaluates factual correctness and precision",
        levels: [
          { score: 5, description: "Completely accurate with no errors" },
          { score: 4, description: "Mostly accurate with minor imprecisions" },
          { score: 3, description: "Generally accurate but some notable errors" },
          { score: 2, description: "Significant inaccuracies present" },
          { score: 1, description: "Mostly or entirely inaccurate" }
        ]
      )
    end

    # Creates a rubric for evaluating how well content addresses user needs.
    #
    # This rubric assesses whether the response is useful, relevant, and
    # effectively helps the user accomplish their goals.
    #
    # @return [ScoringRubric] Pre-configured helpfulness rubric (1-5 scale)
    #
    # @example
    #   rubric = ScoringRubric.helpfulness
    #   expect_llm_judge_score(response, scoring_rubric: rubric, min_passing_score: 4)
    def helpfulness
      new(
        name: :helpfulness,
        description: "Evaluates how well the response addresses user needs",
        levels: [
          { score: 5, description: "Extremely helpful, fully addresses the need" },
          { score: 4, description: "Very helpful with good coverage" },
          { score: 3, description: "Moderately helpful but missing some aspects" },
          { score: 2, description: "Somewhat helpful but significant gaps" },
          { score: 1, description: "Not helpful or misleading" }
        ]
      )
    end

    # Creates a rubric for evaluating clarity and comprehensibility.
    #
    # This rubric focuses on how easy content is to understand, whether
    # it's well-organized, and if the language is appropriate for the audience.
    #
    # @return [ScoringRubric] Pre-configured clarity rubric (1-5 scale)
    #
    # @example
    #   rubric = ScoringRubric.clarity
    #   expect_llm_judge_score(response, scoring_rubric: rubric, min_passing_score: 4)
    def clarity
      new(
        name: :clarity,
        description: "Evaluates clarity and comprehensibility",
        levels: [
          { score: 5, description: "Crystal clear and easy to understand" },
          { score: 4, description: "Clear with minor ambiguities" },
          { score: 3, description: "Generally clear but some confusion" },
          { score: 2, description: "Unclear in significant ways" },
          { score: 1, description: "Very unclear or incomprehensible" }
        ]
      )
    end
  end

  private

  # Builds the score portion of a single prompt line.
  #
  # @param level [Hash] A level definition holding :score or :score_range
  # @return [Object, String] The :score value as-is, or "min-max" for a range
  # @raise [ArgumentError] If :score is absent and :score_range is not a Range
  def score_label(level)
    return level[:score] if level.key?(:score)

    range = level[:score_range]
    raise ArgumentError, "level must include :score or :score_range (Range)" unless range.is_a?(Range)

    upper = range.end
    # An exclusive range (a...b) tops out at b - 1. An endless exclusive range has
    # no upper bound to adjust, so skip the subtraction instead of calling `-` on
    # nil; it then renders the same way as the inclusive endless form.
    upper -= 1 if range.exclude_end? && !upper.nil?

    "#{range.begin}-#{upper}"
  end
end
|
173
|
+
end
|
174
|
+
end
|
data/lib/raif/evals.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "raif/evals/expectation_result"
|
4
|
+
require "raif/evals/eval"
|
5
|
+
require "raif/evals/eval_set"
|
6
|
+
require "raif/evals/run"
|
7
|
+
require "raif/evals/llm_judge"
|
8
|
+
require "raif/evals/llm_judges/binary"
|
9
|
+
require "raif/evals/llm_judges/comparative"
|
10
|
+
require "raif/evals/llm_judges/scored"
|
11
|
+
require "raif/evals/llm_judges/summarization"
|
12
|
+
require "raif/evals/scoring_rubric"
|
13
|
+
|
14
|
+
module Raif
  module Evals
    # Empty namespace modules used to organize eval sets by kind.
    module Tasks; end
    module Conversations; end
    module Agents; end
  end
end
|
data/lib/raif/llm_registry.rb
CHANGED
@@ -113,6 +113,27 @@ module Raif
|
|
113
113
|
output_token_cost: 4.4 / 1_000_000,
|
114
114
|
model_provider_settings: { supports_temperature: false },
|
115
115
|
},
|
116
|
+
{
|
117
|
+
key: :open_ai_gpt_5,
|
118
|
+
api_name: "gpt-5",
|
119
|
+
input_token_cost: 1.25 / 1_000_000,
|
120
|
+
output_token_cost: 10.0 / 1_000_000,
|
121
|
+
model_provider_settings: { supports_temperature: false },
|
122
|
+
},
|
123
|
+
{
|
124
|
+
key: :open_ai_gpt_5_mini,
|
125
|
+
api_name: "gpt-5-mini",
|
126
|
+
input_token_cost: 0.25 / 1_000_000,
|
127
|
+
output_token_cost: 2.0 / 1_000_000,
|
128
|
+
model_provider_settings: { supports_temperature: false },
|
129
|
+
},
|
130
|
+
{
|
131
|
+
key: :open_ai_gpt_5_nano,
|
132
|
+
api_name: "gpt-5-nano",
|
133
|
+
input_token_cost: 0.05 / 1_000_000,
|
134
|
+
output_token_cost: 0.4 / 1_000_000,
|
135
|
+
model_provider_settings: { supports_temperature: false },
|
136
|
+
}
|
116
137
|
]
|
117
138
|
|
118
139
|
open_ai_responses_models = open_ai_models.dup.map.with_index do |model, _index|
|
@@ -321,6 +342,18 @@ module Raif
|
|
321
342
|
input_token_cost: 0.27 / 1_000_000,
|
322
343
|
output_token_cost: 1.1 / 1_000_000,
|
323
344
|
},
|
345
|
+
{
|
346
|
+
key: :open_router_open_ai_gpt_oss_120b,
|
347
|
+
api_name: "gpt-oss-120b",
|
348
|
+
input_token_cost: 0.15 / 1_000_000,
|
349
|
+
output_token_cost: 0.6 / 1_000_000,
|
350
|
+
},
|
351
|
+
{
|
352
|
+
key: :open_router_open_ai_gpt_oss_20b,
|
353
|
+
api_name: "gpt-oss-20b",
|
354
|
+
input_token_cost: 0.05 / 1_000_000,
|
355
|
+
output_token_cost: 0.2 / 1_000_000,
|
356
|
+
}
|
324
357
|
]
|
325
358
|
}
|
326
359
|
end
|
@@ -53,8 +53,7 @@ module Raif
|
|
53
53
|
end
|
54
54
|
|
55
55
|
def build_warning_message(uninstalled_migration_names)
|
56
|
-
<<~WARNING
|
57
|
-
\e[33m
|
56
|
+
msg = <<~WARNING
|
58
57
|
⚠️ RAIF MIGRATION WARNING ⚠️
|
59
58
|
|
60
59
|
The following Raif migrations have not been run in your application:
|
@@ -66,8 +65,9 @@ module Raif
|
|
66
65
|
rails raif:install:migrations
|
67
66
|
rails db:migrate
|
68
67
|
|
69
|
-
\e[0m
|
70
68
|
WARNING
|
69
|
+
|
70
|
+
Raif::Utils::Colors.yellow(msg)
|
71
71
|
end
|
72
72
|
end
|
73
73
|
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Raif
  module Utils
    # Helpers that wrap text in ANSI color escape sequences for terminal output.
    # Each helper prefixes the text with a color code and appends the reset
    # sequence ("\e[0m").
    module Colors
      class << self
        # @param text [#to_s] Text to wrap
        # @return [String] The text wrapped in the green (32) escape sequence
        def green(text)
          colorize(text, 32)
        end

        # @param text [#to_s] Text to wrap
        # @return [String] The text wrapped in the red (31) escape sequence
        def red(text)
          colorize(text, 31)
        end

        # @param text [#to_s] Text to wrap
        # @return [String] The text wrapped in the yellow (33) escape sequence
        def yellow(text)
          colorize(text, 33)
        end

        # @param text [#to_s] Text to wrap
        # @return [String] The text wrapped in the blue (34) escape sequence
        def blue(text)
          colorize(text, 34)
        end

        private

        # Wraps +text+ between the escape sequence for +code+ and the reset sequence.
        def colorize(text, code)
          "\e[#{code}m#{text}\e[0m"
        end
      end
    end
  end
end
|
data/lib/raif/utils.rb
CHANGED
data/lib/raif/version.rb
CHANGED
data/lib/raif.rb
CHANGED
@@ -0,0 +1,34 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Test-support model tool that always reports a fixed temperature for a zip code.
class Raif::ModelTools::CurrentTemperatureTestTool < Raif::ModelTool
  tool_arguments_schema do
    string :zip_code, description: "The zip code to get the current temperature for"
  end

  tool_description { "A tool to get the current temperature for a given zip code" }

  # Stores a hard-coded temperature of 72 as the invocation's result and
  # returns the stored result.
  def self.process_invocation(tool_invocation)
    tool_invocation.update!(result: { temperature: 72 })
    tool_invocation.result
  end

  # This tool always answers true here.
  def self.triggers_observation_to_model?
    true
  end

  # Builds the observation sentence from the invocation's "zip_code" argument
  # and the "temperature" entry of its result.
  def self.observation_for_invocation(tool_invocation)
    zip = tool_invocation.tool_arguments["zip_code"]
    degrees = tool_invocation.result["temperature"]

    "The current temperature for zip code #{zip} is #{degrees} degrees Fahrenheit."
  end
end
|
metadata
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: raif
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.1
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben Roesch
|
8
8
|
- Brian Leslie
|
9
|
-
bindir:
|
9
|
+
bindir: exe
|
10
10
|
cert_chain: []
|
11
11
|
date: 1980-01-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
@@ -142,7 +142,8 @@ description: Raif (Ruby AI Framework) is a Rails engine that helps you add AI-po
|
|
142
142
|
email:
|
143
143
|
- ben@cultivatelabs.com
|
144
144
|
- brian@cultivatelabs.com
|
145
|
-
executables:
|
145
|
+
executables:
|
146
|
+
- raif
|
146
147
|
extensions: []
|
147
148
|
extra_rdoc_files: []
|
148
149
|
files:
|
@@ -152,6 +153,7 @@ files:
|
|
152
153
|
- app/assets/builds/raif.css
|
153
154
|
- app/assets/builds/raif_admin.css
|
154
155
|
- app/assets/config/raif_manifest.js
|
156
|
+
- app/assets/images/raif-logo-white.svg
|
155
157
|
- app/assets/javascript/raif.js
|
156
158
|
- app/assets/javascript/raif/controllers/conversations_controller.js
|
157
159
|
- app/assets/javascript/raif/stream_actions/raif_scroll_to_bottom.js
|
@@ -198,6 +200,7 @@ files:
|
|
198
200
|
- app/models/raif/concerns/llms/open_ai_completions/tool_formatting.rb
|
199
201
|
- app/models/raif/concerns/llms/open_ai_responses/message_formatting.rb
|
200
202
|
- app/models/raif/concerns/llms/open_ai_responses/tool_formatting.rb
|
203
|
+
- app/models/raif/concerns/task_run_args.rb
|
201
204
|
- app/models/raif/conversation.rb
|
202
205
|
- app/models/raif/conversation_entry.rb
|
203
206
|
- app/models/raif/embedding_model.rb
|
@@ -260,6 +263,7 @@ files:
|
|
260
263
|
- app/views/raif/conversation_entries/new.turbo_stream.erb
|
261
264
|
- app/views/raif/conversations/_available_user_tools.html.erb
|
262
265
|
- app/views/raif/conversations/_full_conversation.html.erb
|
266
|
+
- app/views/raif/conversations/_initial_chat_message.html.erb
|
263
267
|
- app/views/raif/conversations/show.html.erb
|
264
268
|
- config/i18n-tasks.yml
|
265
269
|
- config/importmap.rb
|
@@ -276,21 +280,36 @@ files:
|
|
276
280
|
- db/migrate/20250527213016_add_response_id_and_response_array_to_model_completions.rb
|
277
281
|
- db/migrate/20250603140622_add_citations_to_raif_model_completions.rb
|
278
282
|
- db/migrate/20250603202013_add_stream_response_to_raif_model_completions.rb
|
283
|
+
- db/migrate/20250804013843_add_task_run_args_to_raif_tasks.rb
|
284
|
+
- db/migrate/20250811171150_make_raif_task_creator_optional.rb
|
285
|
+
- exe/raif
|
279
286
|
- lib/generators/raif/agent/agent_generator.rb
|
280
287
|
- lib/generators/raif/agent/templates/agent.rb.tt
|
288
|
+
- lib/generators/raif/agent/templates/agent_eval_set.rb.tt
|
281
289
|
- lib/generators/raif/agent/templates/application_agent.rb.tt
|
290
|
+
- lib/generators/raif/base_generator.rb
|
282
291
|
- lib/generators/raif/conversation/conversation_generator.rb
|
283
292
|
- lib/generators/raif/conversation/templates/application_conversation.rb.tt
|
284
293
|
- lib/generators/raif/conversation/templates/conversation.rb.tt
|
294
|
+
- lib/generators/raif/conversation/templates/conversation_eval_set.rb.tt
|
295
|
+
- lib/generators/raif/eval_set/eval_set_generator.rb
|
296
|
+
- lib/generators/raif/eval_set/templates/eval_set.rb.tt
|
297
|
+
- lib/generators/raif/evals/setup/setup_generator.rb
|
285
298
|
- lib/generators/raif/install/install_generator.rb
|
286
299
|
- lib/generators/raif/install/templates/initializer.rb
|
287
300
|
- lib/generators/raif/model_tool/model_tool_generator.rb
|
288
301
|
- lib/generators/raif/model_tool/templates/model_tool.rb.tt
|
302
|
+
- lib/generators/raif/model_tool/templates/model_tool_invocation_partial.html.erb.tt
|
289
303
|
- lib/generators/raif/task/task_generator.rb
|
290
304
|
- lib/generators/raif/task/templates/application_task.rb.tt
|
291
305
|
- lib/generators/raif/task/templates/task.rb.tt
|
306
|
+
- lib/generators/raif/task/templates/task_eval_set.rb.tt
|
292
307
|
- lib/generators/raif/views_generator.rb
|
293
308
|
- lib/raif.rb
|
309
|
+
- lib/raif/cli.rb
|
310
|
+
- lib/raif/cli/base.rb
|
311
|
+
- lib/raif/cli/evals.rb
|
312
|
+
- lib/raif/cli/evals_setup.rb
|
294
313
|
- lib/raif/configuration.rb
|
295
314
|
- lib/raif/embedding_model_registry.rb
|
296
315
|
- lib/raif/engine.rb
|
@@ -304,18 +323,33 @@ files:
|
|
304
323
|
- lib/raif/errors/open_ai/json_schema_error.rb
|
305
324
|
- lib/raif/errors/streaming_error.rb
|
306
325
|
- lib/raif/errors/unsupported_feature_error.rb
|
326
|
+
- lib/raif/evals.rb
|
327
|
+
- lib/raif/evals/eval.rb
|
328
|
+
- lib/raif/evals/eval_set.rb
|
329
|
+
- lib/raif/evals/eval_sets/expectations.rb
|
330
|
+
- lib/raif/evals/eval_sets/llm_judge_expectations.rb
|
331
|
+
- lib/raif/evals/expectation_result.rb
|
332
|
+
- lib/raif/evals/llm_judge.rb
|
333
|
+
- lib/raif/evals/llm_judges/binary.rb
|
334
|
+
- lib/raif/evals/llm_judges/comparative.rb
|
335
|
+
- lib/raif/evals/llm_judges/scored.rb
|
336
|
+
- lib/raif/evals/llm_judges/summarization.rb
|
337
|
+
- lib/raif/evals/run.rb
|
338
|
+
- lib/raif/evals/scoring_rubric.rb
|
307
339
|
- lib/raif/json_schema_builder.rb
|
308
340
|
- lib/raif/languages.rb
|
309
341
|
- lib/raif/llm_registry.rb
|
310
342
|
- lib/raif/migration_checker.rb
|
311
343
|
- lib/raif/rspec.rb
|
312
344
|
- lib/raif/utils.rb
|
345
|
+
- lib/raif/utils/colors.rb
|
313
346
|
- lib/raif/utils/html_fragment_processor.rb
|
314
347
|
- lib/raif/utils/html_to_markdown_converter.rb
|
315
348
|
- lib/raif/utils/readable_content_extractor.rb
|
316
349
|
- lib/raif/version.rb
|
317
350
|
- lib/tasks/raif_tasks.rake
|
318
351
|
- spec/support/complex_test_tool.rb
|
352
|
+
- spec/support/current_temperature_test_tool.rb
|
319
353
|
- spec/support/rspec_helpers.rb
|
320
354
|
- spec/support/test_conversation.rb
|
321
355
|
- spec/support/test_embedding_model.rb
|