ruby_llm-evals 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +180 -8
- data/Rakefile +0 -2
- data/app/assets/stylesheets/ruby_llm/evals/application.css +15 -0
- data/app/assets/stylesheets/ruby_llm/evals/bulma.min.css +3 -0
- data/app/assets/stylesheets/ruby_llm/evals/json_editor.css +25 -0
- data/app/controllers/concerns/ruby_llm/evals/prompt_executions/prompt_execution_scoped.rb +19 -0
- data/app/controllers/ruby_llm/evals/application_controller.rb +14 -0
- data/app/controllers/ruby_llm/evals/prompt_executions/failures_controller.rb +15 -0
- data/app/controllers/ruby_llm/evals/prompt_executions/passages_controller.rb +15 -0
- data/app/controllers/ruby_llm/evals/prompt_executions/retries_controller.rb +16 -0
- data/app/controllers/ruby_llm/evals/prompts_controller.rb +87 -0
- data/app/controllers/ruby_llm/evals/runs_controller.rb +46 -0
- data/app/helpers/ruby_llm/evals/application_helper.rb +39 -0
- data/app/helpers/ruby_llm/evals/prompt_executions_helper.rb +6 -0
- data/app/helpers/ruby_llm/evals/prompts_helper.rb +37 -0
- data/app/helpers/ruby_llm/evals/runs_helper.rb +6 -0
- data/app/javascript/ruby_llm/evals/application.js +3 -0
- data/app/javascript/ruby_llm/evals/controllers/application.js +13 -0
- data/app/javascript/ruby_llm/evals/controllers/eval_type_selector_controller.js +37 -0
- data/app/javascript/ruby_llm/evals/controllers/file_input_controller.js +21 -0
- data/app/javascript/ruby_llm/evals/controllers/index.js +4 -0
- data/app/javascript/ruby_llm/evals/controllers/json_editor_controller.js +129 -0
- data/app/javascript/ruby_llm/evals/controllers/provider_model_controller.js +85 -0
- data/app/javascript/ruby_llm/evals/controllers/schema_selector_controller.js +31 -0
- data/app/jobs/ruby_llm/evals/application_job.rb +6 -0
- data/app/jobs/ruby_llm/evals/execute_sample_job.rb +26 -0
- data/app/jobs/ruby_llm/evals/perform_run_job.rb +21 -0
- data/app/mailers/ruby_llm/evals/application_mailer.rb +8 -0
- data/app/models/concerns/ruby_llm/evals/job_trackable.rb +15 -0
- data/app/models/ruby_llm/evals/application_record.rb +7 -0
- data/app/models/ruby_llm/evals/page.rb +53 -0
- data/app/models/ruby_llm/evals/prompt.rb +55 -0
- data/app/models/ruby_llm/evals/prompt_execution.rb +169 -0
- data/app/models/ruby_llm/evals/run.rb +45 -0
- data/app/models/ruby_llm/evals/sample.rb +20 -0
- data/app/schemas/ruby_llm/evals/judge_verdict_schema.rb +8 -0
- data/app/views/layouts/ruby_llm/evals/application.html.erb +29 -0
- data/app/views/ruby_llm/evals/application/_flashes.html.erb +9 -0
- data/app/views/ruby_llm/evals/application/_nav.html.erb +12 -0
- data/app/views/ruby_llm/evals/application/_pagination.html.erb +7 -0
- data/app/views/ruby_llm/evals/application/_tabs.html.erb +6 -0
- data/app/views/ruby_llm/evals/prompts/_filters.html.erb +15 -0
- data/app/views/ruby_llm/evals/prompts/_form.html.erb +104 -0
- data/app/views/ruby_llm/evals/prompts/_prompt.html.erb +14 -0
- data/app/views/ruby_llm/evals/prompts/compare.html.erb +90 -0
- data/app/views/ruby_llm/evals/prompts/edit.html.erb +5 -0
- data/app/views/ruby_llm/evals/prompts/index.html.erb +32 -0
- data/app/views/ruby_llm/evals/prompts/new.html.erb +5 -0
- data/app/views/ruby_llm/evals/prompts/show.html.erb +107 -0
- data/app/views/ruby_llm/evals/runs/_filters.html.erb +17 -0
- data/app/views/ruby_llm/evals/runs/_run.html.erb +13 -0
- data/app/views/ruby_llm/evals/runs/index.html.erb +30 -0
- data/app/views/ruby_llm/evals/runs/show.html.erb +188 -0
- data/app/views/ruby_llm/evals/samples/_form.html.erb +88 -0
- data/config/importmap.rb +13 -0
- data/config/locales/en.yml +7 -0
- data/config/routes.rb +20 -1
- data/db/migrate/20251022211228_create_ruby_llm_evals_prompts.rb +21 -0
- data/db/migrate/20251022211229_create_ruby_llm_evals_samples.rb +14 -0
- data/db/migrate/20251022211230_create_ruby_llm_evals_runs.rb +21 -0
- data/db/migrate/20251022211231_create_ruby_llm_evals_prompt_executions.rb +26 -0
- data/lib/activemodel/validations/json_validator.rb +14 -0
- data/lib/ruby_llm/evals/engine.rb +49 -1
- data/lib/ruby_llm/evals/version.rb +2 -2
- data/lib/ruby_llm/evals.rb +7 -3
- metadata +65 -6
- /data/lib/tasks/{ruby_llm/evals_tasks.rake → ruby_llm_evals_tasks.rake} +0 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Creates the +ruby_llm_evals_runs+ table.
#
# A row belongs to a prompt (required foreign key) and stores the id of the
# ActiveJob associated with it, optional start/end timestamps, and the
# provider/model settings and prompt text columns declared below.
class CreateRubyLLMEvalsRuns < ActiveRecord::Migration[7.0]
  def change
    create_table(:ruby_llm_evals_runs) do |table|
      # Required association, enforced at the database level.
      table.references(:ruby_llm_evals_prompt, null: false, foreign_key: true)

      # Job bookkeeping: the job id is mandatory, the timestamps are nullable.
      table.string(:active_job_id, null: false)
      table.timestamp(:started_at)
      table.timestamp(:ended_at)

      # Provider and model are mandatory; every remaining setting is optional.
      table.string(:provider, null: false)
      table.string(:model, null: false)
      table.float(:temperature)
      table.json(:params)
      table.json(:tools)
      table.string(:schema)
      table.json(:schema_other)

      # Free-form prompt text.
      table.text(:instructions)
      table.text(:message)

      table.timestamps
    end
  end
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# Creates the +ruby_llm_evals_prompt_executions+ table.
#
# A row belongs to both a sample and a run (required foreign keys) and stores
# the evaluation type, expected output, variables, result columns, job
# bookkeeping, an error message, and the judge-related columns declared below.
class CreateRubyLLMEvalsPromptExecutions < ActiveRecord::Migration[7.0]
  def change
    create_table(:ruby_llm_evals_prompt_executions) do |table|
      # Explicit short index names are supplied instead of the generated ones.
      # NOTE(review): presumably because the default names would exceed the
      # database identifier length limit - confirm.
      table.references(
        :ruby_llm_evals_sample,
        null: false,
        foreign_key: true,
        index: { name: "index_rle_prompt_executions_on_rle_sample_id" }
      )
      table.references(
        :ruby_llm_evals_run,
        null: false,
        foreign_key: true,
        index: { name: "index_rle_prompt_executions_on_rle_run_id" }
      )

      # Evaluation definition; only the type is mandatory.
      table.string(:eval_type, null: false)
      table.text(:expected_output)
      table.json(:variables)

      # Execution result columns.
      # NOTE(review): :input / :output are integers - presumably token counts; confirm.
      table.integer(:input)
      table.integer(:output)
      table.text(:message)
      table.boolean(:passed)

      # Job bookkeeping; the job id is explicitly nullable here.
      table.string(:active_job_id, null: true)
      table.timestamp(:started_at)
      table.timestamp(:ended_at)
      table.text(:error_message)

      # Judge-related columns, all optional.
      table.string(:judge_provider)
      table.string(:judge_model)
      table.json(:judge_message)
      table.integer(:judge_input)
      table.integer(:judge_output)

      table.timestamps
    end
  end
end
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
module ActiveModel
  module Validations
    # Validates that a String attribute contains parseable JSON.
    #
    # Blank values and non-String values are left untouched. When the value is
    # a non-blank String it is parsed, and the parsed result is written back
    # to the attribute through its setter. If parsing fails, an
    # +:invalid_json+ error is added to the attribute instead.
    class JsonValidator < EachValidator
      # @param record [Object] the object being validated
      # @param attribute [Symbol] name of the attribute under validation
      # @param value [Object] current value of the attribute
      def validate_each(record, attribute, value)
        # Only non-blank strings need parsing; anything else is accepted as is.
        return unless value.is_a?(String) && value.present?

        begin
          parsed = JSON.parse(value)
          # Replace the raw string with its parsed representation.
          record.send(:"#{attribute}=", parsed)
        rescue JSON::ParserError
          record.errors.add(attribute, :invalid_json)
        end
      end
    end
  end
end
|
|
@@ -1,6 +1,54 @@
|
|
|
1
|
-
|
|
1
|
+
require "importmap-rails"
|
|
2
|
+
require "stimulus-rails"
|
|
3
|
+
require "turbo-rails"
|
|
4
|
+
|
|
5
|
+
module RubyLLM
  module Evals
    # Rails engine for RubyLLM::Evals.
    #
    # Besides isolating the namespace it registers three initializers:
    # * +ruby_llm_evals.inflector+ - fixes acronym handling so that
    #   "ruby_llm" and "RubyLLM" convert into each other.
    # * +ruby_llm_evals.assets+ - exposes the engine's stylesheets, images and
    #   JavaScript to the host application's asset configuration.
    # * +ruby_llm_evals.importmap+ - draws the engine's own import map and
    #   sweeps its cache when the JavaScript sources change.
    class Engine < ::Rails::Engine
      isolate_namespace RubyLLM::Evals

      # File-name => constant-name overrides handed to every autoloader.
      INFLECTION_OVERRIDES = { "ruby_llm" => "RubyLLM" }.freeze

      initializer "ruby_llm_evals.inflector", after: "ruby_llm.inflections", before: :set_autoload_paths do
        ActiveSupport::Inflector.inflections(:en) do |inflections|
          # The RubyLLM gem registers "RubyLLM" as an acronym in its railtie,
          # which breaks underscore conversion (RubyLLM.underscore => "rubyllm").
          # We need to remove it and use "LLM" as an acronym instead for proper conversion:
          # * "ruby_llm".camelize => "RubyLLM" (not "RubyLlm")
          # * "RubyLLM".underscore => "ruby_llm" (not "rubyllm")
          inflections.acronyms.delete("rubyllm")
          inflections.acronym("LLM")
        end

        Rails.autoloaders.each do |loader|
          loader.inflector.inflect(INFLECTION_OVERRIDES)
        end
      end

      initializer "ruby_llm_evals.assets" do |app|
        app.config.assets.paths << root.join("app/assets/stylesheets")
        app.config.assets.paths << root.join("app/assets/images")
        app.config.assets.paths << root.join("app/javascript")

        # List every stylesheet and JavaScript module the engine ships, so that
        # none of them is left out of asset precompilation. The JSON editor
        # stylesheet and the eval-type, JSON-editor and schema-selector
        # controllers are shipped with the gem and therefore listed as well.
        app.config.assets.precompile += %w[
          ruby_llm/evals/application.css
          ruby_llm/evals/bulma.min.css
          ruby_llm/evals/json_editor.css
          ruby_llm/evals/application.js
          ruby_llm/evals/controllers/application.js
          ruby_llm/evals/controllers/index.js
          ruby_llm/evals/controllers/eval_type_selector_controller.js
          ruby_llm/evals/controllers/file_input_controller.js
          ruby_llm/evals/controllers/json_editor_controller.js
          ruby_llm/evals/controllers/provider_model_controller.js
          ruby_llm/evals/controllers/schema_selector_controller.js
        ]
      end

      initializer "ruby_llm_evals.importmap", after: "importmap" do |app|
        RubyLLM::Evals.importmap.draw(root.join("config/importmap.rb"))
        RubyLLM::Evals.importmap.cache_sweeper(watches: root.join("app/javascript"))

        # Re-check the watched JavaScript directory before each controller
        # action so that changed files invalidate the import map cache.
        ActiveSupport.on_load(:action_controller_base) do
          before_action { RubyLLM::Evals.importmap.cache_sweeper.execute_if_updated }
        end
      end
    end
  end
end
|
data/lib/ruby_llm/evals.rb
CHANGED
|
@@ -1,8 +1,12 @@
|
|
|
1
|
-
require "
|
|
1
|
+
require "activemodel/validations/json_validator"
|
|
2
|
+
require "liquid"
|
|
3
|
+
require "ruby_llm"
|
|
2
4
|
require "ruby_llm/evals/engine"
|
|
5
|
+
require "ruby_llm/evals/version"
|
|
6
|
+
require "ruby_llm/schema"
|
|
3
7
|
|
|
4
|
-
module
|
|
8
|
+
module RubyLLM
  module Evals
    # Module-level accessor holding the import map used by this engine.
    # It defaults to a fresh, empty Importmap::Map.
    mattr_accessor(:importmap, default: Importmap::Map.new)
  end
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ruby_llm-evals
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0
|
|
4
|
+
version: 0.1.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Patricio Mac Adden
|
|
8
8
|
- Fernando Martinez
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: importmap-rails
|
|
@@ -44,14 +44,14 @@ dependencies:
|
|
|
44
44
|
requirements:
|
|
45
45
|
- - ">="
|
|
46
46
|
- !ruby/object:Gem::Version
|
|
47
|
-
version: 7.
|
|
47
|
+
version: 7.2.0
|
|
48
48
|
type: :runtime
|
|
49
49
|
prerelease: false
|
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
|
51
51
|
requirements:
|
|
52
52
|
- - ">="
|
|
53
53
|
- !ruby/object:Gem::Version
|
|
54
|
-
version: 7.
|
|
54
|
+
version: 7.2.0
|
|
55
55
|
- !ruby/object:Gem::Dependency
|
|
56
56
|
name: ruby_llm
|
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -118,11 +118,70 @@ extra_rdoc_files: []
|
|
|
118
118
|
files:
|
|
119
119
|
- README.md
|
|
120
120
|
- Rakefile
|
|
121
|
+
- app/assets/stylesheets/ruby_llm/evals/application.css
|
|
122
|
+
- app/assets/stylesheets/ruby_llm/evals/bulma.min.css
|
|
123
|
+
- app/assets/stylesheets/ruby_llm/evals/json_editor.css
|
|
124
|
+
- app/controllers/concerns/ruby_llm/evals/prompt_executions/prompt_execution_scoped.rb
|
|
125
|
+
- app/controllers/ruby_llm/evals/application_controller.rb
|
|
126
|
+
- app/controllers/ruby_llm/evals/prompt_executions/failures_controller.rb
|
|
127
|
+
- app/controllers/ruby_llm/evals/prompt_executions/passages_controller.rb
|
|
128
|
+
- app/controllers/ruby_llm/evals/prompt_executions/retries_controller.rb
|
|
129
|
+
- app/controllers/ruby_llm/evals/prompts_controller.rb
|
|
130
|
+
- app/controllers/ruby_llm/evals/runs_controller.rb
|
|
131
|
+
- app/helpers/ruby_llm/evals/application_helper.rb
|
|
132
|
+
- app/helpers/ruby_llm/evals/prompt_executions_helper.rb
|
|
133
|
+
- app/helpers/ruby_llm/evals/prompts_helper.rb
|
|
134
|
+
- app/helpers/ruby_llm/evals/runs_helper.rb
|
|
135
|
+
- app/javascript/ruby_llm/evals/application.js
|
|
136
|
+
- app/javascript/ruby_llm/evals/controllers/application.js
|
|
137
|
+
- app/javascript/ruby_llm/evals/controllers/eval_type_selector_controller.js
|
|
138
|
+
- app/javascript/ruby_llm/evals/controllers/file_input_controller.js
|
|
139
|
+
- app/javascript/ruby_llm/evals/controllers/index.js
|
|
140
|
+
- app/javascript/ruby_llm/evals/controllers/json_editor_controller.js
|
|
141
|
+
- app/javascript/ruby_llm/evals/controllers/provider_model_controller.js
|
|
142
|
+
- app/javascript/ruby_llm/evals/controllers/schema_selector_controller.js
|
|
143
|
+
- app/jobs/ruby_llm/evals/application_job.rb
|
|
144
|
+
- app/jobs/ruby_llm/evals/execute_sample_job.rb
|
|
145
|
+
- app/jobs/ruby_llm/evals/perform_run_job.rb
|
|
146
|
+
- app/mailers/ruby_llm/evals/application_mailer.rb
|
|
147
|
+
- app/models/concerns/ruby_llm/evals/job_trackable.rb
|
|
148
|
+
- app/models/ruby_llm/evals/application_record.rb
|
|
149
|
+
- app/models/ruby_llm/evals/page.rb
|
|
150
|
+
- app/models/ruby_llm/evals/prompt.rb
|
|
151
|
+
- app/models/ruby_llm/evals/prompt_execution.rb
|
|
152
|
+
- app/models/ruby_llm/evals/run.rb
|
|
153
|
+
- app/models/ruby_llm/evals/sample.rb
|
|
154
|
+
- app/schemas/ruby_llm/evals/judge_verdict_schema.rb
|
|
155
|
+
- app/views/layouts/ruby_llm/evals/application.html.erb
|
|
156
|
+
- app/views/ruby_llm/evals/application/_flashes.html.erb
|
|
157
|
+
- app/views/ruby_llm/evals/application/_nav.html.erb
|
|
158
|
+
- app/views/ruby_llm/evals/application/_pagination.html.erb
|
|
159
|
+
- app/views/ruby_llm/evals/application/_tabs.html.erb
|
|
160
|
+
- app/views/ruby_llm/evals/prompts/_filters.html.erb
|
|
161
|
+
- app/views/ruby_llm/evals/prompts/_form.html.erb
|
|
162
|
+
- app/views/ruby_llm/evals/prompts/_prompt.html.erb
|
|
163
|
+
- app/views/ruby_llm/evals/prompts/compare.html.erb
|
|
164
|
+
- app/views/ruby_llm/evals/prompts/edit.html.erb
|
|
165
|
+
- app/views/ruby_llm/evals/prompts/index.html.erb
|
|
166
|
+
- app/views/ruby_llm/evals/prompts/new.html.erb
|
|
167
|
+
- app/views/ruby_llm/evals/prompts/show.html.erb
|
|
168
|
+
- app/views/ruby_llm/evals/runs/_filters.html.erb
|
|
169
|
+
- app/views/ruby_llm/evals/runs/_run.html.erb
|
|
170
|
+
- app/views/ruby_llm/evals/runs/index.html.erb
|
|
171
|
+
- app/views/ruby_llm/evals/runs/show.html.erb
|
|
172
|
+
- app/views/ruby_llm/evals/samples/_form.html.erb
|
|
173
|
+
- config/importmap.rb
|
|
174
|
+
- config/locales/en.yml
|
|
121
175
|
- config/routes.rb
|
|
176
|
+
- db/migrate/20251022211228_create_ruby_llm_evals_prompts.rb
|
|
177
|
+
- db/migrate/20251022211229_create_ruby_llm_evals_samples.rb
|
|
178
|
+
- db/migrate/20251022211230_create_ruby_llm_evals_runs.rb
|
|
179
|
+
- db/migrate/20251022211231_create_ruby_llm_evals_prompt_executions.rb
|
|
180
|
+
- lib/activemodel/validations/json_validator.rb
|
|
122
181
|
- lib/ruby_llm/evals.rb
|
|
123
182
|
- lib/ruby_llm/evals/engine.rb
|
|
124
183
|
- lib/ruby_llm/evals/version.rb
|
|
125
|
-
- lib/tasks/
|
|
184
|
+
- lib/tasks/ruby_llm_evals_tasks.rake
|
|
126
185
|
homepage: https://github.com/sinaptia/ruby_llm-evals
|
|
127
186
|
licenses: []
|
|
128
187
|
metadata:
|
|
@@ -142,7 +201,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
142
201
|
- !ruby/object:Gem::Version
|
|
143
202
|
version: '0'
|
|
144
203
|
requirements: []
|
|
145
|
-
rubygems_version: 3.6.
|
|
204
|
+
rubygems_version: 3.6.9
|
|
146
205
|
specification_version: 4
|
|
147
206
|
summary: LLM evaluation engine for Rails.
|
|
148
207
|
test_files: []
|
|
File without changes
|