ruby_llm-evals 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. checksums.yaml +4 -4
  2. data/README.md +180 -8
  3. data/Rakefile +0 -2
  4. data/app/assets/stylesheets/ruby_llm/evals/application.css +15 -0
  5. data/app/assets/stylesheets/ruby_llm/evals/bulma.min.css +3 -0
  6. data/app/assets/stylesheets/ruby_llm/evals/json_editor.css +25 -0
  7. data/app/controllers/concerns/ruby_llm/evals/prompt_executions/prompt_execution_scoped.rb +19 -0
  8. data/app/controllers/ruby_llm/evals/application_controller.rb +14 -0
  9. data/app/controllers/ruby_llm/evals/prompt_executions/failures_controller.rb +15 -0
  10. data/app/controllers/ruby_llm/evals/prompt_executions/passages_controller.rb +15 -0
  11. data/app/controllers/ruby_llm/evals/prompt_executions/retries_controller.rb +16 -0
  12. data/app/controllers/ruby_llm/evals/prompts_controller.rb +87 -0
  13. data/app/controllers/ruby_llm/evals/runs_controller.rb +46 -0
  14. data/app/helpers/ruby_llm/evals/application_helper.rb +39 -0
  15. data/app/helpers/ruby_llm/evals/prompt_executions_helper.rb +6 -0
  16. data/app/helpers/ruby_llm/evals/prompts_helper.rb +37 -0
  17. data/app/helpers/ruby_llm/evals/runs_helper.rb +6 -0
  18. data/app/javascript/ruby_llm/evals/application.js +3 -0
  19. data/app/javascript/ruby_llm/evals/controllers/application.js +13 -0
  20. data/app/javascript/ruby_llm/evals/controllers/eval_type_selector_controller.js +37 -0
  21. data/app/javascript/ruby_llm/evals/controllers/file_input_controller.js +21 -0
  22. data/app/javascript/ruby_llm/evals/controllers/index.js +4 -0
  23. data/app/javascript/ruby_llm/evals/controllers/json_editor_controller.js +129 -0
  24. data/app/javascript/ruby_llm/evals/controllers/provider_model_controller.js +85 -0
  25. data/app/javascript/ruby_llm/evals/controllers/schema_selector_controller.js +31 -0
  26. data/app/jobs/ruby_llm/evals/application_job.rb +6 -0
  27. data/app/jobs/ruby_llm/evals/execute_sample_job.rb +26 -0
  28. data/app/jobs/ruby_llm/evals/perform_run_job.rb +21 -0
  29. data/app/mailers/ruby_llm/evals/application_mailer.rb +8 -0
  30. data/app/models/concerns/ruby_llm/evals/job_trackable.rb +15 -0
  31. data/app/models/ruby_llm/evals/application_record.rb +7 -0
  32. data/app/models/ruby_llm/evals/page.rb +53 -0
  33. data/app/models/ruby_llm/evals/prompt.rb +55 -0
  34. data/app/models/ruby_llm/evals/prompt_execution.rb +169 -0
  35. data/app/models/ruby_llm/evals/run.rb +45 -0
  36. data/app/models/ruby_llm/evals/sample.rb +20 -0
  37. data/app/schemas/ruby_llm/evals/judge_verdict_schema.rb +8 -0
  38. data/app/views/layouts/ruby_llm/evals/application.html.erb +29 -0
  39. data/app/views/ruby_llm/evals/application/_flashes.html.erb +9 -0
  40. data/app/views/ruby_llm/evals/application/_nav.html.erb +12 -0
  41. data/app/views/ruby_llm/evals/application/_pagination.html.erb +7 -0
  42. data/app/views/ruby_llm/evals/application/_tabs.html.erb +6 -0
  43. data/app/views/ruby_llm/evals/prompts/_filters.html.erb +15 -0
  44. data/app/views/ruby_llm/evals/prompts/_form.html.erb +104 -0
  45. data/app/views/ruby_llm/evals/prompts/_prompt.html.erb +14 -0
  46. data/app/views/ruby_llm/evals/prompts/compare.html.erb +90 -0
  47. data/app/views/ruby_llm/evals/prompts/edit.html.erb +5 -0
  48. data/app/views/ruby_llm/evals/prompts/index.html.erb +32 -0
  49. data/app/views/ruby_llm/evals/prompts/new.html.erb +5 -0
  50. data/app/views/ruby_llm/evals/prompts/show.html.erb +107 -0
  51. data/app/views/ruby_llm/evals/runs/_filters.html.erb +17 -0
  52. data/app/views/ruby_llm/evals/runs/_run.html.erb +13 -0
  53. data/app/views/ruby_llm/evals/runs/index.html.erb +30 -0
  54. data/app/views/ruby_llm/evals/runs/show.html.erb +188 -0
  55. data/app/views/ruby_llm/evals/samples/_form.html.erb +88 -0
  56. data/config/importmap.rb +13 -0
  57. data/config/locales/en.yml +7 -0
  58. data/config/routes.rb +20 -1
  59. data/db/migrate/20251022211228_create_ruby_llm_evals_prompts.rb +21 -0
  60. data/db/migrate/20251022211229_create_ruby_llm_evals_samples.rb +14 -0
  61. data/db/migrate/20251022211230_create_ruby_llm_evals_runs.rb +21 -0
  62. data/db/migrate/20251022211231_create_ruby_llm_evals_prompt_executions.rb +26 -0
  63. data/lib/activemodel/validations/json_validator.rb +14 -0
  64. data/lib/ruby_llm/evals/engine.rb +49 -1
  65. data/lib/ruby_llm/evals/version.rb +2 -2
  66. data/lib/ruby_llm/evals.rb +7 -3
  67. metadata +65 -6
  68. /data/lib/tasks/{ruby_llm/evals_tasks.rake → ruby_llm_evals_tasks.rake} +0 -0
@@ -0,0 +1,21 @@
1
# Migration that creates the +ruby_llm_evals_runs+ table.
#
# Every row must reference a prompt (foreign key enforced) and must carry an
# Active Job id, a provider and a model; every other column is nullable.
class CreateRubyLLMEvalsRuns < ActiveRecord::Migration[7.0]
  def change
    create_table(:ruby_llm_evals_runs) do |table|
      table.references :ruby_llm_evals_prompt, null: false, foreign_key: true
      table.string :active_job_id, null: false

      # Start/end markers; both nullable.
      table.timestamp :started_at, :ended_at

      # NOTE(review): the columns below look like a copy of the prompt's LLM
      # settings taken when the run is created - confirm against the Run model.
      table.string :provider, :model, null: false
      table.float :temperature
      table.json :params, :tools
      table.string :schema
      table.json :schema_other
      table.text :instructions, :message

      table.timestamps
    end
  end
end
@@ -0,0 +1,26 @@
1
# Migration that creates the +ruby_llm_evals_prompt_executions+ table.
#
# Every row must reference both a sample and a run (foreign keys enforced)
# and must have an +eval_type+; every other column is nullable.
class CreateRubyLLMEvalsPromptExecutions < ActiveRecord::Migration[7.0]
  def change
    create_table(:ruby_llm_evals_prompt_executions) do |table|
      # Index names are given explicitly in abbreviated ("rle") form -
      # presumably because the generated default names would exceed the
      # identifier length limit of some databases; confirm before renaming.
      table.references :ruby_llm_evals_sample, null: false, foreign_key: true, index: { name: "index_rle_prompt_executions_on_rle_sample_id" }
      table.references :ruby_llm_evals_run, null: false, foreign_key: true, index: { name: "index_rle_prompt_executions_on_rle_run_id" }

      table.string :eval_type, null: false
      table.text :expected_output
      table.json :variables

      # NOTE(review): +input+/+output+ are integers, so they are presumably
      # token counts rather than text - confirm against PromptExecution.
      table.integer :input, :output
      table.text :message
      table.boolean :passed

      # Unlike runs, an execution may exist without an associated job id.
      table.string :active_job_id, null: true
      table.timestamp :started_at, :ended_at
      table.text :error_message

      # Columns prefixed with +judge_+ mirror the ones above - presumably for
      # the LLM acting as judge; verify against the model.
      table.string :judge_provider, :judge_model
      table.json :judge_message
      table.integer :judge_input, :judge_output

      table.timestamps
    end
  end
end
@@ -0,0 +1,14 @@
1
module ActiveModel
  module Validations
    # Each-validator that checks a String attribute for well-formed JSON.
    #
    # Blank values and non-String values are left alone. For a non-blank
    # String the validator parses it and, as a side effect, writes the parsed
    # value back onto the record through the attribute's writer. When parsing
    # fails an +:invalid_json+ error is added to the attribute instead.
    class JsonValidator < EachValidator
      # @param record    the object being validated
      # @param attribute [Symbol] name of the attribute under validation
      # @param value     current value of that attribute
      def validate_each(record, attribute, value)
        return unless value.is_a?(String) && value.present?

        parsed = JSON.parse(value)
        # Replace the raw JSON text with its parsed representation.
        record.send(:"#{attribute}=", parsed)
      rescue JSON::ParserError
        record.errors.add(attribute, :invalid_json)
      end
    end
  end
end
@@ -1,6 +1,54 @@
1
- module RubyLlm
1
+ require "importmap-rails"
2
+ require "stimulus-rails"
3
+ require "turbo-rails"
4
+
5
module RubyLLM
  module Evals
    # Rails engine for the evals UI, isolated under the RubyLLM::Evals namespace.
    #
    # It registers three initializers:
    # * +ruby_llm_evals.inflector+ - fixes the "RubyLLM" <-> "ruby_llm" inflection.
    # * +ruby_llm_evals.assets+    - exposes the engine's stylesheets and JavaScript
    #   to the host's asset pipeline (skipped when the host has none).
    # * +ruby_llm_evals.importmap+ - draws the engine's own importmap and, when
    #   importmap cache sweeping is enabled, refreshes it on file changes.
    class Engine < ::Rails::Engine
      isolate_namespace RubyLLM::Evals

      INFLECTION_OVERRIDES = { "ruby_llm" => "RubyLLM" }.freeze

      initializer "ruby_llm_evals.inflector", after: "ruby_llm.inflections", before: :set_autoload_paths do
        ActiveSupport::Inflector.inflections(:en) do |inflections|
          # The RubyLLM gem registers "RubyLLM" as an acronym in its railtie,
          # which breaks underscore conversion (RubyLLM.underscore => "rubyllm").
          # We need to remove it and use "LLM" as an acronym instead for proper conversion:
          # * "ruby_llm".camelize => "RubyLLM" (not "RubyLlm")
          # * "RubyLLM".underscore => "ruby_llm" (not "rubyllm")
          inflections.acronyms.delete("rubyllm")
          inflections.acronym("LLM")
        end

        Rails.autoloaders.each do |loader|
          loader.inflector.inflect(INFLECTION_OVERRIDES)
        end
      end

      initializer "ruby_llm_evals.assets" do |app|
        # A host without an asset pipeline does not define config.assets;
        # skip instead of failing at boot.
        if app.config.respond_to?(:assets)
          app.config.assets.paths << root.join("app/assets/stylesheets")
          app.config.assets.paths << root.join("app/assets/images")
          app.config.assets.paths << root.join("app/javascript")

          # Every stylesheet and JavaScript file the engine ships is listed so
          # that pipelines which only serve declared assets can resolve them.
          # NOTE(review): json_editor.css and the eval_type_selector,
          # json_editor and schema_selector controllers were previously
          # missing from this list - confirm they are referenced by the
          # layout / config/importmap.rb.
          app.config.assets.precompile += %w[
            ruby_llm/evals/application.css
            ruby_llm/evals/bulma.min.css
            ruby_llm/evals/json_editor.css
            ruby_llm/evals/application.js
            ruby_llm/evals/controllers/application.js
            ruby_llm/evals/controllers/index.js
            ruby_llm/evals/controllers/provider_model_controller.js
            ruby_llm/evals/controllers/file_input_controller.js
            ruby_llm/evals/controllers/eval_type_selector_controller.js
            ruby_llm/evals/controllers/json_editor_controller.js
            ruby_llm/evals/controllers/schema_selector_controller.js
          ]
        end
      end

      initializer "ruby_llm_evals.importmap", after: "importmap" do |app|
        RubyLLM::Evals.importmap.draw(root.join("config/importmap.rb"))

        # Only watch the JavaScript directory when importmap cache sweeping is
        # enabled, so the file check is not installed as a before_action on
        # every host controller when sweeping is turned off.
        if app.config.importmap.sweep_cache
          RubyLLM::Evals.importmap.cache_sweeper(watches: root.join("app/javascript"))

          ActiveSupport.on_load(:action_controller_base) do
            before_action { RubyLLM::Evals.importmap.cache_sweeper.execute_if_updated }
          end
        end
      end
    end
  end
end
@@ -1,5 +1,5 @@
1
- module RubyLlm
1
+ module RubyLLM
2
2
  module Evals
3
- VERSION = "0.0.1"
3
+ VERSION = "0.1.0"
4
4
  end
5
5
  end
@@ -1,8 +1,12 @@
1
- require "ruby_llm/evals/version"
1
+ require "activemodel/validations/json_validator"
2
+ require "liquid"
3
+ require "ruby_llm"
2
4
  require "ruby_llm/evals/engine"
5
+ require "ruby_llm/evals/version"
6
+ require "ruby_llm/schema"
3
7
 
4
- module RubyLlm
8
+ module RubyLLM
5
9
  module Evals
6
- # Your code goes here...
10
+ mattr_accessor :importmap, default: Importmap::Map.new
7
11
  end
8
12
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby_llm-evals
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Patricio Mac Adden
8
8
  - Fernando Martinez
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-10-23 00:00:00.000000000 Z
11
+ date: 1980-01-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: importmap-rails
@@ -44,14 +44,14 @@ dependencies:
44
44
  requirements:
45
45
  - - ">="
46
46
  - !ruby/object:Gem::Version
47
- version: 7.0.0
47
+ version: 7.2.0
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
- version: 7.0.0
54
+ version: 7.2.0
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: ruby_llm
57
57
  requirement: !ruby/object:Gem::Requirement
@@ -118,11 +118,70 @@ extra_rdoc_files: []
118
118
  files:
119
119
  - README.md
120
120
  - Rakefile
121
+ - app/assets/stylesheets/ruby_llm/evals/application.css
122
+ - app/assets/stylesheets/ruby_llm/evals/bulma.min.css
123
+ - app/assets/stylesheets/ruby_llm/evals/json_editor.css
124
+ - app/controllers/concerns/ruby_llm/evals/prompt_executions/prompt_execution_scoped.rb
125
+ - app/controllers/ruby_llm/evals/application_controller.rb
126
+ - app/controllers/ruby_llm/evals/prompt_executions/failures_controller.rb
127
+ - app/controllers/ruby_llm/evals/prompt_executions/passages_controller.rb
128
+ - app/controllers/ruby_llm/evals/prompt_executions/retries_controller.rb
129
+ - app/controllers/ruby_llm/evals/prompts_controller.rb
130
+ - app/controllers/ruby_llm/evals/runs_controller.rb
131
+ - app/helpers/ruby_llm/evals/application_helper.rb
132
+ - app/helpers/ruby_llm/evals/prompt_executions_helper.rb
133
+ - app/helpers/ruby_llm/evals/prompts_helper.rb
134
+ - app/helpers/ruby_llm/evals/runs_helper.rb
135
+ - app/javascript/ruby_llm/evals/application.js
136
+ - app/javascript/ruby_llm/evals/controllers/application.js
137
+ - app/javascript/ruby_llm/evals/controllers/eval_type_selector_controller.js
138
+ - app/javascript/ruby_llm/evals/controllers/file_input_controller.js
139
+ - app/javascript/ruby_llm/evals/controllers/index.js
140
+ - app/javascript/ruby_llm/evals/controllers/json_editor_controller.js
141
+ - app/javascript/ruby_llm/evals/controllers/provider_model_controller.js
142
+ - app/javascript/ruby_llm/evals/controllers/schema_selector_controller.js
143
+ - app/jobs/ruby_llm/evals/application_job.rb
144
+ - app/jobs/ruby_llm/evals/execute_sample_job.rb
145
+ - app/jobs/ruby_llm/evals/perform_run_job.rb
146
+ - app/mailers/ruby_llm/evals/application_mailer.rb
147
+ - app/models/concerns/ruby_llm/evals/job_trackable.rb
148
+ - app/models/ruby_llm/evals/application_record.rb
149
+ - app/models/ruby_llm/evals/page.rb
150
+ - app/models/ruby_llm/evals/prompt.rb
151
+ - app/models/ruby_llm/evals/prompt_execution.rb
152
+ - app/models/ruby_llm/evals/run.rb
153
+ - app/models/ruby_llm/evals/sample.rb
154
+ - app/schemas/ruby_llm/evals/judge_verdict_schema.rb
155
+ - app/views/layouts/ruby_llm/evals/application.html.erb
156
+ - app/views/ruby_llm/evals/application/_flashes.html.erb
157
+ - app/views/ruby_llm/evals/application/_nav.html.erb
158
+ - app/views/ruby_llm/evals/application/_pagination.html.erb
159
+ - app/views/ruby_llm/evals/application/_tabs.html.erb
160
+ - app/views/ruby_llm/evals/prompts/_filters.html.erb
161
+ - app/views/ruby_llm/evals/prompts/_form.html.erb
162
+ - app/views/ruby_llm/evals/prompts/_prompt.html.erb
163
+ - app/views/ruby_llm/evals/prompts/compare.html.erb
164
+ - app/views/ruby_llm/evals/prompts/edit.html.erb
165
+ - app/views/ruby_llm/evals/prompts/index.html.erb
166
+ - app/views/ruby_llm/evals/prompts/new.html.erb
167
+ - app/views/ruby_llm/evals/prompts/show.html.erb
168
+ - app/views/ruby_llm/evals/runs/_filters.html.erb
169
+ - app/views/ruby_llm/evals/runs/_run.html.erb
170
+ - app/views/ruby_llm/evals/runs/index.html.erb
171
+ - app/views/ruby_llm/evals/runs/show.html.erb
172
+ - app/views/ruby_llm/evals/samples/_form.html.erb
173
+ - config/importmap.rb
174
+ - config/locales/en.yml
121
175
  - config/routes.rb
176
+ - db/migrate/20251022211228_create_ruby_llm_evals_prompts.rb
177
+ - db/migrate/20251022211229_create_ruby_llm_evals_samples.rb
178
+ - db/migrate/20251022211230_create_ruby_llm_evals_runs.rb
179
+ - db/migrate/20251022211231_create_ruby_llm_evals_prompt_executions.rb
180
+ - lib/activemodel/validations/json_validator.rb
122
181
  - lib/ruby_llm/evals.rb
123
182
  - lib/ruby_llm/evals/engine.rb
124
183
  - lib/ruby_llm/evals/version.rb
125
- - lib/tasks/ruby_llm/evals_tasks.rake
184
+ - lib/tasks/ruby_llm_evals_tasks.rake
126
185
  homepage: https://github.com/sinaptia/ruby_llm-evals
127
186
  licenses: []
128
187
  metadata:
@@ -142,7 +201,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
142
201
  - !ruby/object:Gem::Version
143
202
  version: '0'
144
203
  requirements: []
145
- rubygems_version: 3.6.2
204
+ rubygems_version: 3.6.9
146
205
  specification_version: 4
147
206
  summary: LLM evaluation engine for Rails.
148
207
  test_files: []