leva 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/README.md +52 -16
  3. data/app/controllers/leva/dataset_records_controller.rb +21 -0
  4. data/app/controllers/leva/datasets_controller.rb +9 -2
  5. data/app/controllers/leva/experiments_controller.rb +34 -9
  6. data/app/controllers/leva/runner_results_controller.rb +8 -0
  7. data/app/controllers/leva/workbench_controller.rb +85 -12
  8. data/app/helpers/leva/application_helper.rb +39 -0
  9. data/app/javascript/controllers/prompt_form_controller.js +45 -0
  10. data/app/javascript/controllers/prompt_selector_controller.js +31 -0
  11. data/app/jobs/leva/experiment_job.rb +9 -4
  12. data/app/jobs/leva/run_eval_job.rb +40 -0
  13. data/app/models/concerns/leva/recordable.rb +37 -0
  14. data/app/models/leva/dataset.rb +15 -6
  15. data/app/models/leva/dataset_record.rb +43 -5
  16. data/app/models/leva/evaluation_result.rb +22 -14
  17. data/app/models/leva/experiment.rb +26 -14
  18. data/app/models/leva/prompt.rb +14 -1
  19. data/app/models/leva/runner_result.rb +54 -0
  20. data/app/views/layouts/leva/application.html.erb +24 -13
  21. data/app/views/leva/dataset_records/index.html.erb +49 -0
  22. data/app/views/leva/dataset_records/show.html.erb +30 -0
  23. data/app/views/leva/datasets/_dataset.html.erb +18 -0
  24. data/app/views/leva/datasets/_form.html.erb +24 -0
  25. data/app/views/leva/datasets/edit.html.erb +5 -0
  26. data/app/views/leva/datasets/index.html.erb +51 -38
  27. data/app/views/leva/datasets/new.html.erb +5 -0
  28. data/app/views/leva/datasets/show.html.erb +160 -8
  29. data/app/views/leva/experiments/_experiment.html.erb +42 -0
  30. data/app/views/leva/experiments/_form.html.erb +49 -0
  31. data/app/views/leva/experiments/edit.html.erb +5 -0
  32. data/app/views/leva/experiments/index.html.erb +53 -37
  33. data/app/views/leva/experiments/new.html.erb +5 -0
  34. data/app/views/leva/experiments/show.html.erb +115 -19
  35. data/app/views/leva/runner_results/show.html.erb +64 -0
  36. data/app/views/leva/workbench/_evaluation_area.html.erb +5 -0
  37. data/app/views/leva/workbench/_prompt_content.html.erb +216 -0
  38. data/app/views/leva/workbench/_prompt_form.html.erb +89 -0
  39. data/app/views/leva/workbench/_prompt_sidebar.html.erb +21 -0
  40. data/app/views/leva/workbench/_results_section.html.erb +159 -0
  41. data/app/views/leva/workbench/_top_bar.html.erb +10 -0
  42. data/app/views/leva/workbench/edit.html.erb +20 -0
  43. data/app/views/leva/workbench/index.html.erb +5 -91
  44. data/app/views/leva/workbench/new.html.erb +79 -36
  45. data/config/routes.rb +15 -6
  46. data/db/migrate/20240813172916_create_leva_datasets.rb +1 -0
  47. data/db/migrate/20240813173033_create_leva_dataset_records.rb +1 -1
  48. data/db/migrate/20240813173035_create_leva_experiments.rb +3 -2
  49. data/db/migrate/20240813173050_create_leva_evaluation_results.rb +2 -2
  50. data/db/migrate/20240816201419_create_leva_runner_results.rb +11 -0
  51. data/db/migrate/20240816201433_update_leva_evaluation_results.rb +8 -0
  52. data/db/migrate/20240821163608_make_experiment_optional_for_runner_results.rb +6 -0
  53. data/db/migrate/20240821181934_add_prompt_to_leva_runner_results.rb +5 -0
  54. data/db/migrate/20240821183153_add_runner_and_evaluator_to_leva_experiments.rb +6 -0
  55. data/db/migrate/20240821191713_add_actual_result_to_leva_dataset_records.rb +5 -0
  56. data/db/migrate/20240822143201_remove_actual_result_from_leva_runner_results.rb +5 -0
  57. data/lib/generators/leva/templates/eval.rb.erb +6 -7
  58. data/lib/leva/version.rb +1 -1
  59. data/lib/leva.rb +62 -45
  60. metadata +48 -5
  61. data/app/evals/test_sentiment_accuracy_eval.rb +0 -6
  62. data/app/runners/test_sentiment_run.rb +0 -13
  63. data/lib/leva/base_eval.rb +0 -75
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9d04a7b6d164d0962b4c9e2da411c8622f748688012e8724d95723787363531e
4
- data.tar.gz: d2e199b18f717b368ed10b56f5a2bd5d52d26ffe738a670e570b167ced98461c
3
+ metadata.gz: 2abd3b8bab0e39683850e7e95df8815268332022c2adf6846cd27442e880cb88
4
+ data.tar.gz: 73c430d4a5a2c6a98dba70a36be988dd40541debbf10b274d62db8e232e931f7
5
5
  SHA512:
6
- metadata.gz: 94fb2936e694272809dfe8e13fc052ead002a2cd29de4ff46ef4cb4bc1ecaf9b4fbab947fe43cb12c292f09badd23e8c3ca86bdb2e8e7d33c8221077611e35c2
7
- data.tar.gz: 67e127f8b7147753021cb4ff96c2f3d5398a66490c702c2454df034d69cca1fec1153880713dfeec79e4f5898ffa44c3e187aa479b1ae2ea85926b1ea960b7be
6
+ metadata.gz: ee31e67dae95204cb6c6f2fc96ad41dae624ea48f6760b65571f8f1447fd24ca055a4dba33244067f0138db79a620ec82e8bbb3d3fd4363f9b6e98079b45b6a7
7
+ data.tar.gz: fadb2838c4d31e498f40ed8e5d20859c57e4e2df433fae1251d3215ddcbc3a2c4843b5a7bd0b73bb4ff4e2b3abb55842b83d7a018ad7feaa40a4da89e6313adc
data/README.md CHANGED
@@ -27,14 +27,46 @@ rails db:migrate
27
27
 
28
28
  ### 1. Setting up Datasets
29
29
 
30
- First, create a dataset and add any ActiveRecord records you want to evaluate against:
30
+ First, create a dataset and add any ActiveRecord records you want to evaluate against. To make your models compatible with Leva, include the `Leva::Recordable` concern in your model:
31
31
 
32
- ```ruby
33
- dataset = Leva::Dataset.create(name: "Sentiment Analysis Dataset")
34
- dataset.add_record TextContent.create(text: "I love this product!", expected_label: "Positive")
35
- dataset.add_record TextContent.create(text: "Terrible experience", expected_label: "Negative")
36
- dataset.add_record TextContent.create(text: "I's ok", expected_label: "Neutral")
37
- ```
32
+ ````ruby
33
+ class TextContent < ApplicationRecord
34
+ include Leva::Recordable
35
+
36
+ # @return [String] The ground truth label for the record
37
+ def ground_truth
38
+ expected_label
39
+ end
40
+
41
+ # @return [Hash] A hash of attributes to be displayed in the dataset records index
42
+ def index_attributes
43
+ {
44
+ text: text,
45
+ expected_label: expected_label,
46
+ created_at: created_at.strftime('%Y-%m-%d %H:%M:%S')
47
+ }
48
+ end
49
+
50
+ # @return [Hash] A hash of attributes to be displayed in the dataset record show view
51
+ def show_attributes
52
+ {
53
+ text: text,
54
+ expected_label: expected_label,
55
+ created_at: created_at.strftime('%Y-%m-%d %H:%M:%S')
56
+ }
57
+ end
58
+
59
+ # @return [Hash] A hash of attributes to be displayed in the dataset record show view
60
+ def to_llm_context
61
+ {
62
+ text: text,
63
+ expected_label: expected_label,
64
+ created_at: created_at.strftime('%Y-%m-%d %H:%M:%S')
65
+ }
66
+ end
67
+ end
68
+
69
+ dataset = Leva::Dataset.create(name: "Sentiment Analysis Dataset")
+ dataset.add_record TextContent.create(text: "I love this product!", expected_label: "Positive")
+ dataset.add_record TextContent.create(text: "Terrible experience", expected_label: "Negative")
+ dataset.add_record TextContent.create(text: "It's ok", expected_label: "Neutral")
38
70
 
39
71
  ### 2. Implementing Runs
40
72
 
@@ -42,7 +74,7 @@ Create a run class to handle the execution of your inference logic:
42
74
 
43
75
  ```bash
44
76
  rails generate leva:runner sentiment
45
- ```
77
+ ````
46
78
 
47
79
  ```ruby
48
80
  class SentimentRun < Leva::BaseRun
@@ -64,17 +96,17 @@ rails generate leva:eval sentiment_accuracy
64
96
 
65
97
  ```ruby
66
98
  class SentimentAccuracyEval < Leva::BaseEval
67
- def evaluate(prediction, expected)
68
- score = prediction == expected ? 1.0 : 0.0
69
- Leva::Result.new(label: 'sentiment_accuracy', score: score)
99
+ def evaluate(prediction, record)
100
+ score = prediction == record.expected_label ? 1.0 : 0.0
101
+ [score, record.expected_label]
70
102
  end
71
103
  end
72
104
 
73
105
  class SentimentF1Eval < Leva::BaseEval
74
- def evaluate(prediction, expected)
106
+ def evaluate(prediction, record)
75
107
  # Calculate F1 score
76
108
  # ...
77
- Leva::Result.new(label: 'sentiment_f1', score: f1_score)
109
+ [f1_score, record.f1_score]
78
110
  end
79
111
  end
80
112
  ```
@@ -122,9 +154,9 @@ Leva.run_evaluation(experiment: experiment, run: run, evals: evals)
122
154
  After the experiments are complete, analyze the results:
123
155
 
124
156
  ```ruby
125
- experiment.evaluation_results.group_by(&:label).each do |label, results|
157
+ experiment.evaluation_results.group_by(&:evaluator_class).each do |evaluator_class, results|
126
158
  average_score = results.average(&:score)
127
- puts "#{label.capitalize} Average Score: #{average_score}"
159
+ puts "#{evaluator_class.capitalize} Average Score: #{average_score}"
128
160
  end
129
161
  ```
130
162
 
@@ -139,13 +171,13 @@ Ensure you set up any required API keys or other configurations in your Rails cr
139
171
  - `Leva`: Handles the process of running experiments.
140
172
  - `Leva::BaseRun`: Base class for run implementations.
141
173
  - `Leva::BaseEval`: Base class for eval implementations.
142
- - `Leva::Result`: Represents the result of an evaluation.
143
174
 
144
175
  ### Models
145
176
 
146
177
  - `Leva::Dataset`: Represents a collection of data to be evaluated.
147
178
  - `Leva::DatasetRecord`: Represents individual records within a dataset.
148
179
  - `Leva::Experiment`: Represents a single run of an evaluation on a dataset.
180
+ - `Leva::RunnerResult`: Stores the results of each run execution.
149
181
  - `Leva::EvaluationResult`: Stores the results of each evaluation.
150
182
  - `Leva::Prompt`: Represents a prompt for an LLM.
151
183
 
@@ -156,3 +188,7 @@ Bug reports and pull requests are welcome on GitHub at https://github.com/kieran
156
188
  ## License
157
189
 
158
190
  The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
191
+
192
+ ## Roadmap
193
+
194
+ - [ ] Parallelize evaluation
@@ -0,0 +1,21 @@
1
+ module Leva
2
+ class DatasetRecordsController < ApplicationController
3
+ before_action :set_dataset
4
+
5
+ # GET /datasets/:dataset_id/records
6
+ def index
7
+ @records = @dataset.dataset_records.includes(:recordable)
8
+ end
9
+
10
+ # GET /datasets/:dataset_id/records/:id
11
+ def show
12
+ @record = @dataset.dataset_records.find(params[:id])
13
+ end
14
+
15
+ private
16
+
17
+ def set_dataset
18
+ @dataset = Dataset.find(params[:dataset_id])
19
+ end
20
+ end
21
+ end
@@ -13,6 +13,8 @@ module Leva
13
13
  # GET /datasets/1
14
14
  # @return [void]
15
15
  def show
16
+ @experiments = @dataset.experiments
17
+ @new_experiment = Experiment.new(dataset: @dataset)
16
18
  end
17
19
 
18
20
  # GET /datasets/new
@@ -24,6 +26,7 @@ module Leva
24
26
  # GET /datasets/1/edit
25
27
  # @return [void]
26
28
  def edit
29
+ # The @dataset is already set by the before_action
27
30
  end
28
31
 
29
32
  # POST /datasets
@@ -51,8 +54,12 @@ module Leva
51
54
  # DELETE /datasets/1
52
55
  # @return [void]
53
56
  def destroy
54
- @dataset.destroy
55
- redirect_to datasets_url, notice: 'Dataset was successfully destroyed.'
57
+ if @dataset.dataset_records.any?
58
+ redirect_to @dataset, alert: 'Cannot delete dataset with existing records.'
59
+ else
60
+ @dataset.destroy
61
+ redirect_to datasets_url, notice: 'Dataset was successfully destroyed.'
62
+ end
56
63
  end
57
64
 
58
65
  private
@@ -2,7 +2,11 @@
2
2
 
3
3
  module Leva
4
4
  class ExperimentsController < ApplicationController
5
- before_action :set_experiment, only: [:show, :edit, :update, :destroy]
5
+ include ApplicationHelper
6
+
7
+ before_action :set_experiment, only: [:show, :edit, :update]
8
+ before_action :check_editable, only: [:edit, :update]
9
+ before_action :load_runners_and_evaluators, only: [:new, :edit, :create, :update]
6
10
 
7
11
  # GET /experiments
8
12
  # @return [void]
@@ -13,17 +17,19 @@ module Leva
13
17
  # GET /experiments/1
14
18
  # @return [void]
15
19
  def show
20
+ @experiment = Experiment.includes(runner_results: :evaluation_results).find(params[:id])
16
21
  end
17
22
 
18
23
  # GET /experiments/new
19
24
  # @return [void]
20
25
  def new
21
- @experiment = Experiment.new
26
+ @experiment = Experiment.new(dataset_id: params[:dataset_id])
22
27
  end
23
28
 
24
29
  # GET /experiments/1/edit
25
30
  # @return [void]
26
31
  def edit
32
+ # The @experiment is already set by the before_action
27
33
  end
28
34
 
29
35
  # POST /experiments
@@ -32,8 +38,8 @@ module Leva
32
38
  @experiment = Experiment.new(experiment_params)
33
39
 
34
40
  if @experiment.save
35
- ExperimentJob.perform_later(@experiment)
36
- redirect_to @experiment, notice: 'Experiment was successfully created.'
41
+ ExperimentJob.perform_later(@experiment) unless @experiment.completed?
42
+ redirect_to @experiment, notice: 'Experiment was successfully created and is now running.'
37
43
  else
38
44
  render :new
39
45
  end
@@ -49,11 +55,21 @@ module Leva
49
55
  end
50
56
  end
51
57
 
52
- # DELETE /experiments/1
58
+ # POST /experiments/1/rerun
53
59
  # @return [void]
54
- def destroy
55
- @experiment.destroy
56
- redirect_to experiments_url, notice: 'Experiment was successfully destroyed.'
60
+ def rerun
61
+ @experiment = Experiment.find(params[:id])
62
+
63
+ # Delete existing runner results and evaluation results
64
+ @experiment.runner_results.destroy_all
65
+
66
+ # Reset experiment status to pending
67
+ @experiment.update(status: :pending)
68
+
69
+ # Queue the job again
70
+ ExperimentJob.perform_later(@experiment)
71
+
72
+ redirect_to @experiment, notice: 'Experiment has been reset and is now running again.'
57
73
  end
58
74
 
59
75
  private
@@ -67,7 +83,16 @@ module Leva
67
83
  # Only allow a list of trusted parameters through.
68
84
  # @return [ActionController::Parameters]
69
85
  def experiment_params
70
- params.require(:experiment).permit(:name, :description, :dataset_id)
86
+ params.require(:experiment).permit(:name, :description, :dataset_id, :prompt_id, :runner_class, evaluator_classes: [])
87
+ end
88
+
89
+ def load_runners_and_evaluators
90
+ @runners = load_runners
91
+ @evaluators = load_evaluators
92
+ end
93
+
94
+ def check_editable
95
+ redirect_to @experiment, alert: 'Completed experiments cannot be edited.' if @experiment.completed?
71
96
  end
72
97
  end
73
98
  end
@@ -0,0 +1,8 @@
1
+ module Leva
2
+ class RunnerResultsController < ApplicationController
3
+ def show
4
+ @experiment = Experiment.find(params[:experiment_id])
5
+ @runner_result = @experiment.runner_results.find(params[:id])
6
+ end
7
+ end
8
+ end
@@ -2,39 +2,112 @@
2
2
 
3
3
  module Leva
4
4
  class WorkbenchController < ApplicationController
5
+ include ApplicationHelper
6
+
7
+ before_action :set_prompt, only: [:index, :edit, :update, :run, :run_all_evals, :run_evaluator]
8
+ before_action :set_dataset_record, only: [:index, :run, :run_all_evals, :run_evaluator]
9
+ before_action :set_runner_result, only: [:index, :run_all_evals, :run_evaluator]
10
+
5
11
  # GET /workbench
6
12
  # @return [void]
7
13
  def index
8
14
  @prompts = Prompt.all
9
- @selected_prompt = Prompt.first || Prompt.create!(name: "Test Prompt", version: 1, system_prompt: "You are a helpful assistant.", user_prompt: "Hello, how can I help you today?")
10
- @evaluators = ['Evaluator 1', 'Evaluator 2', 'Evaluator 3']
15
+ @selected_prompt = @prompt || Prompt.first
16
+ @evaluators = load_evaluators
17
+ @runners = load_runners
18
+ @selected_runner = params[:runner] || @runners.first&.name
19
+ @selected_dataset_record = params[:dataset_record_id] || DatasetRecord.first&.id
11
20
  end
12
21
 
13
22
  # GET /workbench/new
14
23
  # @return [void]
15
24
  def new
16
- @experiment = Experiment.new
25
+ @prompt = Prompt.new
26
+ @predefined_prompts = load_predefined_prompts
27
+ end
28
+
29
+ # POST /workbench
30
+ # @return [void]
31
+ def create
32
+ @prompt = Prompt.new(prompt_params)
33
+ if @prompt.save
34
+ redirect_to workbench_index_path(prompt_id: @prompt.id), notice: 'Prompt was successfully created.'
35
+ else
36
+ render :new
37
+ end
17
38
  end
18
39
 
19
40
  # GET /workbench/1
20
41
  # @return [void]
21
- def show
22
- @experiment = Experiment.find(params[:id])
42
+ def edit
43
+ end
44
+
45
+ # PATCH/PUT /workbench/1
46
+ # @return [void]
47
+ def update
48
+ @prompt = Prompt.find(params[:id])
49
+ if @prompt.update(prompt_params)
50
+ render json: { status: 'success', message: 'Prompt updated successfully' }
51
+ else
52
+ render json: { status: 'error', errors: @prompt.errors.full_messages }, status: :unprocessable_entity
53
+ end
23
54
  end
24
55
 
25
56
  def run
26
- # Implement the logic for running the prompt
27
- redirect_to workbench_index_path, notice: 'Prompt run successfully'
57
+ return redirect_to workbench_index_path, alert: 'Please select a record and a runner' unless @dataset_record && run_params[:runner]
58
+
59
+ runner_class = run_params[:runner].constantize
60
+ return redirect_to workbench_index_path, alert: 'Invalid runner selected' unless runner_class < Leva::BaseRun
61
+
62
+ runner = runner_class.new
63
+ runner_result = runner.execute_and_store(nil, @dataset_record, @prompt)
64
+
65
+ redirect_to workbench_index_path(prompt_id: @prompt.id, dataset_record_id: @dataset_record.id, runner: run_params[:runner]), notice: 'Run completed successfully'
28
66
  end
29
67
 
30
- def run_with_evaluation
31
- # Implement the logic for running the prompt with evaluation
32
- redirect_to workbench_index_path, notice: 'Prompt run with evaluation successfully'
68
+ def run_all_evals
69
+ return redirect_to workbench_index_path, alert: 'No runner result available' unless @runner_result
70
+
71
+ load_evaluators.each do |evaluator_class|
72
+ evaluator = evaluator_class.new
73
+ evaluator.evaluate_and_store(nil, @runner_result)
74
+ end
75
+
76
+ redirect_to workbench_index_path(prompt_id: @prompt.id, dataset_record_id: @dataset_record.id, runner: params[:runner]), notice: 'All evaluations completed successfully'
33
77
  end
34
78
 
35
79
  def run_evaluator
36
- # Implement the logic for running a single evaluator
37
- redirect_to workbench_index_path, notice: 'Evaluator run successfully'
80
+ return redirect_to workbench_index_path, alert: 'No runner result available' unless @runner_result
81
+
82
+ evaluator_class = params[:evaluator].constantize
83
+ return redirect_to workbench_index_path, alert: 'Invalid evaluator selected' unless evaluator_class < Leva::BaseEval
84
+
85
+ evaluator = evaluator_class.new
86
+ evaluator.evaluate_and_store(nil, @runner_result)
87
+
88
+ redirect_to workbench_index_path(prompt_id: @prompt.id, dataset_record_id: @dataset_record.id, runner: params[:runner]), notice: 'Evaluator run successfully'
89
+ end
90
+
91
+ private
92
+
93
+ def set_prompt
94
+ @prompt = params[:prompt_id] ? Prompt.find(params[:prompt_id]) : Prompt.first
95
+ end
96
+
97
+ def prompt_params
98
+ params.require(:prompt).permit(:name, :system_prompt, :user_prompt, :version)
99
+ end
100
+
101
+ def set_dataset_record
102
+ @dataset_record = DatasetRecord.find_by(id: params[:dataset_record_id]) || DatasetRecord.first
103
+ end
104
+
105
+ def run_params
106
+ params.permit(:runner, :prompt_id, :dataset_record_id)
107
+ end
108
+
109
+ def set_runner_result
110
+ @runner_result = @dataset_record.runner_results.last if @dataset_record
38
111
  end
39
112
  end
40
113
  end
@@ -1,4 +1,43 @@
1
1
  module Leva
2
2
  module ApplicationHelper
3
+ # Loads all evaluator classes that inherit from Leva::BaseEval
4
+ #
5
+ # @return [Array<Class>] An array of evaluator classes
6
+ def load_evaluators
7
+ load_classes_from_directory('app/evals', Leva::BaseEval) || []
8
+ end
9
+
10
+ # Loads all runner classes that inherit from Leva::BaseRun
11
+ #
12
+ # @return [Array<Class>] An array of runner classes
13
+ def load_runners
14
+ load_classes_from_directory('app/runners', Leva::BaseRun) || []
15
+ end
16
+
17
+ # Loads predefined prompts from markdown files
18
+ #
19
+ # @return [Array<Array<String, String>>] An array of prompt name and content pairs
20
+ def load_predefined_prompts
21
+ prompts = Dir.glob(Rails.root.join('app', 'prompts', '*.md')).map do |file|
22
+ name = File.basename(file, '.md').titleize
23
+ content = File.read(file)
24
+ [name, content]
25
+ end
26
+ prompts
27
+ end
28
+
29
+ private
30
+
31
+ # Loads classes from a specified directory that inherit from a given base class
32
+ #
33
+ # @param directory [String] The directory path to load classes from
34
+ # @param base_class [Class] The base class that loaded classes should inherit from
35
+ # @return [Array<Class>] An array of loaded classes
36
+ def load_classes_from_directory(directory, base_class)
37
+ classes = Dir[Rails.root.join(directory, '*.rb')].map do |file|
38
+ File.basename(file, '.rb').camelize.constantize
39
+ end.select { |klass| klass < base_class }
40
+ classes.empty? ? [] : classes
41
+ end
3
42
  end
4
43
  end
@@ -0,0 +1,45 @@
1
+ import { Controller } from "@hotwired/stimulus";
2
+
3
+ export default class extends Controller {
4
+ static targets = ["form"];
5
+
6
+ autoSave() {
7
+ clearTimeout(this.timeout);
8
+ this.timeout = setTimeout(() => {
9
+ this.submitForm();
10
+ }, 500);
11
+ }
12
+
13
+ submitForm() {
14
+ const form = this.element;
15
+ const formData = new FormData(form);
16
+
17
+ fetch(form.action, {
18
+ method: form.method,
19
+ body: formData,
20
+ headers: {
21
+ Accept: "application/json",
22
+ "X-CSRF-Token": document.querySelector('meta[name="csrf-token"]').content,
23
+ },
24
+ })
25
+ .then((response) => response.json())
26
+ .then((data) => {
27
+ const statusElement = document.getElementById("form-status");
28
+ if (data.status === "success") {
29
+ statusElement.textContent = "Changes saved successfully";
30
+ statusElement.classList.add("text-green-500");
31
+ statusElement.classList.remove("text-red-500");
32
+ } else {
33
+ statusElement.textContent = `Error: ${data.errors.join(", ")}`;
34
+ statusElement.classList.add("text-red-500");
35
+ statusElement.classList.remove("text-green-500");
36
+ }
37
+ setTimeout(() => {
38
+ statusElement.textContent = "";
39
+ }, 3000);
40
+ })
41
+ .catch((error) => {
42
+ console.error("Error:", error);
43
+ });
44
+ }
45
+ }
@@ -0,0 +1,31 @@
1
+ import { Controller } from "@hotwired/stimulus";
2
+
3
+ export default class extends Controller {
4
+ static targets = ["userPromptField"];
5
+
6
+ toggleUserPrompt(event) {
7
+ const selectedFile = event.target.value;
8
+ if (selectedFile) {
9
+ this.userPromptFieldTarget.style.display = "none";
10
+ this.loadPredefinedPrompt(selectedFile);
11
+ } else {
12
+ this.userPromptFieldTarget.style.display = "block";
13
+ this.clearUserPrompt();
14
+ }
15
+ }
16
+
17
+ loadPredefinedPrompt(file) {
18
+ fetch(file)
19
+ .then((response) => response.text())
20
+ .then((content) => {
21
+ const userPromptTextarea = this.userPromptFieldTarget.querySelector("textarea");
22
+ userPromptTextarea.value = content;
23
+ })
24
+ .catch((error) => console.error("Error loading predefined prompt:", error));
25
+ }
26
+
27
+ clearUserPrompt() {
28
+ const userPromptTextarea = this.userPromptFieldTarget.querySelector("textarea");
29
+ userPromptTextarea.value = "";
30
+ }
31
+ }
@@ -4,13 +4,18 @@ module Leva
4
4
  class ExperimentJob < ApplicationJob
5
5
  queue_as :default
6
6
 
7
- # Perform the experiment
7
+ # Perform the experiment by scheduling all dataset records for evaluation
8
8
  #
9
9
  # @param experiment [Experiment] The experiment to run
10
10
  # @return [void]
11
- def perform(eval, record)
12
- result = eval.run_each(record)
13
- eval.save_result(result)
11
+ def perform(experiment)
12
+ return if experiment.completed? || experiment.running?
13
+
14
+ experiment.update!(status: :running)
15
+
16
+ experiment.dataset.dataset_records.each_with_index do |record, index|
17
+ RunEvalJob.set(wait: 3.seconds * index).perform_later(experiment.id, record.id)
18
+ end
14
19
  end
15
20
  end
16
21
  end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Leva
4
+ class RunEvalJob < ApplicationJob
5
+ queue_as :default
6
+
7
+ # Perform a single run and evaluation for a dataset record
8
+ #
9
+ # @param experiment_id [Integer] The ID of the experiment
10
+ # @param dataset_record_id [Integer] The ID of the dataset record
11
+ # @return [void]
12
+ def perform(experiment_id, dataset_record_id)
13
+ experiment = Experiment.find(experiment_id)
14
+ dataset_record = DatasetRecord.find(dataset_record_id)
15
+
16
+ run = constantize_class(experiment.runner_class).new
17
+ evals = experiment.evaluator_classes.compact.reject(&:empty?).map { |klass| constantize_class(klass).new }
18
+
19
+ Leva.run_single_evaluation(experiment: experiment, run: run, evals: evals, dataset_record: dataset_record)
20
+
21
+ experiment.update!(status: :completed) if is_last(experiment)
22
+ end
23
+
24
+ private
25
+
26
+ def constantize_class(class_name)
27
+ class_name.constantize
28
+ rescue NameError => e
29
+ raise NameError, "Invalid class name: #{class_name}. Error: #{e.message}"
30
+ end
31
+
32
+ # Check if all dataset records for the experiment have a runner result
33
+ #
34
+ # @param experiment [Experiment] The experiment to check
35
+ # @return [Boolean] True if all dataset records have a runner result, false otherwise
36
+ def is_last(experiment)
37
+ experiment.dataset.dataset_records.count == experiment.runner_results.count
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,37 @@
1
+ module Leva
2
+ module Recordable
3
+ extend ActiveSupport::Concern
4
+
5
+ included do
6
+ has_many :dataset_records, as: :recordable, class_name: 'Leva::DatasetRecord', dependent: :destroy
7
+ has_many :datasets, through: :dataset_records, class_name: 'Leva::Dataset'
8
+ has_many :runner_results, through: :dataset_records, class_name: 'Leva::RunnerResult'
9
+ has_many :evaluation_results, through: :runner_results, class_name: 'Leva::EvaluationResult'
10
+ end
11
+
12
+ # @return [String] The ground truth label for the record
13
+ def ground_truth
14
+ raise NotImplementedError, "#{self.class} must implement #ground_truth"
15
+ end
16
+
17
+ # @return [Hash] A hash of attributes to be displayed in the dataset records index
18
+ def index_attributes
19
+ raise NotImplementedError, "#{self.class} must implement #index_attributes"
20
+ end
21
+
22
+ # @return [Hash] A hash of attributes to be displayed in the dataset record show view
23
+ def show_attributes
24
+ raise NotImplementedError, "#{self.class} must implement #show_attributes"
25
+ end
26
+
27
+ # @return [Hash] A hash of attributes to be liquified for LLM context
28
+ def to_llm_context
29
+ raise NotImplementedError, "#{self.class} must implement #to_llm_context"
30
+ end
31
+
32
+ # @return [Regexp] A regex pattern to extract the contents of a LLM response
33
+ def extract_regex_pattern
34
+ false
35
+ end
36
+ end
37
+ end
@@ -2,18 +2,27 @@
2
2
  #
3
3
  # Table name: leva_datasets
4
4
  #
5
- # id :integer not null, primary key
6
- # name :string
7
- # created_at :datetime not null
8
- # updated_at :datetime not null
5
+ # id :integer not null, primary key
6
+ # description :text
7
+ # name :string
8
+ # created_at :datetime not null
9
+ # updated_at :datetime not null
9
10
  #
10
11
  module Leva
11
12
  class Dataset < ApplicationRecord
12
13
  has_many :dataset_records, dependent: :destroy
13
14
  has_many :experiments, dependent: :destroy
14
15
 
16
+ validates :name, presence: true
17
+
18
+ # Adds a record to the dataset if it doesn't already exist
19
+ #
20
+ # @param record [ActiveRecord::Base] The record to be added to the dataset
21
+ # @return [Leva::DatasetRecord, nil] The created dataset record or nil if it already exists
15
22
  def add_record(record)
16
- dataset_records.create(recordable: record)
23
+ dataset_records.find_or_create_by(recordable: record) do |dr|
24
+ dr.recordable = record
25
+ end
17
26
  end
18
27
  end
19
- end
28
+ end