leva 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +55 -16
- data/app/controllers/leva/dataset_records_controller.rb +21 -0
- data/app/controllers/leva/datasets_controller.rb +9 -2
- data/app/controllers/leva/experiments_controller.rb +34 -9
- data/app/controllers/leva/runner_results_controller.rb +8 -0
- data/app/controllers/leva/workbench_controller.rb +85 -12
- data/app/helpers/leva/application_helper.rb +39 -0
- data/app/javascript/controllers/prompt_form_controller.js +45 -0
- data/app/javascript/controllers/prompt_selector_controller.js +31 -0
- data/app/jobs/leva/experiment_job.rb +9 -4
- data/app/jobs/leva/run_eval_job.rb +40 -0
- data/app/models/concerns/leva/recordable.rb +37 -0
- data/app/models/leva/dataset.rb +15 -6
- data/app/models/leva/dataset_record.rb +40 -1
- data/app/models/leva/evaluation_result.rb +15 -7
- data/app/models/leva/experiment.rb +24 -12
- data/app/models/leva/prompt.rb +14 -1
- data/app/models/leva/runner_result.rb +56 -0
- data/app/views/layouts/leva/application.html.erb +24 -13
- data/app/views/leva/dataset_records/index.html.erb +49 -0
- data/app/views/leva/dataset_records/show.html.erb +30 -0
- data/app/views/leva/datasets/_dataset.html.erb +18 -0
- data/app/views/leva/datasets/_form.html.erb +24 -0
- data/app/views/leva/datasets/edit.html.erb +5 -0
- data/app/views/leva/datasets/index.html.erb +51 -38
- data/app/views/leva/datasets/new.html.erb +5 -0
- data/app/views/leva/datasets/show.html.erb +160 -8
- data/app/views/leva/experiments/_experiment.html.erb +42 -0
- data/app/views/leva/experiments/_form.html.erb +49 -0
- data/app/views/leva/experiments/edit.html.erb +5 -0
- data/app/views/leva/experiments/index.html.erb +53 -37
- data/app/views/leva/experiments/new.html.erb +5 -0
- data/app/views/leva/experiments/show.html.erb +115 -19
- data/app/views/leva/runner_results/show.html.erb +64 -0
- data/app/views/leva/workbench/_evaluation_area.html.erb +5 -0
- data/app/views/leva/workbench/_prompt_content.html.erb +216 -0
- data/app/views/leva/workbench/_prompt_form.html.erb +89 -0
- data/app/views/leva/workbench/_prompt_sidebar.html.erb +21 -0
- data/app/views/leva/workbench/_results_section.html.erb +159 -0
- data/app/views/leva/workbench/_top_bar.html.erb +10 -0
- data/app/views/leva/workbench/edit.html.erb +20 -0
- data/app/views/leva/workbench/index.html.erb +5 -91
- data/app/views/leva/workbench/new.html.erb +79 -36
- data/config/routes.rb +15 -6
- data/db/migrate/20240813172916_create_leva_datasets.rb +1 -0
- data/db/migrate/20240813173035_create_leva_experiments.rb +1 -0
- data/db/migrate/20240816201419_create_leva_runner_results.rb +11 -0
- data/db/migrate/20240816201433_update_leva_evaluation_results.rb +8 -0
- data/db/migrate/20240821163608_make_experiment_optional_for_runner_results.rb +6 -0
- data/db/migrate/20240821181934_add_prompt_to_leva_runner_results.rb +5 -0
- data/db/migrate/20240821183153_add_runner_and_evaluator_to_leva_experiments.rb +6 -0
- data/db/migrate/20240821191713_add_actual_result_to_leva_dataset_records.rb +5 -0
- data/db/migrate/20240822143201_remove_actual_result_from_leva_runner_results.rb +5 -0
- data/db/migrate/20240912183556_add_runner_class_to_leva_runner_results.rb +5 -0
- data/lib/generators/leva/templates/eval.rb.erb +7 -8
- data/lib/generators/leva/templates/runner.rb.erb +25 -0
- data/lib/leva/version.rb +1 -1
- data/lib/leva.rb +84 -44
- metadata +49 -5
- data/app/evals/test_sentiment_accuracy_eval.rb +0 -6
- data/app/runners/test_sentiment_run.rb +0 -13
- data/lib/leva/base_eval.rb +0 -75
data/lib/leva/base_eval.rb
DELETED
@@ -1,75 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Leva
|
4
|
-
class BaseEval
|
5
|
-
class << self
|
6
|
-
attr_reader :dataset_record_class_name
|
7
|
-
|
8
|
-
# Set the dataset record class for the eval
|
9
|
-
# @param class_name [String] The name of the dataset record class
|
10
|
-
def leva_dataset_record_class(class_name)
|
11
|
-
@dataset_record_class_name = class_name
|
12
|
-
end
|
13
|
-
|
14
|
-
# Run the experiment
|
15
|
-
# @param experiment [Leva::Experiment] The experiment to run
|
16
|
-
def run_experiment(experiment)
|
17
|
-
new.run_experiment(experiment)
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
# Run the experiment
|
22
|
-
# @param experiment [Leva::Experiment] The experiment to run
|
23
|
-
def run_experiment(experiment)
|
24
|
-
@experiment = experiment
|
25
|
-
@experiment.update(status: :running)
|
26
|
-
|
27
|
-
@experiment.dataset.records.each do |record|
|
28
|
-
@record = record
|
29
|
-
unless @record.class_name == self.class.dataset_record_class_name
|
30
|
-
raise ArgumentError, "Record class #{@record.class_name} does not match expected class #{self.class.dataset_record_class_name}"
|
31
|
-
end
|
32
|
-
ExperimentJob.perform_later(self, @record)
|
33
|
-
end
|
34
|
-
|
35
|
-
@experiment.update(status: :completed)
|
36
|
-
rescue StandardError => e
|
37
|
-
@experiment.update(status: :failed)
|
38
|
-
Rails.logger.error "Error in experiment #{@experiment.name}: #{e.message}"
|
39
|
-
end
|
40
|
-
|
41
|
-
# Run the evaluation for a single record
|
42
|
-
# @param record [ActiveRecord::Base] The record to evaluate
|
43
|
-
# @return [Leva::Result] The result of the evaluation
|
44
|
-
def run_each(record)
|
45
|
-
raise NotImplementedError, "Subclasses must implement the 'run_each' method"
|
46
|
-
end
|
47
|
-
|
48
|
-
# Save the result of an evaluation
|
49
|
-
# @param result [Leva::Result] The result of the evaluation
|
50
|
-
def save_result(result)
|
51
|
-
Leva::EvaluationResult.create!(
|
52
|
-
experiment: @experiment,
|
53
|
-
dataset_record: Leva::DatasetRecord.find_by(recordable: @record, dataset: @experiment.dataset),
|
54
|
-
prediction: result.prediction,
|
55
|
-
score: result.score,
|
56
|
-
label: result.label
|
57
|
-
)
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
# Represents the result of an evaluation
|
62
|
-
class Result
|
63
|
-
attr_reader :label, :prediction, :score
|
64
|
-
|
65
|
-
# Initialize a new Result
|
66
|
-
# @param label [String] The label for the result
|
67
|
-
# @param prediction [String] The prediction made by the evaluation
|
68
|
-
# @param score [Float] The score of the evaluation (0.0 to 1.0)
|
69
|
-
def initialize(label:, prediction:, score:)
|
70
|
-
@label = label
|
71
|
-
@prediction = prediction
|
72
|
-
@score = score
|
73
|
-
end
|
74
|
-
end
|
75
|
-
end
|