leva 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/README.md +55 -16
  3. data/app/controllers/leva/dataset_records_controller.rb +21 -0
  4. data/app/controllers/leva/datasets_controller.rb +9 -2
  5. data/app/controllers/leva/experiments_controller.rb +34 -9
  6. data/app/controllers/leva/runner_results_controller.rb +8 -0
  7. data/app/controllers/leva/workbench_controller.rb +85 -12
  8. data/app/helpers/leva/application_helper.rb +39 -0
  9. data/app/javascript/controllers/prompt_form_controller.js +45 -0
  10. data/app/javascript/controllers/prompt_selector_controller.js +31 -0
  11. data/app/jobs/leva/experiment_job.rb +9 -4
  12. data/app/jobs/leva/run_eval_job.rb +40 -0
  13. data/app/models/concerns/leva/recordable.rb +37 -0
  14. data/app/models/leva/dataset.rb +15 -6
  15. data/app/models/leva/dataset_record.rb +40 -1
  16. data/app/models/leva/evaluation_result.rb +15 -7
  17. data/app/models/leva/experiment.rb +24 -12
  18. data/app/models/leva/prompt.rb +14 -1
  19. data/app/models/leva/runner_result.rb +56 -0
  20. data/app/views/layouts/leva/application.html.erb +24 -13
  21. data/app/views/leva/dataset_records/index.html.erb +49 -0
  22. data/app/views/leva/dataset_records/show.html.erb +30 -0
  23. data/app/views/leva/datasets/_dataset.html.erb +18 -0
  24. data/app/views/leva/datasets/_form.html.erb +24 -0
  25. data/app/views/leva/datasets/edit.html.erb +5 -0
  26. data/app/views/leva/datasets/index.html.erb +51 -38
  27. data/app/views/leva/datasets/new.html.erb +5 -0
  28. data/app/views/leva/datasets/show.html.erb +160 -8
  29. data/app/views/leva/experiments/_experiment.html.erb +42 -0
  30. data/app/views/leva/experiments/_form.html.erb +49 -0
  31. data/app/views/leva/experiments/edit.html.erb +5 -0
  32. data/app/views/leva/experiments/index.html.erb +53 -37
  33. data/app/views/leva/experiments/new.html.erb +5 -0
  34. data/app/views/leva/experiments/show.html.erb +115 -19
  35. data/app/views/leva/runner_results/show.html.erb +64 -0
  36. data/app/views/leva/workbench/_evaluation_area.html.erb +5 -0
  37. data/app/views/leva/workbench/_prompt_content.html.erb +216 -0
  38. data/app/views/leva/workbench/_prompt_form.html.erb +89 -0
  39. data/app/views/leva/workbench/_prompt_sidebar.html.erb +21 -0
  40. data/app/views/leva/workbench/_results_section.html.erb +159 -0
  41. data/app/views/leva/workbench/_top_bar.html.erb +10 -0
  42. data/app/views/leva/workbench/edit.html.erb +20 -0
  43. data/app/views/leva/workbench/index.html.erb +5 -91
  44. data/app/views/leva/workbench/new.html.erb +79 -36
  45. data/config/routes.rb +15 -6
  46. data/db/migrate/20240813172916_create_leva_datasets.rb +1 -0
  47. data/db/migrate/20240813173035_create_leva_experiments.rb +1 -0
  48. data/db/migrate/20240816201419_create_leva_runner_results.rb +11 -0
  49. data/db/migrate/20240816201433_update_leva_evaluation_results.rb +8 -0
  50. data/db/migrate/20240821163608_make_experiment_optional_for_runner_results.rb +6 -0
  51. data/db/migrate/20240821181934_add_prompt_to_leva_runner_results.rb +5 -0
  52. data/db/migrate/20240821183153_add_runner_and_evaluator_to_leva_experiments.rb +6 -0
  53. data/db/migrate/20240821191713_add_actual_result_to_leva_dataset_records.rb +5 -0
  54. data/db/migrate/20240822143201_remove_actual_result_from_leva_runner_results.rb +5 -0
  55. data/db/migrate/20240912183556_add_runner_class_to_leva_runner_results.rb +5 -0
  56. data/lib/generators/leva/templates/eval.rb.erb +7 -8
  57. data/lib/generators/leva/templates/runner.rb.erb +25 -0
  58. data/lib/leva/version.rb +1 -1
  59. data/lib/leva.rb +84 -44
  60. metadata +49 -5
  61. data/app/evals/test_sentiment_accuracy_eval.rb +0 -6
  62. data/app/runners/test_sentiment_run.rb +0 -13
  63. data/lib/leva/base_eval.rb +0 -75
@@ -1,75 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Leva
4
- class BaseEval
5
- class << self
6
- attr_reader :dataset_record_class_name
7
-
8
- # Set the dataset record class for the eval
9
- # @param class_name [String] The name of the dataset record class
10
- def leva_dataset_record_class(class_name)
11
- @dataset_record_class_name = class_name
12
- end
13
-
14
- # Run the experiment
15
- # @param experiment [Leva::Experiment] The experiment to run
16
- def run_experiment(experiment)
17
- new.run_experiment(experiment)
18
- end
19
- end
20
-
21
- # Run the experiment
22
- # @param experiment [Leva::Experiment] The experiment to run
23
- def run_experiment(experiment)
24
- @experiment = experiment
25
- @experiment.update(status: :running)
26
-
27
- @experiment.dataset.records.each do |record|
28
- @record = record
29
- unless @record.class_name == self.class.dataset_record_class_name
30
- raise ArgumentError, "Record class #{@record.class_name} does not match expected class #{self.class.dataset_record_class_name}"
31
- end
32
- ExperimentJob.perform_later(self, @record)
33
- end
34
-
35
- @experiment.update(status: :completed)
36
- rescue StandardError => e
37
- @experiment.update(status: :failed)
38
- Rails.logger.error "Error in experiment #{@experiment.name}: #{e.message}"
39
- end
40
-
41
- # Run the evaluation for a single record
42
- # @param record [ActiveRecord::Base] The record to evaluate
43
- # @return [Leva::Result] The result of the evaluation
44
- def run_each(record)
45
- raise NotImplementedError, "Subclasses must implement the 'run_each' method"
46
- end
47
-
48
- # Save the result of an evaluation
49
- # @param result [Leva::Result] The result of the evaluation
50
- def save_result(result)
51
- Leva::EvaluationResult.create!(
52
- experiment: @experiment,
53
- dataset_record: Leva::DatasetRecord.find_by(recordable: @record, dataset: @experiment.dataset),
54
- prediction: result.prediction,
55
- score: result.score,
56
- label: result.label
57
- )
58
- end
59
- end
60
-
61
- # Represents the result of an evaluation
62
- class Result
63
- attr_reader :label, :prediction, :score
64
-
65
- # Initialize a new Result
66
- # @param label [String] The label for the result
67
- # @param prediction [String] The prediction made by the evaluation
68
- # @param score [Float] The score of the evaluation (0.0 to 1.0)
69
- def initialize(label:, prediction:, score:)
70
- @label = label
71
- @prediction = prediction
72
- @score = score
73
- end
74
- end
75
- end