leva 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +52 -16
- data/app/controllers/leva/dataset_records_controller.rb +21 -0
- data/app/controllers/leva/datasets_controller.rb +9 -2
- data/app/controllers/leva/experiments_controller.rb +34 -9
- data/app/controllers/leva/runner_results_controller.rb +8 -0
- data/app/controllers/leva/workbench_controller.rb +85 -12
- data/app/helpers/leva/application_helper.rb +39 -0
- data/app/javascript/controllers/prompt_form_controller.js +45 -0
- data/app/javascript/controllers/prompt_selector_controller.js +31 -0
- data/app/jobs/leva/experiment_job.rb +9 -4
- data/app/jobs/leva/run_eval_job.rb +40 -0
- data/app/models/concerns/leva/recordable.rb +37 -0
- data/app/models/leva/dataset.rb +15 -6
- data/app/models/leva/dataset_record.rb +43 -5
- data/app/models/leva/evaluation_result.rb +22 -14
- data/app/models/leva/experiment.rb +26 -14
- data/app/models/leva/prompt.rb +14 -1
- data/app/models/leva/runner_result.rb +54 -0
- data/app/views/layouts/leva/application.html.erb +24 -13
- data/app/views/leva/dataset_records/index.html.erb +49 -0
- data/app/views/leva/dataset_records/show.html.erb +30 -0
- data/app/views/leva/datasets/_dataset.html.erb +18 -0
- data/app/views/leva/datasets/_form.html.erb +24 -0
- data/app/views/leva/datasets/edit.html.erb +5 -0
- data/app/views/leva/datasets/index.html.erb +51 -38
- data/app/views/leva/datasets/new.html.erb +5 -0
- data/app/views/leva/datasets/show.html.erb +160 -8
- data/app/views/leva/experiments/_experiment.html.erb +42 -0
- data/app/views/leva/experiments/_form.html.erb +49 -0
- data/app/views/leva/experiments/edit.html.erb +5 -0
- data/app/views/leva/experiments/index.html.erb +53 -37
- data/app/views/leva/experiments/new.html.erb +5 -0
- data/app/views/leva/experiments/show.html.erb +115 -19
- data/app/views/leva/runner_results/show.html.erb +64 -0
- data/app/views/leva/workbench/_evaluation_area.html.erb +5 -0
- data/app/views/leva/workbench/_prompt_content.html.erb +216 -0
- data/app/views/leva/workbench/_prompt_form.html.erb +89 -0
- data/app/views/leva/workbench/_prompt_sidebar.html.erb +21 -0
- data/app/views/leva/workbench/_results_section.html.erb +159 -0
- data/app/views/leva/workbench/_top_bar.html.erb +10 -0
- data/app/views/leva/workbench/edit.html.erb +20 -0
- data/app/views/leva/workbench/index.html.erb +5 -91
- data/app/views/leva/workbench/new.html.erb +79 -36
- data/config/routes.rb +15 -6
- data/db/migrate/20240813172916_create_leva_datasets.rb +1 -0
- data/db/migrate/20240813173033_create_leva_dataset_records.rb +1 -1
- data/db/migrate/20240813173035_create_leva_experiments.rb +3 -2
- data/db/migrate/20240813173050_create_leva_evaluation_results.rb +2 -2
- data/db/migrate/20240816201419_create_leva_runner_results.rb +11 -0
- data/db/migrate/20240816201433_update_leva_evaluation_results.rb +8 -0
- data/db/migrate/20240821163608_make_experiment_optional_for_runner_results.rb +6 -0
- data/db/migrate/20240821181934_add_prompt_to_leva_runner_results.rb +5 -0
- data/db/migrate/20240821183153_add_runner_and_evaluator_to_leva_experiments.rb +6 -0
- data/db/migrate/20240821191713_add_actual_result_to_leva_dataset_records.rb +5 -0
- data/db/migrate/20240822143201_remove_actual_result_from_leva_runner_results.rb +5 -0
- data/lib/generators/leva/templates/eval.rb.erb +6 -7
- data/lib/leva/version.rb +1 -1
- data/lib/leva.rb +62 -45
- metadata +48 -5
- data/app/evals/test_sentiment_accuracy_eval.rb +0 -6
- data/app/runners/test_sentiment_run.rb +0 -13
- data/lib/leva/base_eval.rb +0 -75
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 2abd3b8bab0e39683850e7e95df8815268332022c2adf6846cd27442e880cb88
+  data.tar.gz: 73c430d4a5a2c6a98dba70a36be988dd40541debbf10b274d62db8e232e931f7
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: ee31e67dae95204cb6c6f2fc96ad41dae624ea48f6760b65571f8f1447fd24ca055a4dba33244067f0138db79a620ec82e8bbb3d3fd4363f9b6e98079b45b6a7
+  data.tar.gz: fadb2838c4d31e498f40ed8e5d20859c57e4e2df433fae1251d3215ddcbc3a2c4843b5a7bd0b73bb4ff4e2b3abb55842b83d7a018ad7feaa40a4da89e6313adc
data/README.md
CHANGED
@@ -27,14 +27,46 @@ rails db:migrate
 
 ### 1. Setting up Datasets
 
-First, create a dataset and add any ActiveRecord records you want to evaluate against:
+First, create a dataset and add any ActiveRecord records you want to evaluate against. To make your models compatible with Leva, include the `Leva::Recordable` concern in your model:
 
-
-
-
-
-
-
+````ruby
+class TextContent < ApplicationRecord
+  include Leva::Recordable
+
+  # @return [String] The ground truth label for the record
+  def ground_truth
+    expected_label
+  end
+
+  # @return [Hash] A hash of attributes to be displayed in the dataset records index
+  def index_attributes
+    {
+      text: text,
+      expected_label: expected_label,
+      created_at: created_at.strftime('%Y-%m-%d %H:%M:%S')
+    }
+  end
+
+  # @return [Hash] A hash of attributes to be displayed in the dataset record show view
+  def show_attributes
+    {
+      text: text,
+      expected_label: expected_label,
+      created_at: created_at.strftime('%Y-%m-%d %H:%M:%S')
+    }
+  end
+
+  # @return [Hash] A hash of attributes to be displayed in the dataset record show view
+  def to_llm_context
+    {
+      text: text,
+      expected_label: expected_label,
+      created_at: created_at.strftime('%Y-%m-%d %H:%M:%S')
+    }
+  end
+end
+
+dataset = Leva::Dataset.create(name: "Sentiment Analysis Dataset") dataset.add_record TextContent.create(text: "I love this product!", expected_label: "Positive") dataset.add_record TextContent.create(text: "Terrible experience", expected_label: "Negative") dataset.add_record TextContent.create(text: "It's ok", expected_label: "Neutral")
 
 ### 2. Implementing Runs
 
@@ -42,7 +74,7 @@ Create a run class to handle the execution of your inference logic:
 
 ```bash
 rails generate leva:runner sentiment
-
+````
 
 ```ruby
 class SentimentRun < Leva::BaseRun
@@ -64,17 +96,17 @@ rails generate leva:eval sentiment_accuracy
 
 ```ruby
 class SentimentAccuracyEval < Leva::BaseEval
-  def evaluate(prediction,
-    score = prediction ==
-
+  def evaluate(prediction, record)
+    score = prediction == record.expected_label ? 1.0 : 0.0
+    [score, record.expected_label]
   end
 end
 
 class SentimentF1Eval < Leva::BaseEval
-  def evaluate(prediction,
+  def evaluate(prediction, record)
     # Calculate F1 score
     # ...
-
+    [f1_score, record.f1_score]
   end
 end
 ```
@@ -122,9 +154,9 @@ Leva.run_evaluation(experiment: experiment, run: run, evals: evals)
 After the experiments are complete, analyze the results:
 
 ```ruby
-experiment.evaluation_results.group_by(&:
+experiment.evaluation_results.group_by(&:evaluator_class).each do |evaluator_class, results|
   average_score = results.average(&:score)
-  puts "#{
+  puts "#{evaluator_class.capitalize} Average Score: #{average_score}"
 end
 ```
 
@@ -139,13 +171,13 @@ Ensure you set up any required API keys or other configurations in your Rails cr
 - `Leva`: Handles the process of running experiments.
 - `Leva::BaseRun`: Base class for run implementations.
 - `Leva::BaseEval`: Base class for eval implementations.
-- `Leva::Result`: Represents the result of an evaluation.
 
 ### Models
 
 - `Leva::Dataset`: Represents a collection of data to be evaluated.
 - `Leva::DatasetRecord`: Represents individual records within a dataset.
 - `Leva::Experiment`: Represents a single run of an evaluation on a dataset.
+- `Leva::RunnerResult`: Stores the results of each run execution.
 - `Leva::EvaluationResult`: Stores the results of each evaluation.
 - `Leva::Prompt`: Represents a prompt for an LLM.
 
@@ -156,3 +188,7 @@ Bug reports and pull requests are welcome on GitHub at https://github.com/kieran
 ## License
 
 The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
+
+## Roadmap
+
+- [ ] Parallelize evaluation
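Read together with the controller and job changes further down in this diff, the 0.1.6 persisted-experiment flow looks roughly like the sketch below. This is an illustration, not documented API: the attribute names (`runner_class`, `evaluator_classes`, `prompt_id`) come from `experiment_params` and `RunEvalJob`, the statuses from `ExperimentJob`, and whether a prompt is required is not shown in this diff.

```ruby
# Sketch only: reuses the README's TextContent example model.
dataset = Leva::Dataset.create(name: "Sentiment Analysis Dataset")
dataset.add_record TextContent.create(text: "I love this product!", expected_label: "Positive")

experiment = Leva::Experiment.create!(
  name: "Sentiment baseline",
  dataset: dataset,
  runner_class: "SentimentRun",                  # constantized by RunEvalJob
  evaluator_classes: ["SentimentAccuracyEval"]   # permitted as an array in experiment_params
)

# ExperimentJob marks the experiment as running and enqueues one RunEvalJob
# per dataset record, staggered by 3 seconds each.
Leva::ExperimentJob.perform_later(experiment)
```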
data/app/controllers/leva/dataset_records_controller.rb
ADDED
@@ -0,0 +1,21 @@
+module Leva
+  class DatasetRecordsController < ApplicationController
+    before_action :set_dataset
+
+    # GET /datasets/:dataset_id/records
+    def index
+      @records = @dataset.dataset_records.includes(:recordable)
+    end
+
+    # GET /datasets/:dataset_id/records/:id
+    def show
+      @record = @dataset.dataset_records.find(params[:id])
+    end
+
+    private
+
+    def set_dataset
+      @dataset = Dataset.find(params[:dataset_id])
+    end
+  end
+end
data/app/controllers/leva/datasets_controller.rb
CHANGED
@@ -13,6 +13,8 @@ module Leva
     # GET /datasets/1
     # @return [void]
     def show
+      @experiments = @dataset.experiments
+      @new_experiment = Experiment.new(dataset: @dataset)
     end
 
     # GET /datasets/new
@@ -24,6 +26,7 @@ module Leva
     # GET /datasets/1/edit
     # @return [void]
     def edit
+      # The @dataset is already set by the before_action
     end
 
     # POST /datasets
@@ -51,8 +54,12 @@ module Leva
     # DELETE /datasets/1
     # @return [void]
     def destroy
-      @dataset.
-
+      if @dataset.dataset_records.any?
+        redirect_to @dataset, alert: 'Cannot delete dataset with existing records.'
+      else
+        @dataset.destroy
+        redirect_to datasets_url, notice: 'Dataset was successfully destroyed.'
+      end
     end
 
     private
data/app/controllers/leva/experiments_controller.rb
CHANGED
@@ -2,7 +2,11 @@
 
 module Leva
   class ExperimentsController < ApplicationController
-
+    include ApplicationHelper
+
+    before_action :set_experiment, only: [:show, :edit, :update]
+    before_action :check_editable, only: [:edit, :update]
+    before_action :load_runners_and_evaluators, only: [:new, :edit, :create, :update]
 
     # GET /experiments
     # @return [void]
@@ -13,17 +17,19 @@ module Leva
     # GET /experiments/1
     # @return [void]
     def show
+      @experiment = Experiment.includes(runner_results: :evaluation_results).find(params[:id])
     end
 
     # GET /experiments/new
     # @return [void]
     def new
-      @experiment = Experiment.new
+      @experiment = Experiment.new(dataset_id: params[:dataset_id])
     end
 
     # GET /experiments/1/edit
     # @return [void]
     def edit
+      # The @experiment is already set by the before_action
     end
 
     # POST /experiments
@@ -32,8 +38,8 @@ module Leva
       @experiment = Experiment.new(experiment_params)
 
       if @experiment.save
-        ExperimentJob.perform_later(@experiment)
-        redirect_to @experiment, notice: 'Experiment was successfully created.'
+        ExperimentJob.perform_later(@experiment) unless @experiment.completed?
+        redirect_to @experiment, notice: 'Experiment was successfully created and is now running.'
       else
         render :new
       end
@@ -49,11 +55,21 @@ module Leva
       end
     end
 
-    #
+    # POST /experiments/1/rerun
     # @return [void]
-    def
-      @experiment.
-
+    def rerun
+      @experiment = Experiment.find(params[:id])
+
+      # Delete existing runner results and evaluation results
+      @experiment.runner_results.destroy_all
+
+      # Reset experiment status to pending
+      @experiment.update(status: :pending)
+
+      # Queue the job again
+      ExperimentJob.perform_later(@experiment)
+
+      redirect_to @experiment, notice: 'Experiment has been reset and is now running again.'
     end
 
     private
@@ -67,7 +83,16 @@ module Leva
     # Only allow a list of trusted parameters through.
     # @return [ActionController::Parameters]
     def experiment_params
-      params.require(:experiment).permit(:name, :description, :dataset_id)
+      params.require(:experiment).permit(:name, :description, :dataset_id, :prompt_id, :runner_class, evaluator_classes: [])
+    end
+
+    def load_runners_and_evaluators
+      @runners = load_runners
+      @evaluators = load_evaluators
+    end
+
+    def check_editable
+      redirect_to @experiment, alert: 'Completed experiments cannot be edited.' if @experiment.completed?
     end
   end
 end
data/app/controllers/leva/workbench_controller.rb
CHANGED
@@ -2,39 +2,112 @@
 
 module Leva
   class WorkbenchController < ApplicationController
+    include ApplicationHelper
+
+    before_action :set_prompt, only: [:index, :edit, :update, :run, :run_all_evals, :run_evaluator]
+    before_action :set_dataset_record, only: [:index, :run, :run_all_evals, :run_evaluator]
+    before_action :set_runner_result, only: [:index, :run_all_evals, :run_evaluator]
+
     # GET /workbench
     # @return [void]
     def index
       @prompts = Prompt.all
-      @selected_prompt =
-      @evaluators =
+      @selected_prompt = @prompt || Prompt.first
+      @evaluators = load_evaluators
+      @runners = load_runners
+      @selected_runner = params[:runner] || @runners.first&.name
+      @selected_dataset_record = params[:dataset_record_id] || DatasetRecord.first&.id
     end
 
     # GET /workbench/new
     # @return [void]
     def new
-      @
+      @prompt = Prompt.new
+      @predefined_prompts = load_predefined_prompts
+    end
+
+    # POST /workbench
+    # @return [void]
+    def create
+      @prompt = Prompt.new(prompt_params)
+      if @prompt.save
+        redirect_to workbench_index_path(prompt_id: @prompt.id), notice: 'Prompt was successfully created.'
+      else
+        render :new
+      end
     end
 
     # GET /workbench/1
     # @return [void]
-    def
-
+    def edit
+    end
+
+    # PATCH/PUT /workbench/1
+    # @return [void]
+    def update
+      @prompt = Prompt.find(params[:id])
+      if @prompt.update(prompt_params)
+        render json: { status: 'success', message: 'Prompt updated successfully' }
+      else
+        render json: { status: 'error', errors: @prompt.errors.full_messages }, status: :unprocessable_entity
+      end
     end
 
     def run
-
-
+      return redirect_to workbench_index_path, alert: 'Please select a record and a runner' unless @dataset_record && run_params[:runner]
+
+      runner_class = run_params[:runner].constantize
+      return redirect_to workbench_index_path, alert: 'Invalid runner selected' unless runner_class < Leva::BaseRun
+
+      runner = runner_class.new
+      runner_result = runner.execute_and_store(nil, @dataset_record, @prompt)
+
+      redirect_to workbench_index_path(prompt_id: @prompt.id, dataset_record_id: @dataset_record.id, runner: run_params[:runner]), notice: 'Run completed successfully'
     end
 
-    def
-
-
+    def run_all_evals
+      return redirect_to workbench_index_path, alert: 'No runner result available' unless @runner_result
+
+      load_evaluators.each do |evaluator_class|
+        evaluator = evaluator_class.new
+        evaluator.evaluate_and_store(nil, @runner_result)
+      end
+
+      redirect_to workbench_index_path(prompt_id: @prompt.id, dataset_record_id: @dataset_record.id, runner: params[:runner]), notice: 'All evaluations completed successfully'
     end
 
     def run_evaluator
-
-
+      return redirect_to workbench_index_path, alert: 'No runner result available' unless @runner_result
+
+      evaluator_class = params[:evaluator].constantize
+      return redirect_to workbench_index_path, alert: 'Invalid evaluator selected' unless evaluator_class < Leva::BaseEval
+
+      evaluator = evaluator_class.new
+      evaluator.evaluate_and_store(nil, @runner_result)
+
+      redirect_to workbench_index_path(prompt_id: @prompt.id, dataset_record_id: @dataset_record.id, runner: params[:runner]), notice: 'Evaluator run successfully'
+    end
+
+    private
+
+    def set_prompt
+      @prompt = params[:prompt_id] ? Prompt.find(params[:prompt_id]) : Prompt.first
+    end
+
+    def prompt_params
+      params.require(:prompt).permit(:name, :system_prompt, :user_prompt, :version)
+    end
+
+    def set_dataset_record
+      @dataset_record = DatasetRecord.find_by(id: params[:dataset_record_id]) || DatasetRecord.first
+    end
+
+    def run_params
+      params.permit(:runner, :prompt_id, :dataset_record_id)
+    end
+
+    def set_runner_result
+      @runner_result = @dataset_record.runner_results.last if @dataset_record
     end
   end
 end
data/app/helpers/leva/application_helper.rb
CHANGED
@@ -1,4 +1,43 @@
 module Leva
   module ApplicationHelper
+    # Loads all evaluator classes that inherit from Leva::BaseEval
+    #
+    # @return [Array<Class>] An array of evaluator classes
+    def load_evaluators
+      load_classes_from_directory('app/evals', Leva::BaseEval) || []
+    end
+
+    # Loads all runner classes that inherit from Leva::BaseRun
+    #
+    # @return [Array<Class>] An array of runner classes
+    def load_runners
+      load_classes_from_directory('app/runners', Leva::BaseRun) || []
+    end
+
+    # Loads predefined prompts from markdown files
+    #
+    # @return [Array<Array<String, String>>] An array of prompt name and content pairs
+    def load_predefined_prompts
+      prompts = Dir.glob(Rails.root.join('app', 'prompts', '*.md')).map do |file|
+        name = File.basename(file, '.md').titleize
+        content = File.read(file)
+        [name, content]
+      end
+      prompts
+    end
+
+    private
+
+    # Loads classes from a specified directory that inherit from a given base class
+    #
+    # @param directory [String] The directory path to load classes from
+    # @param base_class [Class] The base class that loaded classes should inherit from
+    # @return [Array<Class>] An array of loaded classes
+    def load_classes_from_directory(directory, base_class)
+      classes = Dir[Rails.root.join(directory, '*.rb')].map do |file|
+        File.basename(file, '.rb').camelize.constantize
+      end.select { |klass| klass < base_class }
+      classes.empty? ? [] : classes
+    end
   end
 end
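Discovery in `load_classes_from_directory` is filename based: the basename is camelized and constantized, then only subclasses of the given base are kept. So a host app's runner or eval must live in a file whose name matches its class. A hypothetical file that `load_evaluators` would pick up, with the class body mirroring the README example above:

```ruby
# app/evals/sentiment_accuracy_eval.rb
# "sentiment_accuracy_eval".camelize => "SentimentAccuracyEval", so the
# constant name and the filename must line up for discovery to work.
class SentimentAccuracyEval < Leva::BaseEval
  def evaluate(prediction, record)
    score = prediction == record.expected_label ? 1.0 : 0.0
    [score, record.expected_label]
  end
end
```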
data/app/javascript/controllers/prompt_form_controller.js
ADDED
@@ -0,0 +1,45 @@
+import { Controller } from "@hotwired/stimulus";
+
+export default class extends Controller {
+  static targets = ["form"];
+
+  autoSave() {
+    clearTimeout(this.timeout);
+    this.timeout = setTimeout(() => {
+      this.submitForm();
+    }, 500);
+  }
+
+  submitForm() {
+    const form = this.element;
+    const formData = new FormData(form);
+
+    fetch(form.action, {
+      method: form.method,
+      body: formData,
+      headers: {
+        Accept: "application/json",
+        "X-CSRF-Token": document.querySelector('meta[name="csrf-token"]').content,
+      },
+    })
+      .then((response) => response.json())
+      .then((data) => {
+        const statusElement = document.getElementById("form-status");
+        if (data.status === "success") {
+          statusElement.textContent = "Changes saved successfully";
+          statusElement.classList.add("text-green-500");
+          statusElement.classList.remove("text-red-500");
+        } else {
+          statusElement.textContent = `Error: ${data.errors.join(", ")}`;
+          statusElement.classList.add("text-red-500");
+          statusElement.classList.remove("text-green-500");
+        }
+        setTimeout(() => {
+          statusElement.textContent = "";
+        }, 3000);
+      })
+      .catch((error) => {
+        console.error("Error:", error);
+      });
+  }
+}
data/app/javascript/controllers/prompt_selector_controller.js
ADDED
@@ -0,0 +1,31 @@
+import { Controller } from "@hotwired/stimulus";
+
+export default class extends Controller {
+  static targets = ["userPromptField"];
+
+  toggleUserPrompt(event) {
+    const selectedFile = event.target.value;
+    if (selectedFile) {
+      this.userPromptFieldTarget.style.display = "none";
+      this.loadPredefinedPrompt(selectedFile);
+    } else {
+      this.userPromptFieldTarget.style.display = "block";
+      this.clearUserPrompt();
+    }
+  }
+
+  loadPredefinedPrompt(file) {
+    fetch(file)
+      .then((response) => response.text())
+      .then((content) => {
+        const userPromptTextarea = this.userPromptFieldTarget.querySelector("textarea");
+        userPromptTextarea.value = content;
+      })
+      .catch((error) => console.error("Error loading predefined prompt:", error));
+  }
+
+  clearUserPrompt() {
+    const userPromptTextarea = this.userPromptFieldTarget.querySelector("textarea");
+    userPromptTextarea.value = "";
+  }
+}
data/app/jobs/leva/experiment_job.rb
CHANGED
@@ -4,13 +4,18 @@ module Leva
   class ExperimentJob < ApplicationJob
     queue_as :default
 
-    # Perform the experiment
+    # Perform the experiment by scheduling all dataset records for evaluation
    #
     # @param experiment [Experiment] The experiment to run
     # @return [void]
-    def perform(
-
-
+    def perform(experiment)
+      return if experiment.completed? || experiment.running?
+
+      experiment.update!(status: :running)
+
+      experiment.dataset.dataset_records.each_with_index do |record, index|
+        RunEvalJob.set(wait: 3.seconds * index).perform_later(experiment.id, record.id)
+      end
     end
   end
 end
data/app/jobs/leva/run_eval_job.rb
ADDED
@@ -0,0 +1,40 @@
+# frozen_string_literal: true
+
+module Leva
+  class RunEvalJob < ApplicationJob
+    queue_as :default
+
+    # Perform a single run and evaluation for a dataset record
+    #
+    # @param experiment_id [Integer] The ID of the experiment
+    # @param dataset_record_id [Integer] The ID of the dataset record
+    # @return [void]
+    def perform(experiment_id, dataset_record_id)
+      experiment = Experiment.find(experiment_id)
+      dataset_record = DatasetRecord.find(dataset_record_id)
+
+      run = constantize_class(experiment.runner_class).new
+      evals = experiment.evaluator_classes.compact.reject(&:empty?).map { |klass| constantize_class(klass).new }
+
+      Leva.run_single_evaluation(experiment: experiment, run: run, evals: evals, dataset_record: dataset_record)
+
+      experiment.update!(status: :completed) if is_last(experiment)
+    end
+
+    private
+
+    def constantize_class(class_name)
+      class_name.constantize
+    rescue NameError => e
+      raise NameError, "Invalid class name: #{class_name}. Error: #{e.message}"
+    end
+
+    # Check if all dataset records for the experiment have a runner result
+    #
+    # @param experiment [Experiment] The experiment to check
+    # @return [Boolean] True if all dataset records have a runner result, false otherwise
+    def is_last(experiment)
+      experiment.dataset.dataset_records.count == experiment.runner_results.count
+    end
+  end
+end
data/app/models/concerns/leva/recordable.rb
ADDED
@@ -0,0 +1,37 @@
+module Leva
+  module Recordable
+    extend ActiveSupport::Concern
+
+    included do
+      has_many :dataset_records, as: :recordable, class_name: 'Leva::DatasetRecord', dependent: :destroy
+      has_many :datasets, through: :dataset_records, class_name: 'Leva::Dataset'
+      has_many :runner_results, through: :dataset_records, class_name: 'Leva::RunnerResult'
+      has_many :evaluation_results, through: :runner_results, class_name: 'Leva::EvaluationResult'
+    end
+
+    # @return [String] The ground truth label for the record
+    def ground_truth
+      raise NotImplementedError, "#{self.class} must implement #ground_truth"
+    end
+
+    # @return [Hash] A hash of attributes to be displayed in the dataset records index
+    def index_attributes
+      raise NotImplementedError, "#{self.class} must implement #index_attributes"
+    end
+
+    # @return [Hash] A hash of attributes to be displayed in the dataset record show view
+    def show_attributes
+      raise NotImplementedError, "#{self.class} must implement #show_attributes"
+    end
+
+    # @return [Hash] A hash of attributes to be liquified for LLM context
+    def to_llm_context
+      raise NotImplementedError, "#{self.class} must implement #to_llm_context"
+    end
+
+    # @return [Regexp] A regex pattern to extract the contents of a LLM response
+    def extract_regex_pattern
+      false
+    end
+  end
+end
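Host-app models opt in by including the concern and implementing the four required methods; the README example above shows the full version. `extract_regex_pattern` returns `false` by default, and how Leva consumes a pattern is not shown in this diff, so the override below is purely a hypothetical sketch:

```ruby
class TextContent < ApplicationRecord
  include Leva::Recordable

  def ground_truth
    expected_label
  end

  # Hypothetical override: a capture group around the label in the raw
  # LLM response; the concern's default simply returns false.
  def extract_regex_pattern
    /label:\s*(\w+)/i
  end

  # index_attributes, show_attributes and to_llm_context as in the README example.
end
```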
data/app/models/leva/dataset.rb
CHANGED
@@ -2,18 +2,27 @@
 #
 # Table name: leva_datasets
 #
-# id
-#
-#
-#
+#  id          :integer          not null, primary key
+#  description :text
+#  name        :string
+#  created_at  :datetime         not null
+#  updated_at  :datetime         not null
 #
 module Leva
   class Dataset < ApplicationRecord
     has_many :dataset_records, dependent: :destroy
     has_many :experiments, dependent: :destroy
 
+    validates :name, presence: true
+
+    # Adds a record to the dataset if it doesn't already exist
+    #
+    # @param record [ActiveRecord::Base] The record to be added to the dataset
+    # @return [Leva::DatasetRecord, nil] The created dataset record or nil if it already exists
     def add_record(record)
-      dataset_records.
+      dataset_records.find_or_create_by(recordable: record) do |dr|
+        dr.recordable = record
+      end
     end
   end
-end
+end