leva 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +63 -64
- data/app/controllers/leva/application_controller.rb +4 -1
- data/app/controllers/leva/datasets_controller.rb +72 -0
- data/app/controllers/leva/experiments_controller.rb +73 -0
- data/app/controllers/leva/workbench_controller.rb +40 -0
- data/app/evals/test_sentiment_accuracy_eval.rb +6 -0
- data/app/jobs/leva/experiment_job.rb +16 -0
- data/app/models/leva/dataset.rb +4 -1
- data/app/models/leva/dataset_record.rb +1 -1
- data/app/models/leva/evaluation_result.rb +4 -2
- data/app/models/leva/experiment.rb +6 -4
- data/app/runners/test_sentiment_run.rb +13 -0
- data/app/views/layouts/leva/application.html.erb +29 -16
- data/app/views/leva/datasets/index.html.erb +43 -0
- data/app/views/leva/datasets/show.html.erb +13 -0
- data/app/views/leva/experiments/index.html.erb +44 -0
- data/app/views/leva/experiments/show.html.erb +24 -0
- data/app/views/leva/workbench/index.html.erb +101 -0
- data/app/views/leva/workbench/new.html.erb +38 -0
- data/config/routes.rb +11 -1
- data/db/migrate/20240813173222_create_leva_experiments.rb +1 -1
- data/lib/generators/leva/eval_generator.rb +23 -0
- data/lib/generators/leva/runner_generator.rb +23 -0
- data/lib/generators/leva/templates/eval.rb.erb +15 -0
- data/lib/generators/leva/templates/runner.rb.erb +11 -0
- data/lib/leva/base_eval.rb +75 -0
- data/lib/leva/version.rb +1 -1
- data/lib/leva.rb +90 -2
- metadata +19 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7a1b17552bf84bd7721ee48cd83caf82f199d2b79218296f153015e144211356
|
4
|
+
data.tar.gz: eab228d96d36c3afce52a403d728ff93f6729730cf7cbef35411958443b6327c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0e2a98bde71873044ceca0be67213b2fe4d0547eb38430ea142f3425e57c6b1b14adad5644d6c747662e21587e29ca65a9205c2a85b2f14ca67752eda87fa607
|
7
|
+
data.tar.gz: dedfbe490f7ded9daf6af1d43e0c5611c4020beb5a9150d949e31ed4b32d56f358c4a15bde9e43b96c64ee9bffd50819a47343d0181bc415c1e3d04e20083610
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Leva - Flexible Evaluation Framework for Language Models
|
2
2
|
|
3
|
-
Leva is a Ruby on Rails framework for evaluating Language Models (LLMs) using ActiveRecord datasets. It provides a flexible structure for creating experiments, managing datasets, and implementing various evaluation logic.
|
3
|
+
Leva is a Ruby on Rails framework for evaluating Large Language Models (LLMs) using ActiveRecord datasets on production models. It provides a flexible structure for creating experiments, managing datasets, and implementing various evaluation logic on production data with security in mind.
|
4
4
|
|
5
5
|
## Installation
|
6
6
|
|
@@ -20,120 +20,119 @@ $ bundle install
|
|
20
20
|
|
21
21
|
### 1. Setting up Datasets
|
22
22
|
|
23
|
-
First, create a dataset and add any ActiveRecord records:
|
23
|
+
First, create a dataset and add any ActiveRecord records you want to evaluate against:
|
24
24
|
|
25
25
|
```ruby
|
26
|
-
dataset = Dataset.create(name: "Sentiment Analysis Dataset")
|
27
|
-
|
28
|
-
dataset.
|
29
|
-
dataset.
|
30
|
-
dataset.records << TextContent.create(text: "I's ok", expected_label: "Neutral")
|
26
|
+
dataset = Leva::Dataset.create(name: "Sentiment Analysis Dataset")
|
27
|
+
dataset.add_record TextContent.create(text: "I love this product!", expected_label: "Positive")
|
28
|
+
dataset.add_record TextContent.create(text: "Terrible experience", expected_label: "Negative")
|
29
|
+
dataset.add_record TextContent.create(text: "It's ok", expected_label: "Neutral")
|
31
30
|
```
|
32
31
|
|
33
|
-
|
34
|
-
|
35
|
-
### 2. Implementing Evals
|
32
|
+
### 2. Implementing Runs
|
36
33
|
|
37
|
-
Create
|
34
|
+
Create a run class to handle the execution of your inference logic:
|
38
35
|
|
39
36
|
```bash
|
40
|
-
|
37
|
+
rails generate leva:runner sentiment
|
41
38
|
```
|
42
39
|
|
43
|
-
#### Sentiment Evaluation (app/evals/sentiment_eval.rb)
|
44
|
-
|
45
40
|
```ruby
|
46
|
-
class
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
score = calculate_score(prediction, record.expected_label)
|
52
|
-
|
53
|
-
Leva::Result.new(
|
54
|
-
label: 'sentiment',
|
55
|
-
prediction: prediction,
|
56
|
-
score: score
|
57
|
-
)
|
41
|
+
class SentimentRun < Leva::BaseRun
|
42
|
+
def execute(record)
|
43
|
+
# Your model execution logic here
|
44
|
+
# This could involve calling an API, running a local model, etc.
|
45
|
+
# Return the model's output
|
58
46
|
end
|
47
|
+
end
|
48
|
+
```
|
49
|
+
|
50
|
+
### 3. Implementing Evals
|
51
|
+
|
52
|
+
Create one or more eval classes to evaluate the model's output:
|
53
|
+
|
54
|
+
```bash
|
55
|
+
rails generate leva:eval sentiment_accuracy
|
56
|
+
```
|
59
57
|
|
60
|
-
|
61
|
-
|
62
|
-
def
|
63
|
-
|
64
|
-
|
65
|
-
if text.include?('love')
|
66
|
-
'Positive'
|
67
|
-
elsif text.include?('terrible')
|
68
|
-
'Negative'
|
69
|
-
else
|
70
|
-
'Neutral'
|
71
|
-
end
|
58
|
+
```ruby
|
59
|
+
class SentimentAccuracyEval < Leva::BaseEval
|
60
|
+
def evaluate(prediction, expected)
|
61
|
+
score = prediction == expected ? 1.0 : 0.0
|
62
|
+
Leva::Result.new(label: 'sentiment_accuracy', score: score)
|
72
63
|
end
|
64
|
+
end
|
73
65
|
|
74
|
-
|
75
|
-
|
66
|
+
class SentimentF1Eval < Leva::BaseEval
|
67
|
+
def evaluate(prediction, expected)
|
68
|
+
# Calculate F1 score
|
69
|
+
# ...
|
70
|
+
Leva::Result.new(label: 'sentiment_f1', score: f1_score)
|
76
71
|
end
|
77
72
|
end
|
78
73
|
```
|
79
74
|
|
80
|
-
###
|
75
|
+
### 4. Running Experiments
|
81
76
|
|
82
|
-
You can run experiments with different evals:
|
77
|
+
You can run experiments with different runs and evals:
|
83
78
|
|
84
79
|
```ruby
|
85
|
-
|
86
|
-
|
80
|
+
experiment = Leva::Experiment.create!(name: "Sentiment Analysis", dataset: dataset)
|
81
|
+
|
82
|
+
run = SentimentRun.new
|
83
|
+
evals = [SentimentAccuracyEval.new, SentimentF1Eval.new]
|
84
|
+
|
85
|
+
Leva.run_evaluation(experiment: experiment, run: run, evals: evals)
|
87
86
|
```
|
88
87
|
|
89
|
-
|
88
|
+
### 5. Using Prompts
|
89
|
+
|
90
|
+
You can also use prompts with your runs:
|
90
91
|
|
91
92
|
```ruby
|
92
93
|
prompt = Leva::Prompt.create!(
|
93
94
|
name: "Sentiment Analysis",
|
94
95
|
version: 1,
|
95
96
|
system_prompt: "You are an expert at analyzing text and returning the sentiment.",
|
96
|
-
user_prompt: "Please analyze the following text and return the sentiment as Positive, Negative, or Neutral
|
97
|
-
metadata: {
|
98
|
-
model: "gpt-4o",
|
99
|
-
temperature: 0.5
|
100
|
-
}
|
97
|
+
user_prompt: "Please analyze the following text and return the sentiment as Positive, Negative, or Neutral.\n\n{{TEXT}}",
|
98
|
+
metadata: { model: "gpt-4", temperature: 0.5 }
|
101
99
|
)
|
102
100
|
|
103
|
-
|
101
|
+
experiment = Leva::Experiment.create!(
|
104
102
|
name: "Sentiment Analysis with LLM",
|
105
103
|
dataset: dataset,
|
106
104
|
prompt: prompt
|
107
105
|
)
|
108
106
|
|
109
|
-
|
107
|
+
run = SentimentRun.new
|
108
|
+
evals = [SentimentAccuracyEval.new, SentimentF1Eval.new]
|
109
|
+
|
110
|
+
Leva.run_evaluation(experiment: experiment, run: run, evals: evals)
|
110
111
|
```
|
111
112
|
|
112
|
-
###
|
113
|
+
### 6. Analyzing Results
|
113
114
|
|
114
115
|
After the experiments are complete, analyze the results:
|
115
116
|
|
116
117
|
```ruby
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
puts "Experiment: #{experiment.name}"
|
123
|
-
puts "Average Score: #{average_score}"
|
124
|
-
puts "Number of Evaluations: #{count}"
|
118
|
+
experiment.evaluation_results.group_by(&:label).each do |label, results|
|
119
|
+
average_score = results.sum(&:score) / results.size.to_f
|
120
|
+
puts "#{label.capitalize} Average Score: #{average_score}"
|
121
|
+
end
|
125
122
|
```
|
126
123
|
|
127
124
|
## Configuration
|
128
125
|
|
129
|
-
|
126
|
+
Ensure you set up any required API keys or other configurations in your Rails credentials or environment variables.
|
130
127
|
|
131
128
|
## Leva's Components
|
132
129
|
|
133
130
|
### Classes
|
134
131
|
|
135
|
-
- `Leva
|
136
|
-
- `Leva::
|
132
|
+
- `Leva`: Handles the process of running experiments.
|
133
|
+
- `Leva::BaseRun`: Base class for run implementations.
|
134
|
+
- `Leva::BaseEval`: Base class for eval implementations.
|
135
|
+
- `Leva::Result`: Represents the result of an evaluation.
|
137
136
|
|
138
137
|
### Models
|
139
138
|
|
@@ -0,0 +1,72 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Leva
|
4
|
+
class DatasetsController < ApplicationController
|
5
|
+
before_action :set_dataset, only: [:show, :edit, :update, :destroy]
|
6
|
+
|
7
|
+
# GET /datasets
|
8
|
+
# @return [void]
|
9
|
+
def index
|
10
|
+
@datasets = Dataset.all
|
11
|
+
end
|
12
|
+
|
13
|
+
# GET /datasets/1
|
14
|
+
# @return [void]
|
15
|
+
def show
|
16
|
+
end
|
17
|
+
|
18
|
+
# GET /datasets/new
|
19
|
+
# @return [void]
|
20
|
+
def new
|
21
|
+
@dataset = Dataset.new
|
22
|
+
end
|
23
|
+
|
24
|
+
# GET /datasets/1/edit
|
25
|
+
# @return [void]
|
26
|
+
def edit
|
27
|
+
end
|
28
|
+
|
29
|
+
# POST /datasets
|
30
|
+
# @return [void]
|
31
|
+
def create
|
32
|
+
@dataset = Dataset.new(dataset_params)
|
33
|
+
|
34
|
+
if @dataset.save
|
35
|
+
redirect_to @dataset, notice: 'Dataset was successfully created.'
|
36
|
+
else
|
37
|
+
render :new
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
# PATCH/PUT /datasets/1
|
42
|
+
# @return [void]
|
43
|
+
def update
|
44
|
+
if @dataset.update(dataset_params)
|
45
|
+
redirect_to @dataset, notice: 'Dataset was successfully updated.'
|
46
|
+
else
|
47
|
+
render :edit
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
# DELETE /datasets/1
|
52
|
+
# @return [void]
|
53
|
+
def destroy
|
54
|
+
@dataset.destroy
|
55
|
+
redirect_to datasets_url, notice: 'Dataset was successfully destroyed.'
|
56
|
+
end
|
57
|
+
|
58
|
+
private
|
59
|
+
|
60
|
+
# Use callbacks to share common setup or constraints between actions.
|
61
|
+
# @return [void]
|
62
|
+
def set_dataset
|
63
|
+
@dataset = Dataset.find(params[:id])
|
64
|
+
end
|
65
|
+
|
66
|
+
# Only allow a list of trusted parameters through.
|
67
|
+
# @return [ActionController::Parameters]
|
68
|
+
def dataset_params
|
69
|
+
params.require(:dataset).permit(:name, :description)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Leva
|
4
|
+
class ExperimentsController < ApplicationController
|
5
|
+
before_action :set_experiment, only: [:show, :edit, :update, :destroy]
|
6
|
+
|
7
|
+
# GET /experiments
|
8
|
+
# @return [void]
|
9
|
+
def index
|
10
|
+
@experiments = Experiment.all
|
11
|
+
end
|
12
|
+
|
13
|
+
# GET /experiments/1
|
14
|
+
# @return [void]
|
15
|
+
def show
|
16
|
+
end
|
17
|
+
|
18
|
+
# GET /experiments/new
|
19
|
+
# @return [void]
|
20
|
+
def new
|
21
|
+
@experiment = Experiment.new
|
22
|
+
end
|
23
|
+
|
24
|
+
# GET /experiments/1/edit
|
25
|
+
# @return [void]
|
26
|
+
def edit
|
27
|
+
end
|
28
|
+
|
29
|
+
# POST /experiments
|
30
|
+
# @return [void]
|
31
|
+
def create
|
32
|
+
@experiment = Experiment.new(experiment_params)
|
33
|
+
|
34
|
+
if @experiment.save
|
35
|
+
ExperimentJob.perform_later(@experiment)
|
36
|
+
redirect_to @experiment, notice: 'Experiment was successfully created.'
|
37
|
+
else
|
38
|
+
render :new
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
# PATCH/PUT /experiments/1
|
43
|
+
# @return [void]
|
44
|
+
def update
|
45
|
+
if @experiment.update(experiment_params)
|
46
|
+
redirect_to @experiment, notice: 'Experiment was successfully updated.'
|
47
|
+
else
|
48
|
+
render :edit
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
# DELETE /experiments/1
|
53
|
+
# @return [void]
|
54
|
+
def destroy
|
55
|
+
@experiment.destroy
|
56
|
+
redirect_to experiments_url, notice: 'Experiment was successfully destroyed.'
|
57
|
+
end
|
58
|
+
|
59
|
+
private
|
60
|
+
|
61
|
+
# Use callbacks to share common setup or constraints between actions.
|
62
|
+
# @return [void]
|
63
|
+
def set_experiment
|
64
|
+
@experiment = Experiment.find(params[:id])
|
65
|
+
end
|
66
|
+
|
67
|
+
# Only allow a list of trusted parameters through.
|
68
|
+
# @return [ActionController::Parameters]
|
69
|
+
def experiment_params
|
70
|
+
params.require(:experiment).permit(:name, :description, :dataset_id)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Leva
|
4
|
+
class WorkbenchController < ApplicationController
|
5
|
+
# GET /workbench
|
6
|
+
# @return [void]
|
7
|
+
def index
|
8
|
+
@prompts = Prompt.all
|
9
|
+
@selected_prompt = Prompt.first || Prompt.create!(name: "Test Prompt", version: 1, system_prompt: "You are a helpful assistant.", user_prompt: "Hello, how can I help you today?")
|
10
|
+
@evaluators = ['Evaluator 1', 'Evaluator 2', 'Evaluator 3']
|
11
|
+
end
|
12
|
+
|
13
|
+
# GET /workbench/new
|
14
|
+
# @return [void]
|
15
|
+
def new
|
16
|
+
@experiment = Experiment.new
|
17
|
+
end
|
18
|
+
|
19
|
+
# GET /workbench/1
|
20
|
+
# @return [void]
|
21
|
+
def show
|
22
|
+
@experiment = Experiment.find(params[:id])
|
23
|
+
end
|
24
|
+
|
25
|
+
def run
|
26
|
+
# Implement the logic for running the prompt
|
27
|
+
redirect_to workbench_index_path, notice: 'Prompt run successfully'
|
28
|
+
end
|
29
|
+
|
30
|
+
def run_with_evaluation
|
31
|
+
# Implement the logic for running the prompt with evaluation
|
32
|
+
redirect_to workbench_index_path, notice: 'Prompt run with evaluation successfully'
|
33
|
+
end
|
34
|
+
|
35
|
+
def run_evaluator
|
36
|
+
# Implement the logic for running a single evaluator
|
37
|
+
redirect_to workbench_index_path, notice: 'Evaluator run successfully'
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Leva
|
4
|
+
class ExperimentJob < ApplicationJob
|
5
|
+
queue_as :default
|
6
|
+
|
7
|
+
# Perform the experiment
|
8
|
+
#
|
9
|
+
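# @param eval [#run_each, #save_result] The eval whose `run_each` is called with the record and whose `save_result` receives the outcome
# @param record [Object] The record passed to `eval.run_each`
# NOTE(review): ExperimentsController#create enqueues this job with a single experiment argument, which does not match this two-argument signature — confirm which is intended.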
|
10
|
+
# @return [void]
|
11
|
+
def perform(eval, record)
|
12
|
+
result = eval.run_each(record)
|
13
|
+
eval.save_result(result)
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
data/app/models/leva/dataset.rb
CHANGED
@@ -10,7 +10,10 @@
|
|
10
10
|
module Leva
|
11
11
|
class Dataset < ApplicationRecord
|
12
12
|
has_many :dataset_records, dependent: :destroy
|
13
|
-
has_many :records, through: :dataset_records, source: :recordable
|
14
13
|
has_many :experiments, dependent: :destroy
|
14
|
+
|
15
|
+
def add_record(record)
|
16
|
+
dataset_records.create(recordable: record)
|
17
|
+
end
|
15
18
|
end
|
16
19
|
end
|
@@ -18,12 +18,14 @@
|
|
18
18
|
#
|
19
19
|
# Foreign Keys
|
20
20
|
#
|
21
|
-
# dataset_record_id (dataset_record_id =>
|
22
|
-
# experiment_id (experiment_id =>
|
21
|
+
# dataset_record_id (dataset_record_id => leva_dataset_records.id)
|
22
|
+
# experiment_id (experiment_id => leva_experiments.id)
|
23
23
|
#
|
24
24
|
module Leva
|
25
25
|
class EvaluationResult < ApplicationRecord
|
26
26
|
belongs_to :experiment
|
27
27
|
belongs_to :dataset_record
|
28
|
+
|
29
|
+
delegate :record, to: :dataset_record, allow_nil: true
|
28
30
|
end
|
29
31
|
end
|
@@ -9,7 +9,7 @@
|
|
9
9
|
# created_at :datetime not null
|
10
10
|
# updated_at :datetime not null
|
11
11
|
# dataset_id :integer not null
|
12
|
-
# prompt_id :integer
|
12
|
+
# prompt_id :integer
|
13
13
|
#
|
14
14
|
# Indexes
|
15
15
|
#
|
@@ -18,12 +18,14 @@
|
|
18
18
|
#
|
19
19
|
# Foreign Keys
|
20
20
|
#
|
21
|
-
# dataset_id (dataset_id =>
|
22
|
-
# prompt_id (prompt_id =>
|
21
|
+
# dataset_id (dataset_id => leva_datasets.id)
|
22
|
+
# prompt_id (prompt_id => leva_prompts.id)
|
23
23
|
#
|
24
24
|
module Leva
|
25
25
|
class Experiment < ApplicationRecord
|
26
26
|
belongs_to :dataset
|
27
|
-
belongs_to :prompt
|
27
|
+
belongs_to :prompt, optional: true
|
28
|
+
|
29
|
+
has_many :evaluation_results, dependent: :destroy
|
28
30
|
end
|
29
31
|
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
class TestSentimentRun < Leva::BaseRun
|
2
|
+
def execute(record)
|
3
|
+
# Simple sentiment analysis logic for testing
|
4
|
+
case record.content.downcase
|
5
|
+
when /love|great|excellent/
|
6
|
+
"Positive"
|
7
|
+
when /terrible|bad|awful/
|
8
|
+
"Negative"
|
9
|
+
else
|
10
|
+
"Neutral"
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
@@ -1,17 +1,30 @@
|
|
1
1
|
<!DOCTYPE html>
|
2
|
-
<html>
|
3
|
-
<head>
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
<
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
2
|
+
<html lang="en" class="bg-gray-900">
|
3
|
+
<head>
|
4
|
+
<meta charset="UTF-8">
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
6
|
+
<title>Leva - <%= yield(:title) || 'AI Evaluation Engine' %></title>
|
7
|
+
<%= csrf_meta_tags %>
|
8
|
+
<%= csp_meta_tag %>
|
9
|
+
<script src="https://cdn.tailwindcss.com"></script>
|
10
|
+
</head>
|
11
|
+
<body class="bg-gray-900 text-white">
|
12
|
+
<nav class="bg-gray-800 border-b border-gray-700">
|
13
|
+
<div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
|
14
|
+
<div class="flex justify-between h-16">
|
15
|
+
<div class="flex">
|
16
|
+
<%= link_to 'Leva', leva.root_path, class: 'flex-shrink-0 flex items-center text-2xl font-bold text-indigo-400' %>
|
17
|
+
<div class="hidden sm:ml-6 sm:flex sm:space-x-8">
|
18
|
+
<%= link_to 'Workbench', leva.workbench_index_path, class: "border-transparent #{request.path.start_with?(leva.workbench_index_path) ? 'border-indigo-500 text-white' : 'text-gray-400 hover:border-gray-700 hover:text-gray-300'} inline-flex items-center px-1 pt-1 border-b-2 text-sm font-medium" %>
|
19
|
+
<%= link_to 'Datasets', leva.datasets_path, class: "border-transparent #{request.path.start_with?(leva.datasets_path) ? 'border-indigo-500 text-white' : 'text-gray-400 hover:border-gray-700 hover:text-gray-300'} inline-flex items-center px-1 pt-1 border-b-2 text-sm font-medium" %>
|
20
|
+
<%= link_to 'Experiments', leva.experiments_path, class: "border-transparent #{request.path.start_with?(leva.experiments_path) ? 'border-indigo-500 text-white' : 'text-gray-400 hover:border-gray-700 hover:text-gray-300'} inline-flex items-center px-1 pt-1 border-b-2 text-sm font-medium" %>
|
21
|
+
</div>
|
22
|
+
</div>
|
23
|
+
</div>
|
24
|
+
</div>
|
25
|
+
</nav>
|
26
|
+
<main class="bg-gray-900">
|
27
|
+
<%= yield %>
|
28
|
+
</main>
|
29
|
+
</body>
|
30
|
+
</html>
|
@@ -0,0 +1,43 @@
|
|
1
|
+
<% content_for :title, 'Datasets' %>
|
2
|
+
<div class="px-4 sm:px-6 lg:px-8">
|
3
|
+
<div class="sm:flex sm:items-center">
|
4
|
+
<div class="sm:flex-auto">
|
5
|
+
<h1 class="text-2xl font-semibold text-gray-900">Datasets</h1>
|
6
|
+
<p class="mt-2 text-sm text-gray-700">A list of all datasets in your account.</p>
|
7
|
+
</div>
|
8
|
+
<div class="mt-4 sm:mt-0 sm:ml-16 sm:flex-none">
|
9
|
+
<%= link_to 'Add Dataset', new_dataset_path, class: 'btn btn-primary' %>
|
10
|
+
</div>
|
11
|
+
</div>
|
12
|
+
<div class="mt-8 flex flex-col">
|
13
|
+
<div class="-my-2 -mx-4 overflow-x-auto sm:-mx-6 lg:-mx-8">
|
14
|
+
<div class="inline-block min-w-full py-2 align-middle md:px-6 lg:px-8">
|
15
|
+
<div class="overflow-hidden shadow ring-1 ring-black ring-opacity-5 md:rounded-lg">
|
16
|
+
<table class="min-w-full divide-y divide-gray-300">
|
17
|
+
<thead class="bg-gray-50">
|
18
|
+
<tr>
|
19
|
+
<th scope="col" class="py-3.5 pl-4 pr-3 text-left text-sm font-semibold text-gray-900 sm:pl-6">Name</th>
|
20
|
+
<th scope="col" class="px-3 py-3.5 text-left text-sm font-semibold text-gray-900">Description</th>
|
21
|
+
<th scope="col" class="relative py-3.5 pl-3 pr-4 sm:pr-6">
|
22
|
+
<span class="sr-only">Actions</span>
|
23
|
+
</th>
|
24
|
+
</tr>
|
25
|
+
</thead>
|
26
|
+
<tbody class="divide-y divide-gray-200 bg-white">
|
27
|
+
<% @datasets.each do |dataset| %>
|
28
|
+
<tr>
|
29
|
+
<td class="whitespace-nowrap py-4 pl-4 pr-3 text-sm font-medium text-gray-900 sm:pl-6"><%= dataset.name %></td>
|
30
|
+
<td class="whitespace-nowrap px-3 py-4 text-sm text-gray-500"><%= dataset.description %></td>
|
31
|
+
<td class="relative whitespace-nowrap py-4 pl-3 pr-4 text-right text-sm font-medium sm:pr-6">
|
32
|
+
<%= link_to 'View', dataset_path(dataset), class: 'text-indigo-600 hover:text-indigo-900' %>
|
33
|
+
<%= link_to 'Edit', edit_dataset_path(dataset), class: 'ml-4 text-indigo-600 hover:text-indigo-900' %>
|
34
|
+
</td>
|
35
|
+
</tr>
|
36
|
+
<% end %>
|
37
|
+
</tbody>
|
38
|
+
</table>
|
39
|
+
</div>
|
40
|
+
</div>
|
41
|
+
</div>
|
42
|
+
</div>
|
43
|
+
</div>
|
@@ -0,0 +1,13 @@
|
|
1
|
+
<% content_for :title, @dataset.name %>
|
2
|
+
<div class="px-4 sm:px-6 lg:px-8">
|
3
|
+
<div class="sm:flex sm:items-center">
|
4
|
+
<div class="sm:flex-auto">
|
5
|
+
<h1 class="text-2xl font-semibold text-gray-900"><%= @dataset.name %></h1>
|
6
|
+
<p class="mt-2 text-sm text-gray-700"><%= @dataset.description %></p>
|
7
|
+
</div>
|
8
|
+
<div class="mt-4 sm:mt-0 sm:ml-16 sm:flex-none">
|
9
|
+
<%= link_to 'Edit Dataset', edit_dataset_path(@dataset), class: 'btn btn-primary' %>
|
10
|
+
</div>
|
11
|
+
</div>
|
12
|
+
<!-- Add more dataset details here -->
|
13
|
+
</div>
|
@@ -0,0 +1,44 @@
|
|
1
|
+
<% content_for :title, 'Experiments' %>
|
2
|
+
<div class="px-4 sm:px-6 lg:px-8">
|
3
|
+
<div class="sm:flex sm:items-center">
|
4
|
+
<div class="sm:flex-auto">
|
5
|
+
<h1 class="text-2xl font-semibold text-gray-900">Experiments</h1>
|
6
|
+
<p class="mt-2 text-sm text-gray-700">A list of all experiments in your account.</p>
|
7
|
+
</div>
|
8
|
+
<div class="mt-4 sm:mt-0 sm:ml-16 sm:flex-none">
|
9
|
+
<%= link_to 'New Experiment', new_experiment_path, class: 'btn btn-primary' %>
|
10
|
+
</div>
|
11
|
+
</div>
|
12
|
+
<div class="mt-8 flex flex-col">
|
13
|
+
<div class="-my-2 -mx-4 overflow-x-auto sm:-mx-6 lg:-mx-8">
|
14
|
+
<div class="inline-block min-w-full py-2 align-middle md:px-6 lg:px-8">
|
15
|
+
<div class="overflow-hidden shadow ring-1 ring-black ring-opacity-5 md:rounded-lg">
|
16
|
+
<table class="min-w-full divide-y divide-gray-300">
|
17
|
+
<thead class="bg-gray-50">
|
18
|
+
<tr>
|
19
|
+
<th scope="col" class="py-3.5 pl-4 pr-3 text-left text-sm font-semibold text-gray-900 sm:pl-6">Name</th>
|
20
|
+
<th scope="col" class="px-3 py-3.5 text-left text-sm font-semibold text-gray-900">Status</th>
|
21
|
+
<th scope="col" class="px-3 py-3.5 text-left text-sm font-semibold text-gray-900">Dataset</th>
|
22
|
+
<th scope="col" class="relative py-3.5 pl-3 pr-4 sm:pr-6">
|
23
|
+
<span class="sr-only">Actions</span>
|
24
|
+
</th>
|
25
|
+
</tr>
|
26
|
+
</thead>
|
27
|
+
<tbody class="divide-y divide-gray-200 bg-white">
|
28
|
+
<% @experiments.each do |experiment| %>
|
29
|
+
<tr>
|
30
|
+
<td class="whitespace-nowrap py-4 pl-4 pr-3 text-sm font-medium text-gray-900 sm:pl-6"><%= experiment.name %></td>
|
31
|
+
<td class="whitespace-nowrap px-3 py-4 text-sm text-gray-500"><%= experiment.status %></td>
|
32
|
+
<td class="whitespace-nowrap px-3 py-4 text-sm text-gray-500"><%= experiment.dataset.name %></td>
|
33
|
+
<td class="relative whitespace-nowrap py-4 pl-3 pr-4 text-right text-sm font-medium sm:pr-6">
|
34
|
+
<%= link_to 'View', experiment_path(experiment), class: 'text-indigo-600 hover:text-indigo-900' %>
|
35
|
+
</td>
|
36
|
+
</tr>
|
37
|
+
<% end %>
|
38
|
+
</tbody>
|
39
|
+
</table>
|
40
|
+
</div>
|
41
|
+
</div>
|
42
|
+
</div>
|
43
|
+
</div>
|
44
|
+
</div>
|
@@ -0,0 +1,24 @@
|
|
1
|
+
<% content_for :title, @experiment.name %>
|
2
|
+
<div class="px-4 sm:px-6 lg:px-8">
|
3
|
+
<div class="sm:flex sm:items-center">
|
4
|
+
<div class="sm:flex-auto">
|
5
|
+
<h1 class="text-2xl font-semibold text-gray-900"><%= @experiment.name %></h1>
|
6
|
+
<p class="mt-2 text-sm text-gray-700">Status: <%= @experiment.status %></p>
|
7
|
+
</div>
|
8
|
+
</div>
|
9
|
+
<div class="mt-8 bg-white shadow overflow-hidden sm:rounded-lg">
|
10
|
+
<div class="px-4 py-5 sm:px-6">
|
11
|
+
<h3 class="text-lg leading-6 font-medium text-gray-900">Experiment Details</h3>
|
12
|
+
</div>
|
13
|
+
<div class="border-t border-gray-200 px-4 py-5 sm:p-0">
|
14
|
+
<dl class="sm:divide-y sm:divide-gray-200">
|
15
|
+
<div class="py-4 sm:py-5 sm:grid sm:grid-cols-3 sm:gap-4 sm:px-6">
|
16
|
+
<dt class="text-sm font-medium text-gray-500">Dataset</dt>
|
17
|
+
<dd class="mt-1 text-sm text-gray-900 sm:mt-0 sm:col-span-2"><%= @experiment.dataset.name %></dd>
|
18
|
+
</div>
|
19
|
+
<!-- Add more experiment details here -->
|
20
|
+
</dl>
|
21
|
+
</div>
|
22
|
+
</div>
|
23
|
+
<!-- Add experiment results or other relevant information here -->
|
24
|
+
</div>
|
@@ -0,0 +1,101 @@
|
|
1
|
+
<% content_for :title, 'Workbench' %>
|
2
|
+
<div class="flex h-[calc(100vh-4rem)] bg-gray-900 text-white">
|
3
|
+
<!-- Left Sidebar -->
|
4
|
+
<div class="w-64 h-full bg-gray-800 border-r border-gray-700 flex flex-col">
|
5
|
+
<div class="p-4">
|
6
|
+
<h2 class="text-xl font-bold mb-4">Prompts</h2>
|
7
|
+
<div class="space-y-2">
|
8
|
+
<% @prompts.each do |prompt| %>
|
9
|
+
<div class="bg-gray-700 p-2 rounded">
|
10
|
+
<span class="text-sm font-medium"><%= prompt.name %></span>
|
11
|
+
<span class="text-xs text-gray-400 ml-2">v<%= prompt.version %></span>
|
12
|
+
</div>
|
13
|
+
<% end %>
|
14
|
+
</div>
|
15
|
+
</div>
|
16
|
+
<div class="mt-auto p-4">
|
17
|
+
<%= link_to new_prompt_path, class: "w-full flex items-center justify-center space-x-2 bg-indigo-600 hover:bg-indigo-700 p-2 rounded" do %>
|
18
|
+
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5" viewBox="0 0 20 20" fill="currentColor">
|
19
|
+
<path fill-rule="evenodd" d="M10 3a1 1 0 011 1v5h5a1 1 0 110 2h-5v5a1 1 0 11-2 0v-5H4a1 1 0 110-2h5V4a1 1 0 011-1z" clip-rule="evenodd" />
|
20
|
+
</svg>
|
21
|
+
<span>New Prompt</span>
|
22
|
+
<% end %>
|
23
|
+
</div>
|
24
|
+
</div>
|
25
|
+
<!-- Main Content -->
|
26
|
+
<div class="flex-1 flex flex-col">
|
27
|
+
<!-- Top Bar -->
|
28
|
+
<div class="bg-gray-800 p-4 flex items-center justify-between border-b border-gray-700">
|
29
|
+
<div>
|
30
|
+
<span class="font-medium"><%= @selected_prompt.name %></span>
|
31
|
+
<span class="text-xs text-gray-400 ml-2">v<%= @selected_prompt.version %></span>
|
32
|
+
</div>
|
33
|
+
<div class="flex items-center space-x-2">
|
34
|
+
<%= button_to run_workbench_index_path, method: :post, class: "flex items-center space-x-2 px-4 py-2 rounded bg-indigo-600 hover:bg-indigo-700" do %>
|
35
|
+
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5" viewBox="0 0 20 20" fill="currentColor">
|
36
|
+
<path fill-rule="evenodd" d="M10 18a8 8 0 100-16 8 8 0 000 16zM9.555 7.168A1 1 0 008 8v4a1 1 0 001.555.832l3-2a1 1 0 000-1.664l-3-2z" clip-rule="evenodd" />
|
37
|
+
</svg>
|
38
|
+
<span>Run</span>
|
39
|
+
<% end %>
|
40
|
+
<%= button_to run_with_evaluation_workbench_index_path, method: :post, class: "flex items-center space-x-2 px-4 py-2 rounded bg-green-600 hover:bg-green-700" do %>
|
41
|
+
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5" viewBox="0 0 20 20" fill="currentColor">
|
42
|
+
<path fill-rule="evenodd" d="M10 18a8 8 0 100-16 8 8 0 000 16zM9.555 7.168A1 1 0 008 8v4a1 1 0 001.555.832l3-2a1 1 0 000-1.664l-3-2z" clip-rule="evenodd" />
|
43
|
+
</svg>
|
44
|
+
<span>Run + Evaluate</span>
|
45
|
+
<% end %>
|
46
|
+
</div>
|
47
|
+
</div>
|
48
|
+
<!-- Scrollable Content -->
|
49
|
+
<div class="flex-1 flex overflow-hidden">
|
50
|
+
<div class="flex-1 overflow-y-auto p-6 space-y-4">
|
51
|
+
<!-- System Prompt -->
|
52
|
+
<div class="bg-gray-800 p-4 rounded">
|
53
|
+
<h2 class="text-sm font-semibold mb-2">SYSTEM PROMPT</h2>
|
54
|
+
<textarea
|
55
|
+
class="w-full bg-gray-700 text-white p-2 rounded resize-none"
|
56
|
+
style="height: auto; min-height: 100px;"
|
57
|
+
name="system_prompt"
|
58
|
+
><%= @selected_prompt.system_prompt %></textarea>
|
59
|
+
</div>
|
60
|
+
<!-- User Message -->
|
61
|
+
<div class="bg-gray-800 p-4 rounded">
|
62
|
+
<h2 class="text-sm font-semibold mb-2">USER</h2>
|
63
|
+
<textarea
|
64
|
+
class="w-full bg-gray-700 text-white p-2 rounded resize-none"
|
65
|
+
style="height: auto; min-height: 200px;"
|
66
|
+
name="user_prompt"
|
67
|
+
><%= @selected_prompt.user_prompt %></textarea>
|
68
|
+
</div>
|
69
|
+
</div>
|
70
|
+
<!-- Results Section -->
|
71
|
+
<div class="w-1/3 bg-gray-800 border-l border-gray-700 p-4 overflow-y-auto">
|
72
|
+
<h2 class="text-lg font-semibold mb-4">Results</h2>
|
73
|
+
<!-- Run Result -->
|
74
|
+
<div class="bg-gray-700 p-4 rounded mb-4">
|
75
|
+
<h3 class="text-sm font-semibold mb-2">Run Result</h3>
|
76
|
+
<p class="text-sm">
|
77
|
+
<%= flash[:notice] || "No results yet. Click 'Run' or 'Run + Evaluate' to start the analysis." %>
|
78
|
+
</p>
|
79
|
+
</div>
|
80
|
+
<!-- Evaluators -->
|
81
|
+
<div class="space-y-4">
|
82
|
+
<h3 class="text-sm font-semibold">Evaluators</h3>
|
83
|
+
<% @evaluators.each do |evaluator| %>
|
84
|
+
<div class="bg-gray-700 p-4 rounded">
|
85
|
+
<div class="flex items-center justify-between mb-2">
|
86
|
+
<span class="text-sm font-medium"><%= evaluator %></span>
|
87
|
+
<div class="flex items-center space-x-2">
|
88
|
+
<%= button_to run_evaluator_workbench_index_path, method: :post, params: { evaluator: evaluator }, class: "p-1 bg-blue-500 hover:bg-blue-600 rounded" do %>
|
89
|
+
<svg xmlns="http://www.w3.org/2000/svg" class="h-3 w-3" viewBox="0 0 20 20" fill="currentColor">
|
90
|
+
<path fill-rule="evenodd" d="M10 18a8 8 0 100-16 8 8 0 000 16zM9.555 7.168A1 1 0 008 8v4a1 1 0 001.555.832l3-2a1 1 0 000-1.664l-3-2z" clip-rule="evenodd" />
|
91
|
+
</svg>
|
92
|
+
<% end %>
|
93
|
+
</div>
|
94
|
+
</div>
|
95
|
+
</div>
|
96
|
+
<% end %>
|
97
|
+
</div>
|
98
|
+
</div>
|
99
|
+
</div>
|
100
|
+
</div>
|
101
|
+
</div>
|
@@ -0,0 +1,38 @@
|
|
1
|
+
<% content_for :title, 'New Experiment' %>
|
2
|
+
<div class="px-4 sm:px-6 lg:px-8">
|
3
|
+
<div class="sm:flex sm:items-center">
|
4
|
+
<div class="sm:flex-auto">
|
5
|
+
<h1 class="text-2xl font-semibold text-gray-900">New Experiment</h1>
|
6
|
+
<p class="mt-2 text-sm text-gray-700">Create a new experiment in your workbench.</p>
|
7
|
+
</div>
|
8
|
+
</div>
|
9
|
+
<div class="mt-8 max-w-xl">
|
10
|
+
<%= form_with(model: @experiment, url: experiments_path, local: true, class: "space-y-8 divide-y divide-gray-200") do |form| %>
|
11
|
+
<div class="space-y-8 divide-y divide-gray-200">
|
12
|
+
<div>
|
13
|
+
<div>
|
14
|
+
<h3 class="text-lg leading-6 font-medium text-gray-900">Experiment Information</h3>
|
15
|
+
<p class="mt-1 text-sm text-gray-500">Provide details for your new experiment.</p>
|
16
|
+
</div>
|
17
|
+
<div class="mt-6 grid grid-cols-1 gap-y-6 gap-x-4 sm:grid-cols-6">
|
18
|
+
<div class="sm:col-span-4">
|
19
|
+
<%= form.label :name, class: "block text-sm font-medium text-gray-700" %>
|
20
|
+
<div class="mt-1">
|
21
|
+
<%= form.text_field :name, class: "shadow-sm focus:ring-indigo-500 focus:border-indigo-500 block w-full sm:text-sm border-gray-300 rounded-md" %>
|
22
|
+
</div>
|
23
|
+
</div>
|
24
|
+
<div class="sm:col-span-2">
|
25
|
+
<%= form.label :dataset_id, class: "block text-sm font-medium text-gray-700" %>
|
26
|
+
<div class="mt-1">
|
27
|
+
<%# Dataset picker. Falls back to all datasets because WorkbenchController#new only assigns @experiment; the empty hash fills the select `options` argument so that `class:` is received as html_options and actually rendered. %>
<%= form.select :dataset_id, options_for_select((@datasets || Leva::Dataset.all).map { |dataset| [dataset.name, dataset.id] }), {}, class: "shadow-sm focus:ring-indigo-500 focus:border-indigo-500 block w-full sm:text-sm border-gray-300 rounded-md" %>
|
28
|
+
</div>
|
29
|
+
</div>
|
30
|
+
</div>
|
31
|
+
</div>
|
32
|
+
</div>
|
33
|
+
<div class="pt-5">
|
34
|
+
<%= form.submit class: "btn btn-primary btn-block" %>
|
35
|
+
</div>
|
36
|
+
<% end %>
|
37
|
+
</div>
|
38
|
+
</div>
|
data/config/routes.rb
CHANGED
@@ -1,2 +1,12 @@
|
|
1
1
|
Leva::Engine.routes.draw do
|
2
|
-
|
2
|
+
root 'workbench#index'
|
3
|
+
|
4
|
+
resources :datasets
|
5
|
+
resources :experiments
|
6
|
+
resources :prompts
|
7
|
+
resources :workbench, only: [:index, :new, :show] do
|
8
|
+
post 'run', on: :collection
|
9
|
+
post 'run_with_evaluation', on: :collection
|
10
|
+
post 'run_evaluator', on: :collection
|
11
|
+
end
|
12
|
+
end
|
@@ -3,7 +3,7 @@ class CreateLevaExperiments < ActiveRecord::Migration[7.2]
|
|
3
3
|
create_table :leva_experiments do |t|
|
4
4
|
t.string :name
|
5
5
|
t.references :dataset, null: false, foreign_key: true
|
6
|
-
t.references :prompt, null: false, foreign_key: true
|
6
|
+
t.references :prompt, null: true, foreign_key: true
|
7
7
|
t.integer :status
|
8
8
|
t.text :metadata
|
9
9
|
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Leva
|
4
|
+
module Generators
|
5
|
+
class EvalGenerator < Rails::Generators::NamedBase
|
6
|
+
source_root File.expand_path('templates', __dir__)
|
7
|
+
|
8
|
+
def create_eval_file
|
9
|
+
template 'eval.rb.erb', File.join('app/evals', class_path, "#{file_name}_eval.rb")
|
10
|
+
end
|
11
|
+
|
12
|
+
private
|
13
|
+
|
14
|
+
def file_name
|
15
|
+
@_file_name ||= remove_possible_suffix(super)
|
16
|
+
end
|
17
|
+
|
18
|
+
def remove_possible_suffix(name)
|
19
|
+
name.sub(/_?eval$/i, '')
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Leva
|
4
|
+
module Generators
|
5
|
+
class RunnerGenerator < Rails::Generators::NamedBase
|
6
|
+
source_root File.expand_path('templates', __dir__)
|
7
|
+
|
8
|
+
def create_runner_file
|
9
|
+
template 'runner.rb.erb', File.join('app/runners', class_path, "#{file_name}_run.rb")
|
10
|
+
end
|
11
|
+
|
12
|
+
private
|
13
|
+
|
14
|
+
def file_name
|
15
|
+
@_file_name ||= remove_possible_suffix(super)
|
16
|
+
end
|
17
|
+
|
18
|
+
def remove_possible_suffix(name)
|
19
|
+
name.sub(/_?runner$/i, '')
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class <%= class_name %>Eval < Leva::BaseEval
|
4
|
+
# @param prediction [String] The prediction to evaluate
|
5
|
+
# @param record [YourRecordClass] The record to evaluate
|
6
|
+
# @return [Leva::Result] The result of the evaluation
|
7
|
+
def evaluate(prediction, record)
|
8
|
+
# Implement your evaluation logic here
|
9
|
+
|
10
|
+
Leva::Result.new(
|
11
|
+
label: "<%= file_name.underscore %>",
|
12
|
+
score: score
|
13
|
+
)
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class <%= class_name %>Run < Leva::BaseRun
|
4
|
+
# @param record [YourRecordClass] The record to run
|
5
|
+
# @return [String] The result of the run
|
6
|
+
def execute(record)
|
7
|
+
# Your model execution logic here
|
8
|
+
# This could involve calling an API, running a local model, etc.
|
9
|
+
# Return the result of the run to be used to evaluate the model
|
10
|
+
end
|
11
|
+
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Leva
|
4
|
+
class BaseEval
|
5
|
+
class << self
|
6
|
+
attr_reader :dataset_record_class_name
|
7
|
+
|
8
|
+
# Set the dataset record class for the eval
|
9
|
+
# @param class_name [String] The name of the dataset record class
|
10
|
+
def leva_dataset_record_class(class_name)
|
11
|
+
@dataset_record_class_name = class_name
|
12
|
+
end
|
13
|
+
|
14
|
+
# Run the experiment
|
15
|
+
# @param experiment [Leva::Experiment] The experiment to run
|
16
|
+
def run_experiment(experiment)
|
17
|
+
new.run_experiment(experiment)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
# Run the experiment
|
22
|
+
# @param experiment [Leva::Experiment] The experiment to run
|
23
|
+
def run_experiment(experiment)
|
24
|
+
@experiment = experiment
|
25
|
+
@experiment.update(status: :running)
|
26
|
+
|
27
|
+
@experiment.dataset.records.each do |record|
|
28
|
+
@record = record
|
29
|
+
unless @record.class_name == self.class.dataset_record_class_name
|
30
|
+
raise ArgumentError, "Record class #{@record.class_name} does not match expected class #{self.class.dataset_record_class_name}"
|
31
|
+
end
|
32
|
+
ExperimentJob.perform_later(self, @record)
|
33
|
+
end
|
34
|
+
|
35
|
+
@experiment.update(status: :completed)
|
36
|
+
rescue StandardError => e
|
37
|
+
@experiment.update(status: :failed)
|
38
|
+
Rails.logger.error "Error in experiment #{@experiment.name}: #{e.message}"
|
39
|
+
end
|
40
|
+
|
41
|
+
# Run the evaluation for a single record
|
42
|
+
# @param record [ActiveRecord::Base] The record to evaluate
|
43
|
+
# @return [Leva::Result] The result of the evaluation
|
44
|
+
def run_each(record)
|
45
|
+
raise NotImplementedError, "Subclasses must implement the 'run_each' method"
|
46
|
+
end
|
47
|
+
|
48
|
+
# Save the result of an evaluation
|
49
|
+
# @param result [Leva::Result] The result of the evaluation
|
50
|
+
def save_result(result)
|
51
|
+
Leva::EvaluationResult.create!(
|
52
|
+
experiment: @experiment,
|
53
|
+
dataset_record: Leva::DatasetRecord.find_by(recordable: @record, dataset: @experiment.dataset),
|
54
|
+
prediction: result.prediction,
|
55
|
+
score: result.score,
|
56
|
+
label: result.label
|
57
|
+
)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
# Represents the result of an evaluation
|
62
|
+
class Result
|
63
|
+
attr_reader :label, :prediction, :score
|
64
|
+
|
65
|
+
# Initialize a new Result
|
66
|
+
# @param label [String] The label for the result
|
67
|
+
# @param prediction [String] The prediction made by the evaluation
|
68
|
+
# @param score [Float] The score of the evaluation (0.0 to 1.0)
|
69
|
+
def initialize(label:, prediction:, score:)
|
70
|
+
@label = label
|
71
|
+
@prediction = prediction
|
72
|
+
@score = score
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
data/lib/leva/version.rb
CHANGED
data/lib/leva.rb
CHANGED
@@ -2,5 +2,93 @@ require "leva/version"
|
|
2
2
|
require "leva/engine"
|
3
3
|
|
4
4
|
module Leva
|
5
|
-
#
|
6
|
-
|
5
|
+
# Runs an evaluation experiment with the given run and evals.
|
6
|
+
#
|
7
|
+
# @param experiment [Leva::Experiment] The experiment to run.
|
8
|
+
# @param run [Leva::BaseRun] The run implementation to use.
|
9
|
+
# @param evals [Array<Leva::BaseEval>] The evaluation implementations to use.
|
10
|
+
# @return [void]
|
11
|
+
def self.run_evaluation(experiment:, run:, evals:)
|
12
|
+
results = run.run(experiment)
|
13
|
+
evals.each do |eval|
|
14
|
+
eval.evaluate_all(experiment, results)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
# Base class for all run implementations in Leva.
|
19
|
+
#
|
20
|
+
# @abstract Subclass and override {#execute} to implement
|
21
|
+
# custom run logic.
|
22
|
+
class BaseRun
|
23
|
+
# Executes the run on a given record.
|
24
|
+
#
|
25
|
+
# @param record [Leva::DatasetRecord] The record to run the model on.
|
26
|
+
# @return [Object] The output of the model execution.
|
27
|
+
# @raise [NotImplementedError] if the method is not implemented in a subclass.
|
28
|
+
def execute(record)
|
29
|
+
raise NotImplementedError, "#{self.class} must implement #execute"
|
30
|
+
end
|
31
|
+
|
32
|
+
# Runs the model on all records in an experiment.
|
33
|
+
#
|
34
|
+
# @param experiment [Leva::Experiment] The experiment to run.
|
35
|
+
# @return [Hash] A hash mapping dataset_record_ids to their execution results.
|
36
|
+
def run(experiment)
|
37
|
+
results = {}
|
38
|
+
experiment.dataset.dataset_records.find_each do |dataset_record|
|
39
|
+
result = execute(dataset_record.recordable)
|
40
|
+
results[dataset_record.id] = result
|
41
|
+
end
|
42
|
+
results
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
# Base class for all evaluation implementations in Leva.
|
47
|
+
#
|
48
|
+
# @abstract Subclass and override {#evaluate} to implement
|
49
|
+
# custom evaluation logic.
|
50
|
+
class BaseEval
|
51
|
+
# Evaluates the model's prediction against the expected result.
|
52
|
+
#
|
53
|
+
# @param prediction [Object] The model's prediction.
|
54
|
+
# @param record [Object] The expected result.
|
55
|
+
# @return [Leva::Result] The evaluation result.
|
56
|
+
# @raise [NotImplementedError] if the method is not implemented in a subclass.
|
57
|
+
def evaluate(prediction, record)
|
58
|
+
raise NotImplementedError, "#{self.class} must implement #evaluate"
|
59
|
+
end
|
60
|
+
|
61
|
+
# Evaluates all results for an experiment.
|
62
|
+
#
|
63
|
+
# @param experiment [Leva::Experiment] The experiment to evaluate.
|
64
|
+
# @param results [Hash] A hash mapping dataset_record_ids to their execution results.
|
65
|
+
# @return [void]
|
66
|
+
def evaluate_all(experiment, results)
|
67
|
+
experiment.dataset.dataset_records.find_each do |dataset_record|
|
68
|
+
prediction = results[dataset_record.id]
|
69
|
+
evaluation = evaluate(prediction, dataset_record.recordable)
|
70
|
+
|
71
|
+
Leva::EvaluationResult.create!(
|
72
|
+
experiment: experiment,
|
73
|
+
dataset_record: dataset_record,
|
74
|
+
prediction: prediction,
|
75
|
+
score: evaluation.score,
|
76
|
+
label: evaluation.label
|
77
|
+
)
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
# Represents the result of an evaluation
|
83
|
+
class Result
|
84
|
+
attr_reader :label, :prediction, :score
|
85
|
+
|
86
|
+
# Initialize a new Result
|
87
|
+
# @param label [String] The label for the result
|
88
|
+
# @param score [Float] The score of the evaluation (0.0 to 1.0)
|
89
|
+
def initialize(label:, score:)
|
90
|
+
@label = label
|
91
|
+
@score = score
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: leva
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kieran Klaassen
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-08-13 00:00:00.000000000 Z
|
11
|
+
date: 2024-08-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -39,8 +39,13 @@ files:
|
|
39
39
|
- app/assets/config/leva_manifest.js
|
40
40
|
- app/assets/stylesheets/leva/application.css
|
41
41
|
- app/controllers/leva/application_controller.rb
|
42
|
+
- app/controllers/leva/datasets_controller.rb
|
43
|
+
- app/controllers/leva/experiments_controller.rb
|
44
|
+
- app/controllers/leva/workbench_controller.rb
|
45
|
+
- app/evals/test_sentiment_accuracy_eval.rb
|
42
46
|
- app/helpers/leva/application_helper.rb
|
43
47
|
- app/jobs/leva/application_job.rb
|
48
|
+
- app/jobs/leva/experiment_job.rb
|
44
49
|
- app/mailers/leva/application_mailer.rb
|
45
50
|
- app/models/leva/application_record.rb
|
46
51
|
- app/models/leva/dataset.rb
|
@@ -48,14 +53,26 @@ files:
|
|
48
53
|
- app/models/leva/evaluation_result.rb
|
49
54
|
- app/models/leva/experiment.rb
|
50
55
|
- app/models/leva/prompt.rb
|
56
|
+
- app/runners/test_sentiment_run.rb
|
51
57
|
- app/views/layouts/leva/application.html.erb
|
58
|
+
- app/views/leva/datasets/index.html.erb
|
59
|
+
- app/views/leva/datasets/show.html.erb
|
60
|
+
- app/views/leva/experiments/index.html.erb
|
61
|
+
- app/views/leva/experiments/show.html.erb
|
62
|
+
- app/views/leva/workbench/index.html.erb
|
63
|
+
- app/views/leva/workbench/new.html.erb
|
52
64
|
- config/routes.rb
|
53
65
|
- db/migrate/20240813172916_create_leva_datasets.rb
|
54
66
|
- db/migrate/20240813173033_create_leva_dataset_records.rb
|
55
67
|
- db/migrate/20240813173050_create_leva_evaluation_results.rb
|
56
68
|
- db/migrate/20240813173105_create_leva_prompts.rb
|
57
69
|
- db/migrate/20240813173222_create_leva_experiments.rb
|
70
|
+
- lib/generators/leva/eval_generator.rb
|
71
|
+
- lib/generators/leva/runner_generator.rb
|
72
|
+
- lib/generators/leva/templates/eval.rb.erb
|
73
|
+
- lib/generators/leva/templates/runner.rb.erb
|
58
74
|
- lib/leva.rb
|
75
|
+
- lib/leva/base_eval.rb
|
59
76
|
- lib/leva/engine.rb
|
60
77
|
- lib/leva/version.rb
|
61
78
|
- lib/tasks/auto_annotate_models.rake
|