leva 0.1.9.1 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +21 -5
- data/app/assets/stylesheets/leva/application.css +3083 -15
- data/app/controllers/leva/application_controller.rb +1 -1
- data/app/controllers/leva/dataset_records_controller.rb +1 -1
- data/app/controllers/leva/datasets_controller.rb +6 -6
- data/app/controllers/leva/design_system_controller.rb +9 -0
- data/app/controllers/leva/experiments_controller.rb +8 -8
- data/app/controllers/leva/runner_results_controller.rb +1 -1
- data/app/controllers/leva/workbench_controller.rb +26 -15
- data/app/helpers/leva/application_helper.rb +7 -7
- data/app/jobs/leva/experiment_job.rb +1 -1
- data/app/jobs/leva/run_eval_job.rb +1 -1
- data/app/models/concerns/leva/recordable.rb +5 -5
- data/app/models/leva/dataset.rb +1 -1
- data/app/models/leva/evaluation_result.rb +1 -1
- data/app/models/leva/experiment.rb +1 -1
- data/app/models/leva/prompt.rb +1 -1
- data/app/views/layouts/leva/application.html.erb +23 -24
- data/app/views/leva/dataset_records/index.html.erb +70 -43
- data/app/views/leva/dataset_records/show.html.erb +115 -25
- data/app/views/leva/datasets/_dataset.html.erb +11 -18
- data/app/views/leva/datasets/_form.html.erb +18 -14
- data/app/views/leva/datasets/edit.html.erb +16 -4
- data/app/views/leva/datasets/index.html.erb +33 -41
- data/app/views/leva/datasets/new.html.erb +15 -4
- data/app/views/leva/datasets/show.html.erb +120 -139
- data/app/views/leva/design_system/index.html.erb +1731 -0
- data/app/views/leva/experiments/_experiment.html.erb +46 -31
- data/app/views/leva/experiments/_form.html.erb +62 -35
- data/app/views/leva/experiments/edit.html.erb +17 -3
- data/app/views/leva/experiments/index.html.erb +41 -36
- data/app/views/leva/experiments/new.html.erb +52 -4
- data/app/views/leva/experiments/show.html.erb +155 -98
- data/app/views/leva/runner_results/show.html.erb +271 -54
- data/app/views/leva/workbench/_evaluation_area.html.erb +18 -4
- data/app/views/leva/workbench/_prompt_content.html.erb +124 -73
- data/app/views/leva/workbench/_prompt_form.html.erb +24 -23
- data/app/views/leva/workbench/_prompt_sidebar.html.erb +57 -12
- data/app/views/leva/workbench/_results_section.html.erb +274 -112
- data/app/views/leva/workbench/_top_bar.html.erb +16 -6
- data/app/views/leva/workbench/edit.html.erb +46 -15
- data/app/views/leva/workbench/index.html.erb +5 -8
- data/app/views/leva/workbench/new.html.erb +74 -42
- data/config/routes.rb +11 -9
- data/db/migrate/20240813173033_create_leva_dataset_records.rb +1 -0
- data/db/migrate/20240813173035_create_leva_experiments.rb +2 -0
- data/db/migrate/{20240816201419_create_leva_runner_results.rb → 20240813173040_create_leva_runner_results.rb} +4 -1
- data/db/migrate/20240813173050_create_leva_evaluation_results.rb +3 -3
- data/lib/generators/leva/eval_generator.rb +4 -4
- data/lib/generators/leva/runner_generator.rb +4 -4
- data/lib/generators/leva/templates/runner.rb.erb +20 -0
- data/lib/leva/version.rb +1 -1
- data/lib/leva.rb +24 -2
- metadata +5 -11
- data/db/migrate/20240816201433_update_leva_evaluation_results.rb +0 -8
- data/db/migrate/20240821163608_make_experiment_optional_for_runner_results.rb +0 -6
- data/db/migrate/20240821181934_add_prompt_to_leva_runner_results.rb +0 -5
- data/db/migrate/20240821183153_add_runner_and_evaluator_to_leva_experiments.rb +0 -6
- data/db/migrate/20240821191713_add_actual_result_to_leva_dataset_records.rb +0 -5
- data/db/migrate/20240822143201_remove_actual_result_from_leva_runner_results.rb +0 -5
- data/db/migrate/20240912183556_add_runner_class_to_leva_runner_results.rb +0 -5
- data/lib/tasks/auto_annotate_models.rake +0 -59
|
@@ -1,10 +1,23 @@
|
|
|
1
1
|
<% content_for :title, "New Prompt" %>
|
|
2
|
-
<div class="container
|
|
3
|
-
<
|
|
4
|
-
|
|
2
|
+
<div class="container page">
|
|
3
|
+
<div class="page-header">
|
|
4
|
+
<div class="flex items-center gap-3">
|
|
5
|
+
<%= link_to workbench_index_path, class: "btn btn-ghost btn-sm" do %>
|
|
6
|
+
<svg class="icon-sm" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
|
7
|
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 19l-7-7m0 0l7-7m-7 7h18" />
|
|
8
|
+
</svg>
|
|
9
|
+
<% end %>
|
|
10
|
+
<div>
|
|
11
|
+
<h1 class="page-title" style="margin-bottom: 0;">New Prompt</h1>
|
|
12
|
+
<p class="text-sm text-muted" style="margin: 0;">Create a new prompt template</p>
|
|
13
|
+
</div>
|
|
14
|
+
</div>
|
|
15
|
+
</div>
|
|
16
|
+
|
|
17
|
+
<%= form_with(model: @prompt, url: workbench_index_path, local: true, class: "card", data: { controller: "prompt-selector" }) do |form| %>
|
|
5
18
|
<% if @prompt.errors.any? %>
|
|
6
|
-
<div class="
|
|
7
|
-
<
|
|
19
|
+
<div class="form-errors">
|
|
20
|
+
<p class="form-errors-title"><%= pluralize(@prompt.errors.count, "error") %> prohibited this prompt from being saved:</p>
|
|
8
21
|
<ul>
|
|
9
22
|
<% @prompt.errors.full_messages.each do |message| %>
|
|
10
23
|
<li><%= message %></li>
|
|
@@ -12,58 +25,85 @@
|
|
|
12
25
|
</ul>
|
|
13
26
|
</div>
|
|
14
27
|
<% end %>
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
28
|
+
|
|
29
|
+
<div class="form-group">
|
|
30
|
+
<div class="flex items-center gap-2 mb-2">
|
|
31
|
+
<svg class="icon-sm text-muted" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
|
32
|
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M7 7h.01M7 3h5c.512 0 1.024.195 1.414.586l7 7a2 2 0 010 2.828l-7 7a2 2 0 01-2.828 0l-7-7A1.994 1.994 0 013 12V7a4 4 0 014-4z" />
|
|
33
|
+
</svg>
|
|
34
|
+
<%= form.label :name, class: "form-label", style: "margin-bottom: 0;" %>
|
|
35
|
+
</div>
|
|
36
|
+
<%= form.text_field :name, autofocus: true, class: "form-input", placeholder: "Enter a descriptive name..." %>
|
|
18
37
|
</div>
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
38
|
+
|
|
39
|
+
<div class="form-group">
|
|
40
|
+
<div class="flex items-center gap-2 mb-2">
|
|
41
|
+
<svg class="icon-sm text-muted" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
|
42
|
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9.75 17L9 20l-1 1h8l-1-1-.75-3M3 13h18M5 17h14a2 2 0 002-2V5a2 2 0 00-2-2H5a2 2 0 00-2 2v10a2 2 0 002 2z" />
|
|
43
|
+
</svg>
|
|
44
|
+
<%= form.label :system_prompt, "System Prompt", class: "form-label", style: "margin-bottom: 0;" %>
|
|
45
|
+
</div>
|
|
46
|
+
<%= form.text_area :system_prompt, rows: 2, class: "form-textarea prompt-textarea", placeholder: "Define the AI's role and behavior..." %>
|
|
22
47
|
</div>
|
|
23
|
-
|
|
24
|
-
|
|
48
|
+
|
|
49
|
+
<div class="form-group">
|
|
50
|
+
<div class="flex items-center gap-2 mb-2">
|
|
51
|
+
<svg class="icon-sm text-muted" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
|
52
|
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M4 5a1 1 0 011-1h14a1 1 0 011 1v2a1 1 0 01-1 1H5a1 1 0 01-1-1V5zM4 13a1 1 0 011-1h6a1 1 0 011 1v6a1 1 0 01-1 1H5a1 1 0 01-1-1v-6zM16 13a1 1 0 011-1h2a1 1 0 011 1v6a1 1 0 01-1 1h-2a1 1 0 01-1-1v-6z" />
|
|
53
|
+
</svg>
|
|
54
|
+
<%= form.label :predefined_prompt, "Template", class: "form-label", style: "margin-bottom: 0;" %>
|
|
55
|
+
</div>
|
|
25
56
|
<%= form.select :predefined_prompt,
|
|
26
57
|
options_for_select([['Custom Prompt', '']] + @predefined_prompts.map { |name, content| [name, content] }),
|
|
27
58
|
{},
|
|
28
|
-
class: "
|
|
59
|
+
class: "form-select",
|
|
29
60
|
data: { action: "change->prompt-selector#toggleUserPrompt" }
|
|
30
61
|
%>
|
|
31
62
|
</div>
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
63
|
+
|
|
64
|
+
<div class="form-group" data-prompt-selector-target="userPromptField">
|
|
65
|
+
<div class="flex items-center gap-2 mb-2">
|
|
66
|
+
<svg class="icon-sm text-muted" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
|
67
|
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M16 7a4 4 0 11-8 0 4 4 0 018 0zM12 14a7 7 0 00-7 7h14a7 7 0 00-7-7z" />
|
|
68
|
+
</svg>
|
|
69
|
+
<%= form.label :user_prompt, "User Prompt Template", class: "form-label", style: "margin-bottom: 0;" %>
|
|
70
|
+
</div>
|
|
71
|
+
<%= form.text_area :user_prompt, rows: 5, class: "form-textarea prompt-textarea", placeholder: "Use {{ variable }} syntax for dynamic content..." %>
|
|
35
72
|
</div>
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
<
|
|
39
|
-
|
|
73
|
+
|
|
74
|
+
<div class="form-group hidden" data-prompt-selector-target="promptPreview" id="prompt-preview">
|
|
75
|
+
<label class="form-label">Prompt Preview</label>
|
|
76
|
+
<div
|
|
77
|
+
class="card-subtle p-4"
|
|
78
|
+
style="background: var(--gray-800); white-space: pre-wrap;"
|
|
40
79
|
data-prompt-selector-target="previewContent"
|
|
41
80
|
id="preview-content"
|
|
42
81
|
></div>
|
|
43
82
|
|
|
44
|
-
<!-- show-full button, hidden until overflow is detected -->
|
|
45
83
|
<button
|
|
46
84
|
id="show-full-preview"
|
|
47
85
|
type="button"
|
|
48
|
-
class="
|
|
86
|
+
class="btn btn-ghost btn-sm mt-2 hidden"
|
|
49
87
|
>Show full</button>
|
|
50
88
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
</
|
|
89
|
+
<dialog id="full-preview-dialog">
|
|
90
|
+
<div class="dialog-header">
|
|
91
|
+
<h3 class="dialog-title">Full Preview</h3>
|
|
92
|
+
</div>
|
|
93
|
+
<div class="dialog-body" id="dialog-content" style="white-space: pre-wrap;"></div>
|
|
94
|
+
<div class="dialog-footer">
|
|
95
|
+
<button id="close-full-preview" class="btn btn-primary">Close</button>
|
|
58
96
|
</div>
|
|
59
97
|
</dialog>
|
|
60
98
|
</div>
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
<%=
|
|
99
|
+
|
|
100
|
+
<div class="form-actions justify-end">
|
|
101
|
+
<%= link_to "Cancel", workbench_index_path, class: "btn btn-ghost" %>
|
|
102
|
+
<%= form.submit "Create Prompt", class: "btn btn-primary" %>
|
|
64
103
|
</div>
|
|
65
104
|
<% end %>
|
|
66
105
|
</div>
|
|
106
|
+
|
|
67
107
|
<script>
|
|
68
108
|
(() => {
|
|
69
109
|
const application = Stimulus.Application.start()
|
|
@@ -98,7 +138,6 @@
|
|
|
98
138
|
})
|
|
99
139
|
})()
|
|
100
140
|
|
|
101
|
-
// Prompt preview scrollbar and dialog functionality
|
|
102
141
|
document.addEventListener('DOMContentLoaded', () => {
|
|
103
142
|
const wrapper = document.getElementById('prompt-preview');
|
|
104
143
|
const preview = document.getElementById('preview-content');
|
|
@@ -109,28 +148,24 @@
|
|
|
109
148
|
|
|
110
149
|
if (!preview) return;
|
|
111
150
|
|
|
112
|
-
// Check if the preview content is already populated by Stimulus
|
|
113
151
|
const checkPreviewContent = () => {
|
|
114
152
|
if (preview.textContent.trim().length > 0) {
|
|
115
|
-
// Detect overflow
|
|
116
153
|
if (preview.scrollHeight > preview.clientHeight || preview.scrollWidth > preview.clientWidth) {
|
|
117
154
|
preview.style.maxHeight = '12em';
|
|
118
155
|
preview.style.overflow = 'auto';
|
|
119
156
|
showBtn.classList.remove('hidden');
|
|
120
157
|
}
|
|
121
158
|
} else {
|
|
122
|
-
// If not populated yet, check again after a short delay
|
|
123
159
|
setTimeout(checkPreviewContent, 100);
|
|
124
160
|
}
|
|
125
161
|
};
|
|
126
162
|
|
|
127
|
-
// Start checking once the wrapper is visible
|
|
128
163
|
const observer = new MutationObserver((mutations) => {
|
|
129
164
|
mutations.forEach((mutation) => {
|
|
130
165
|
if (mutation.type === 'attributes' && mutation.attributeName === 'class') {
|
|
131
166
|
if (!wrapper.classList.contains('hidden')) {
|
|
132
167
|
checkPreviewContent();
|
|
133
|
-
observer.disconnect();
|
|
168
|
+
observer.disconnect();
|
|
134
169
|
}
|
|
135
170
|
}
|
|
136
171
|
});
|
|
@@ -138,15 +173,12 @@
|
|
|
138
173
|
|
|
139
174
|
observer.observe(wrapper, { attributes: true });
|
|
140
175
|
|
|
141
|
-
// Show full in dialog
|
|
142
176
|
showBtn.addEventListener('click', () => {
|
|
143
177
|
dialogBody.textContent = preview.textContent;
|
|
144
178
|
dialog.showModal();
|
|
145
179
|
});
|
|
146
180
|
|
|
147
|
-
// Close dialog
|
|
148
181
|
closeBtn.addEventListener('click', () => dialog.close());
|
|
149
182
|
});
|
|
150
183
|
</script>
|
|
151
|
-
<!-- Include marked.js for Markdown parsing -->
|
|
152
184
|
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
data/config/routes.rb
CHANGED
|
@@ -1,21 +1,23 @@
|
|
|
1
1
|
Leva::Engine.routes.draw do
|
|
2
|
-
root
|
|
2
|
+
root "workbench#index"
|
|
3
|
+
|
|
4
|
+
get "design_system", to: "design_system#index"
|
|
3
5
|
|
|
4
6
|
resources :datasets do
|
|
5
|
-
resources :dataset_records, path:
|
|
7
|
+
resources :dataset_records, path: "records", only: [ :index, :show ]
|
|
6
8
|
end
|
|
7
|
-
resources :experiments, except: [:destroy] do
|
|
9
|
+
resources :experiments, except: [ :destroy ] do
|
|
8
10
|
member do
|
|
9
11
|
post :rerun
|
|
10
12
|
end
|
|
11
|
-
resources :runner_results, only: [:show]
|
|
13
|
+
resources :runner_results, only: [ :show ]
|
|
12
14
|
end
|
|
13
15
|
resources :prompts
|
|
14
|
-
resources :workbench, only: [:index, :new, :create, :edit, :update] do
|
|
16
|
+
resources :workbench, only: [ :index, :new, :create, :edit, :update ] do
|
|
15
17
|
collection do
|
|
16
|
-
post
|
|
17
|
-
post
|
|
18
|
-
post
|
|
18
|
+
post "run"
|
|
19
|
+
post "run_all_evals"
|
|
20
|
+
post "run_evaluator"
|
|
19
21
|
end
|
|
20
22
|
end
|
|
21
|
-
end
|
|
23
|
+
end
|
|
@@ -3,6 +3,7 @@ class CreateLevaDatasetRecords < ActiveRecord::Migration[7.2]
|
|
|
3
3
|
create_table :leva_dataset_records do |t|
|
|
4
4
|
t.references :dataset, null: false, foreign_key: { to_table: :leva_datasets }
|
|
5
5
|
t.references :recordable, polymorphic: true, null: false
|
|
6
|
+
t.text :actual_result
|
|
6
7
|
|
|
7
8
|
t.timestamps
|
|
8
9
|
end
|
|
@@ -1,9 +1,12 @@
|
|
|
1
1
|
class CreateLevaRunnerResults < ActiveRecord::Migration[7.2]
|
|
2
2
|
def change
|
|
3
3
|
create_table :leva_runner_results do |t|
|
|
4
|
-
t.references :experiment, null:
|
|
4
|
+
t.references :experiment, null: true, foreign_key: { to_table: :leva_experiments }
|
|
5
5
|
t.references :dataset_record, null: false, foreign_key: { to_table: :leva_dataset_records }
|
|
6
|
+
t.references :prompt, null: false, foreign_key: { to_table: :leva_prompts }
|
|
7
|
+
t.integer :prompt_version
|
|
6
8
|
t.text :prediction
|
|
9
|
+
t.string :runner_class
|
|
7
10
|
|
|
8
11
|
t.timestamps
|
|
9
12
|
end
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
class CreateLevaEvaluationResults < ActiveRecord::Migration[7.2]
|
|
2
2
|
def change
|
|
3
3
|
create_table :leva_evaluation_results do |t|
|
|
4
|
-
t.references :experiment, null:
|
|
4
|
+
t.references :experiment, null: true, foreign_key: { to_table: :leva_experiments }
|
|
5
5
|
t.references :dataset_record, null: false, foreign_key: { to_table: :leva_dataset_records }
|
|
6
|
-
t.
|
|
6
|
+
t.references :runner_result, null: false, foreign_key: { to_table: :leva_runner_results }
|
|
7
|
+
t.string :evaluator_class, null: false
|
|
7
8
|
t.float :score
|
|
8
|
-
t.string :label
|
|
9
9
|
|
|
10
10
|
t.timestamps
|
|
11
11
|
end
|
|
@@ -3,10 +3,10 @@
|
|
|
3
3
|
module Leva
|
|
4
4
|
module Generators
|
|
5
5
|
class EvalGenerator < Rails::Generators::NamedBase
|
|
6
|
-
source_root File.expand_path(
|
|
6
|
+
source_root File.expand_path("templates", __dir__)
|
|
7
7
|
|
|
8
8
|
def create_eval_file
|
|
9
|
-
template
|
|
9
|
+
template "eval.rb.erb", File.join("app/evals", class_path, "#{file_name}_eval.rb")
|
|
10
10
|
end
|
|
11
11
|
|
|
12
12
|
private
|
|
@@ -16,8 +16,8 @@ module Leva
|
|
|
16
16
|
end
|
|
17
17
|
|
|
18
18
|
def remove_possible_suffix(name)
|
|
19
|
-
name.sub(/_?eval$/i,
|
|
19
|
+
name.sub(/_?eval$/i, "")
|
|
20
20
|
end
|
|
21
21
|
end
|
|
22
22
|
end
|
|
23
|
-
end
|
|
23
|
+
end
|
|
@@ -3,10 +3,10 @@
|
|
|
3
3
|
module Leva
|
|
4
4
|
module Generators
|
|
5
5
|
class RunnerGenerator < Rails::Generators::NamedBase
|
|
6
|
-
source_root File.expand_path(
|
|
6
|
+
source_root File.expand_path("templates", __dir__)
|
|
7
7
|
|
|
8
8
|
def create_runner_file
|
|
9
|
-
template
|
|
9
|
+
template "runner.rb.erb", File.join("app/runners", class_path, "#{file_name}_run.rb")
|
|
10
10
|
end
|
|
11
11
|
|
|
12
12
|
private
|
|
@@ -16,8 +16,8 @@ module Leva
|
|
|
16
16
|
end
|
|
17
17
|
|
|
18
18
|
def remove_possible_suffix(name)
|
|
19
|
-
name.sub(/_?runner$/i,
|
|
19
|
+
name.sub(/_?runner$/i, "")
|
|
20
20
|
end
|
|
21
21
|
end
|
|
22
22
|
end
|
|
23
|
-
end
|
|
23
|
+
end
|
|
@@ -33,4 +33,24 @@ class <%= class_name %>Run < Leva::BaseRun
|
|
|
33
33
|
# # For example:
|
|
34
34
|
# # /\<result\>(.*?)\<\/result\>/
|
|
35
35
|
# end
|
|
36
|
+
|
|
37
|
+
# Override this method to provide additional context for LLM prompts
|
|
38
|
+
# This is useful for expensive operations or runner-specific data
|
|
39
|
+
# that you don't want to compute in the record's to_llm_context
|
|
40
|
+
#
|
|
41
|
+
# @param record [YourRecordClass] The record to generate context for
|
|
42
|
+
# @return [Hash] Additional context to merge with the record's context
|
|
43
|
+
# def to_llm_context(record)
|
|
44
|
+
# # Example: Add runner-specific expensive computations
|
|
45
|
+
# {
|
|
46
|
+
# # Count of similar records (expensive database query)
|
|
47
|
+
# similar_<%= singular_name %>_count: record.class.where(
|
|
48
|
+
# "some_field LIKE ?", "%#{record.some_field.split.first}%"
|
|
49
|
+
# ).count,
|
|
50
|
+
#
|
|
51
|
+
# # Runner-specific configuration
|
|
52
|
+
# model_version: "v1.2.3",
|
|
53
|
+
# temperature: 0.7
|
|
54
|
+
# }
|
|
55
|
+
# end
|
|
36
56
|
end
|
data/lib/leva/version.rb
CHANGED
data/lib/leva.rb
CHANGED
|
@@ -65,6 +65,7 @@ module Leva
|
|
|
65
65
|
# Expose these to the subclass execution
|
|
66
66
|
@experiment = experiment
|
|
67
67
|
@prompt = prompt
|
|
68
|
+
@dataset_record = dataset_record
|
|
68
69
|
|
|
69
70
|
result = execute(dataset_record.recordable)
|
|
70
71
|
RunnerResult.create!(
|
|
@@ -76,13 +77,25 @@ module Leva
|
|
|
76
77
|
)
|
|
77
78
|
end
|
|
78
79
|
|
|
80
|
+
# Gets the merged LLM context for the current execution.
|
|
81
|
+
# Combines the record's context with the runner's additional context.
|
|
82
|
+
#
|
|
83
|
+
# @return [Hash] The merged context for LLM prompt rendering
|
|
84
|
+
def merged_llm_context
|
|
85
|
+
return {} unless @dataset_record
|
|
86
|
+
|
|
87
|
+
record_context = @dataset_record.recordable.to_llm_context
|
|
88
|
+
runner_context = to_llm_context(@dataset_record.recordable)
|
|
89
|
+
record_context.merge(runner_context)
|
|
90
|
+
end
|
|
91
|
+
|
|
79
92
|
# @param runner_result [Leva::RunnerResult] The runner result to parse
|
|
80
93
|
# @return [Array<String>] The parsed predictions
|
|
81
94
|
def parsed_predictions(runner_result)
|
|
82
95
|
if extract_regex_pattern(runner_result)
|
|
83
96
|
runner_result.prediction.scan(extract_regex_pattern(runner_result)).map { |match| match.first&.strip }.compact
|
|
84
97
|
else
|
|
85
|
-
[runner_result.prediction]
|
|
98
|
+
[ runner_result.prediction ]
|
|
86
99
|
end
|
|
87
100
|
end
|
|
88
101
|
|
|
@@ -97,6 +110,15 @@ module Leva
|
|
|
97
110
|
def ground_truth(runner_result)
|
|
98
111
|
runner_result.dataset_record.ground_truth
|
|
99
112
|
end
|
|
113
|
+
|
|
114
|
+
# Provides additional LLM context specific to this runner.
|
|
115
|
+
# Override in subclasses to add expensive or runner-specific context.
|
|
116
|
+
#
|
|
117
|
+
# @param record [Object] The recordable object to generate context for
|
|
118
|
+
# @return [Hash] Additional context to merge with the record's context
|
|
119
|
+
def to_llm_context(record)
|
|
120
|
+
{}
|
|
121
|
+
end
|
|
100
122
|
end
|
|
101
123
|
|
|
102
124
|
# Base class for all evaluation implementations in Leva.
|
|
@@ -134,4 +156,4 @@ module Leva
|
|
|
134
156
|
)
|
|
135
157
|
end
|
|
136
158
|
end
|
|
137
|
-
end
|
|
159
|
+
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: leva
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.11
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Kieran Klaassen
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2025-
|
|
11
|
+
date: 2025-11-22 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rails
|
|
@@ -55,6 +55,7 @@ files:
|
|
|
55
55
|
- app/controllers/leva/application_controller.rb
|
|
56
56
|
- app/controllers/leva/dataset_records_controller.rb
|
|
57
57
|
- app/controllers/leva/datasets_controller.rb
|
|
58
|
+
- app/controllers/leva/design_system_controller.rb
|
|
58
59
|
- app/controllers/leva/experiments_controller.rb
|
|
59
60
|
- app/controllers/leva/runner_results_controller.rb
|
|
60
61
|
- app/controllers/leva/workbench_controller.rb
|
|
@@ -82,6 +83,7 @@ files:
|
|
|
82
83
|
- app/views/leva/datasets/index.html.erb
|
|
83
84
|
- app/views/leva/datasets/new.html.erb
|
|
84
85
|
- app/views/leva/datasets/show.html.erb
|
|
86
|
+
- app/views/leva/design_system/index.html.erb
|
|
85
87
|
- app/views/leva/experiments/_experiment.html.erb
|
|
86
88
|
- app/views/leva/experiments/_form.html.erb
|
|
87
89
|
- app/views/leva/experiments/edit.html.erb
|
|
@@ -103,15 +105,8 @@ files:
|
|
|
103
105
|
- db/migrate/20240813173033_create_leva_dataset_records.rb
|
|
104
106
|
- db/migrate/20240813173034_create_leva_prompts.rb
|
|
105
107
|
- db/migrate/20240813173035_create_leva_experiments.rb
|
|
108
|
+
- db/migrate/20240813173040_create_leva_runner_results.rb
|
|
106
109
|
- db/migrate/20240813173050_create_leva_evaluation_results.rb
|
|
107
|
-
- db/migrate/20240816201419_create_leva_runner_results.rb
|
|
108
|
-
- db/migrate/20240816201433_update_leva_evaluation_results.rb
|
|
109
|
-
- db/migrate/20240821163608_make_experiment_optional_for_runner_results.rb
|
|
110
|
-
- db/migrate/20240821181934_add_prompt_to_leva_runner_results.rb
|
|
111
|
-
- db/migrate/20240821183153_add_runner_and_evaluator_to_leva_experiments.rb
|
|
112
|
-
- db/migrate/20240821191713_add_actual_result_to_leva_dataset_records.rb
|
|
113
|
-
- db/migrate/20240822143201_remove_actual_result_from_leva_runner_results.rb
|
|
114
|
-
- db/migrate/20240912183556_add_runner_class_to_leva_runner_results.rb
|
|
115
110
|
- lib/generators/leva/eval_generator.rb
|
|
116
111
|
- lib/generators/leva/runner_generator.rb
|
|
117
112
|
- lib/generators/leva/templates/eval.rb.erb
|
|
@@ -119,7 +114,6 @@ files:
|
|
|
119
114
|
- lib/leva.rb
|
|
120
115
|
- lib/leva/engine.rb
|
|
121
116
|
- lib/leva/version.rb
|
|
122
|
-
- lib/tasks/auto_annotate_models.rake
|
|
123
117
|
- lib/tasks/leva_tasks.rake
|
|
124
118
|
homepage: https://github.com/kieranklaassen/leva
|
|
125
119
|
licenses:
|
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
class UpdateLevaEvaluationResults < ActiveRecord::Migration[7.2]
|
|
2
|
-
def change
|
|
3
|
-
add_reference :leva_evaluation_results, :runner_result, null: false, foreign_key: { to_table: :leva_runner_results }
|
|
4
|
-
add_column :leva_evaluation_results, :evaluator_class, :string, null: false
|
|
5
|
-
remove_column :leva_evaluation_results, :prediction, :string
|
|
6
|
-
remove_column :leva_evaluation_results, :label, :string
|
|
7
|
-
end
|
|
8
|
-
end
|
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
# NOTE: only doing this in development as some production environments (Heroku)
|
|
2
|
-
# NOTE: are sensitive to local FS writes, and besides -- it's just not proper
|
|
3
|
-
# NOTE: to have a dev-mode tool do its thing in production.
|
|
4
|
-
if Rails.env.development?
|
|
5
|
-
require 'annotate'
|
|
6
|
-
task :set_annotation_options do
|
|
7
|
-
# You can override any of these by setting an environment variable of the
|
|
8
|
-
# same name.
|
|
9
|
-
Annotate.set_defaults(
|
|
10
|
-
'active_admin' => 'false',
|
|
11
|
-
'additional_file_patterns' => [],
|
|
12
|
-
'routes' => 'false',
|
|
13
|
-
'models' => 'true',
|
|
14
|
-
'position_in_routes' => 'before',
|
|
15
|
-
'position_in_class' => 'before',
|
|
16
|
-
'position_in_test' => 'before',
|
|
17
|
-
'position_in_fixture' => 'before',
|
|
18
|
-
'position_in_factory' => 'before',
|
|
19
|
-
'position_in_serializer' => 'before',
|
|
20
|
-
'show_foreign_keys' => 'true',
|
|
21
|
-
'show_complete_foreign_keys' => 'false',
|
|
22
|
-
'show_indexes' => 'true',
|
|
23
|
-
'simple_indexes' => 'false',
|
|
24
|
-
'model_dir' => 'app/models',
|
|
25
|
-
'root_dir' => '',
|
|
26
|
-
'include_version' => 'false',
|
|
27
|
-
'require' => '',
|
|
28
|
-
'exclude_tests' => 'false',
|
|
29
|
-
'exclude_fixtures' => 'false',
|
|
30
|
-
'exclude_factories' => 'false',
|
|
31
|
-
'exclude_serializers' => 'false',
|
|
32
|
-
'exclude_scaffolds' => 'true',
|
|
33
|
-
'exclude_controllers' => 'true',
|
|
34
|
-
'exclude_helpers' => 'true',
|
|
35
|
-
'exclude_sti_subclasses' => 'false',
|
|
36
|
-
'ignore_model_sub_dir' => 'false',
|
|
37
|
-
'ignore_columns' => nil,
|
|
38
|
-
'ignore_routes' => nil,
|
|
39
|
-
'ignore_unknown_models' => 'false',
|
|
40
|
-
'hide_limit_column_types' => 'integer,bigint,boolean',
|
|
41
|
-
'hide_default_column_types' => 'json,jsonb,hstore',
|
|
42
|
-
'skip_on_db_migrate' => 'false',
|
|
43
|
-
'format_bare' => 'true',
|
|
44
|
-
'format_rdoc' => 'false',
|
|
45
|
-
'format_yard' => 'false',
|
|
46
|
-
'format_markdown' => 'false',
|
|
47
|
-
'sort' => 'false',
|
|
48
|
-
'force' => 'false',
|
|
49
|
-
'frozen' => 'false',
|
|
50
|
-
'classified_sort' => 'true',
|
|
51
|
-
'trace' => 'false',
|
|
52
|
-
'wrapper_open' => nil,
|
|
53
|
-
'wrapper_close' => nil,
|
|
54
|
-
'with_comment' => 'true'
|
|
55
|
-
)
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
Annotate.load_tasks
|
|
59
|
-
end
|