prompt_engine 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/MIT-LICENSE +20 -0
- data/README.md +67 -0
- data/Rakefile +22 -0
- data/app/assets/stylesheets/prompt_engine/application.css +22 -0
- data/app/assets/stylesheets/prompt_engine/buttons.css +124 -0
- data/app/assets/stylesheets/prompt_engine/cards.css +63 -0
- data/app/assets/stylesheets/prompt_engine/comparison.css +244 -0
- data/app/assets/stylesheets/prompt_engine/components/_test_runs.css +144 -0
- data/app/assets/stylesheets/prompt_engine/dashboard.css +343 -0
- data/app/assets/stylesheets/prompt_engine/evaluations.css +124 -0
- data/app/assets/stylesheets/prompt_engine/forms.css +198 -0
- data/app/assets/stylesheets/prompt_engine/foundation.css +182 -0
- data/app/assets/stylesheets/prompt_engine/layout.css +75 -0
- data/app/assets/stylesheets/prompt_engine/loading.css +229 -0
- data/app/assets/stylesheets/prompt_engine/notifications.css +78 -0
- data/app/assets/stylesheets/prompt_engine/overrides.css +42 -0
- data/app/assets/stylesheets/prompt_engine/prompts.css +237 -0
- data/app/assets/stylesheets/prompt_engine/sidebar.css +90 -0
- data/app/assets/stylesheets/prompt_engine/tables.css +250 -0
- data/app/assets/stylesheets/prompt_engine/utilities.css +52 -0
- data/app/assets/stylesheets/prompt_engine/versions.css +370 -0
- data/app/clients/prompt_engine/open_ai_evals_client.rb +135 -0
- data/app/controllers/prompt_engine/admin/base_controller.rb +7 -0
- data/app/controllers/prompt_engine/application_controller.rb +4 -0
- data/app/controllers/prompt_engine/dashboard_controller.rb +24 -0
- data/app/controllers/prompt_engine/eval_runs_controller.rb +23 -0
- data/app/controllers/prompt_engine/eval_sets_controller.rb +200 -0
- data/app/controllers/prompt_engine/evaluations_controller.rb +32 -0
- data/app/controllers/prompt_engine/playground_controller.rb +57 -0
- data/app/controllers/prompt_engine/playground_run_results_controller.rb +41 -0
- data/app/controllers/prompt_engine/prompts_controller.rb +70 -0
- data/app/controllers/prompt_engine/settings_controller.rb +28 -0
- data/app/controllers/prompt_engine/test_cases_controller.rb +231 -0
- data/app/controllers/prompt_engine/versions_controller.rb +90 -0
- data/app/helpers/prompt_engine/application_helper.rb +4 -0
- data/app/jobs/prompt_engine/application_job.rb +4 -0
- data/app/mailers/prompt_engine/application_mailer.rb +6 -0
- data/app/models/prompt_engine/application_record.rb +5 -0
- data/app/models/prompt_engine/eval_result.rb +19 -0
- data/app/models/prompt_engine/eval_run.rb +40 -0
- data/app/models/prompt_engine/eval_set.rb +97 -0
- data/app/models/prompt_engine/parameter.rb +126 -0
- data/app/models/prompt_engine/parameter_parser.rb +39 -0
- data/app/models/prompt_engine/playground_run_result.rb +20 -0
- data/app/models/prompt_engine/prompt.rb +192 -0
- data/app/models/prompt_engine/prompt_version.rb +72 -0
- data/app/models/prompt_engine/setting.rb +45 -0
- data/app/models/prompt_engine/test_case.rb +29 -0
- data/app/services/prompt_engine/evaluation_runner.rb +258 -0
- data/app/services/prompt_engine/playground_executor.rb +124 -0
- data/app/services/prompt_engine/variable_detector.rb +97 -0
- data/app/views/layouts/prompt_engine/admin.html.erb +65 -0
- data/app/views/layouts/prompt_engine/application.html.erb +17 -0
- data/app/views/prompt_engine/dashboard/index.html.erb +230 -0
- data/app/views/prompt_engine/eval_runs/show.html.erb +204 -0
- data/app/views/prompt_engine/eval_sets/compare.html.erb +229 -0
- data/app/views/prompt_engine/eval_sets/edit.html.erb +111 -0
- data/app/views/prompt_engine/eval_sets/index.html.erb +63 -0
- data/app/views/prompt_engine/eval_sets/metrics.html.erb +371 -0
- data/app/views/prompt_engine/eval_sets/new.html.erb +113 -0
- data/app/views/prompt_engine/eval_sets/show.html.erb +235 -0
- data/app/views/prompt_engine/evaluations/index.html.erb +194 -0
- data/app/views/prompt_engine/playground/result.html.erb +58 -0
- data/app/views/prompt_engine/playground/show.html.erb +129 -0
- data/app/views/prompt_engine/playground_run_results/index.html.erb +99 -0
- data/app/views/prompt_engine/playground_run_results/show.html.erb +123 -0
- data/app/views/prompt_engine/prompts/_form.html.erb +224 -0
- data/app/views/prompt_engine/prompts/edit.html.erb +9 -0
- data/app/views/prompt_engine/prompts/index.html.erb +80 -0
- data/app/views/prompt_engine/prompts/new.html.erb +9 -0
- data/app/views/prompt_engine/prompts/show.html.erb +297 -0
- data/app/views/prompt_engine/settings/edit.html.erb +93 -0
- data/app/views/prompt_engine/shared/_form_errors.html.erb +16 -0
- data/app/views/prompt_engine/test_cases/edit.html.erb +72 -0
- data/app/views/prompt_engine/test_cases/import.html.erb +92 -0
- data/app/views/prompt_engine/test_cases/import_preview.html.erb +103 -0
- data/app/views/prompt_engine/test_cases/new.html.erb +79 -0
- data/app/views/prompt_engine/versions/_version_card.html.erb +56 -0
- data/app/views/prompt_engine/versions/compare.html.erb +82 -0
- data/app/views/prompt_engine/versions/index.html.erb +96 -0
- data/app/views/prompt_engine/versions/show.html.erb +98 -0
- data/config/routes.rb +61 -0
- data/db/migrate/20250124000001_create_eval_tables.rb +43 -0
- data/db/migrate/20250124000002_add_open_ai_fields_to_evals.rb +11 -0
- data/db/migrate/20250125000001_add_grader_fields_to_eval_sets.rb +8 -0
- data/db/migrate/20250723161909_create_prompts.rb +17 -0
- data/db/migrate/20250723184757_create_prompt_engine_versions.rb +24 -0
- data/db/migrate/20250723203838_create_prompt_engine_parameters.rb +20 -0
- data/db/migrate/20250724160623_create_prompt_engine_playground_run_results.rb +30 -0
- data/db/migrate/20250724165118_create_prompt_engine_settings.rb +14 -0
- data/lib/prompt_engine/engine.rb +25 -0
- data/lib/prompt_engine/version.rb +3 -0
- data/lib/prompt_engine.rb +33 -0
- data/lib/tasks/active_prompt_tasks.rake +32 -0
- data/lib/tasks/eval_demo.rake +149 -0
- metadata +293 -0
@@ -0,0 +1,235 @@
|
|
1
|
+
<div class="admin-header">
|
2
|
+
<div>
|
3
|
+
<h1><%= @eval_set.name %></h1>
|
4
|
+
<% if @eval_set.description.present? %>
|
5
|
+
<p class="text-muted"><%= @eval_set.description %></p>
|
6
|
+
<% end %>
|
7
|
+
<p class="text-muted">
|
8
|
+
<strong>Grader Type:</strong> <%= @eval_set.grader_type_display %>
|
9
|
+
<% if @eval_set.grader_type == 'regex' && @eval_set.grader_config['pattern'].present? %>
|
10
|
+
<br><strong>Pattern:</strong> <code><%= @eval_set.grader_config['pattern'] %></code>
|
11
|
+
<% elsif @eval_set.grader_type == 'json_schema' && @eval_set.grader_config['schema'].present? %>
|
12
|
+
<br><strong>Schema:</strong> <code><%= @eval_set.grader_config['schema'].to_json.truncate(100) %></code>
|
13
|
+
<% end %>
|
14
|
+
</p>
|
15
|
+
</div>
|
16
|
+
<div class="btn-group">
|
17
|
+
<%= link_to "Back to Eval Sets", prompt_eval_sets_path(@prompt), class: "btn btn--secondary btn--medium" %>
|
18
|
+
<% if @recent_runs.any? %>
|
19
|
+
<%= link_to "View Metrics", metrics_prompt_eval_set_path(@prompt, @eval_set), class: "btn btn--secondary btn--medium" %>
|
20
|
+
<% end %>
|
21
|
+
<%= link_to "Add Test Case", new_prompt_eval_set_test_case_path(@prompt, @eval_set), class: "btn btn--secondary btn--medium" %>
|
22
|
+
<% if controller.send(:api_key_configured?) %>
|
23
|
+
<%= button_to "Run Evaluation", run_prompt_eval_set_path(@prompt, @eval_set),
|
24
|
+
method: :post,
|
25
|
+
class: "btn btn--primary btn--medium",
|
26
|
+
id: "run-evaluation-btn",
|
27
|
+
data: {
|
28
|
+
disable_with: "Running Evaluation...",
|
29
|
+
confirm: @test_cases.any? ? nil : "No test cases defined. Are you sure you want to run an empty evaluation?"
|
30
|
+
} %>
|
31
|
+
<% else %>
|
32
|
+
<%= link_to "Configure API Key", settings_path,
|
33
|
+
class: "btn btn--primary btn--medium",
|
34
|
+
title: "OpenAI API key required to run evaluations" %>
|
35
|
+
<% end %>
|
36
|
+
<%= link_to "Edit", edit_prompt_eval_set_path(@prompt, @eval_set), class: "btn btn--secondary btn--medium" %>
|
37
|
+
</div>
|
38
|
+
</div>
|
39
|
+
|
40
|
+
<div class="card mb-lg">
|
41
|
+
<div class="card__header">
|
42
|
+
<h3 class="card__title">Test Cases</h3>
|
43
|
+
<div class="card__actions">
|
44
|
+
<%= link_to "Import", import_prompt_eval_set_test_cases_path(@prompt, @eval_set), class: "btn btn--secondary btn--small" %>
|
45
|
+
<%= link_to "Add Test Case", new_prompt_eval_set_test_case_path(@prompt, @eval_set), class: "btn btn--secondary btn--small" %>
|
46
|
+
</div>
|
47
|
+
</div>
|
48
|
+
<div class="card__body">
|
49
|
+
<% if @test_cases.any? %>
|
50
|
+
<div class="table-container">
|
51
|
+
<table class="table">
|
52
|
+
<thead>
|
53
|
+
<tr>
|
54
|
+
<th>Description</th>
|
55
|
+
<th>Input Variables</th>
|
56
|
+
<th>Expected Output</th>
|
57
|
+
<th class="table__actions">Actions</th>
|
58
|
+
</tr>
|
59
|
+
</thead>
|
60
|
+
<tbody>
|
61
|
+
<% @test_cases.each do |test_case| %>
|
62
|
+
<tr>
|
63
|
+
<td>
|
64
|
+
<div class="table__primary">
|
65
|
+
<%= test_case.description || "Test case ##{test_case.id}" %>
|
66
|
+
</div>
|
67
|
+
</td>
|
68
|
+
<td>
|
69
|
+
<code class="code-inline">
|
70
|
+
<%= truncate(test_case.input_variables.to_json, length: 100) %>
|
71
|
+
</code>
|
72
|
+
</td>
|
73
|
+
<td>
|
74
|
+
<code class="code-inline">
|
75
|
+
<%= truncate(test_case.expected_output, length: 100) %>
|
76
|
+
</code>
|
77
|
+
</td>
|
78
|
+
<td class="table__actions">
|
79
|
+
<%= link_to "Edit", edit_prompt_eval_set_test_case_path(@prompt, @eval_set, test_case),
|
80
|
+
class: "btn btn--secondary btn--small" %>
|
81
|
+
<%= button_to "Delete", prompt_eval_set_test_case_path(@prompt, @eval_set, test_case),
|
82
|
+
method: :delete,
|
83
|
+
data: { confirm: "Are you sure you want to delete this test case?" },
|
84
|
+
class: "btn btn--danger btn--small" %>
|
85
|
+
</td>
|
86
|
+
</tr>
|
87
|
+
<% end %>
|
88
|
+
</tbody>
|
89
|
+
</table>
|
90
|
+
</div>
|
91
|
+
<% else %>
|
92
|
+
<div class="table-empty">
|
93
|
+
<p class="text-muted">No test cases added yet.</p>
|
94
|
+
<div class="btn-group">
|
95
|
+
<%= link_to "Import Test Cases", import_prompt_eval_set_test_cases_path(@prompt, @eval_set),
|
96
|
+
class: "btn btn--secondary btn--medium" %>
|
97
|
+
<%= link_to "Add Your First Test Case", new_prompt_eval_set_test_case_path(@prompt, @eval_set),
|
98
|
+
class: "btn btn--primary btn--medium" %>
|
99
|
+
</div>
|
100
|
+
</div>
|
101
|
+
<% end %>
|
102
|
+
</div>
|
103
|
+
</div>
|
104
|
+
|
105
|
+
<div class="card">
|
106
|
+
<div class="card__header">
|
107
|
+
<h3 class="card__title">Recent Evaluation Runs</h3>
|
108
|
+
<% if @recent_runs.select { |r| r.status == 'completed' }.count >= 2 %>
|
109
|
+
<div class="card__actions">
|
110
|
+
<button type="submit" form="compare-runs-form" class="btn btn--secondary btn--small"
|
111
|
+
id="compare-runs-btn" disabled>
|
112
|
+
Compare Selected (0/2)
|
113
|
+
</button>
|
114
|
+
</div>
|
115
|
+
<% end %>
|
116
|
+
</div>
|
117
|
+
<div class="card__body">
|
118
|
+
<% if @recent_runs.any? %>
|
119
|
+
<%= form_with url: compare_prompt_eval_set_path(@prompt, @eval_set), method: :get,
|
120
|
+
local: true, id: "compare-runs-form" do |form| %>
|
121
|
+
<div class="table-container">
|
122
|
+
<table class="table">
|
123
|
+
<thead>
|
124
|
+
<tr>
|
125
|
+
<% if @recent_runs.select { |r| r.status == 'completed' }.count >= 2 %>
|
126
|
+
<th class="table__checkbox-column">Compare</th>
|
127
|
+
<% end %>
|
128
|
+
<th>Run Date</th>
|
129
|
+
<th>Prompt Version</th>
|
130
|
+
<th>Status</th>
|
131
|
+
<th>Results</th>
|
132
|
+
<th class="table__actions">Actions</th>
|
133
|
+
</tr>
|
134
|
+
</thead>
|
135
|
+
<tbody>
|
136
|
+
<% @recent_runs.each do |run| %>
|
137
|
+
<tr>
|
138
|
+
<% if @recent_runs.select { |r| r.status == 'completed' }.count >= 2 %>
|
139
|
+
<td class="table__checkbox-column">
|
140
|
+
<% if run.status == 'completed' %>
|
141
|
+
<%= check_box_tag "run_ids[]", run.id, false,
|
142
|
+
class: "compare-checkbox",
|
143
|
+
data: {
|
144
|
+
run_id: run.id,
|
145
|
+
version: "v#{run.prompt_version.version_number}",
|
146
|
+
date: run.created_at.strftime("%b %d, %Y %I:%M %p")
|
147
|
+
} %>
|
148
|
+
<% else %>
|
149
|
+
<span class="text-muted" title="Only completed runs can be compared">—</span>
|
150
|
+
<% end %>
|
151
|
+
</td>
|
152
|
+
<% end %>
|
153
|
+
<td>
|
154
|
+
<div class="table__primary">
|
155
|
+
<%= run.created_at.strftime("%b %d, %Y %I:%M %p") %>
|
156
|
+
</div>
|
157
|
+
<span class="table__secondary"><%= time_ago_in_words(run.created_at) %> ago</span>
|
158
|
+
</td>
|
159
|
+
<td>
|
160
|
+
<span class="table__badge table__badge--info">v<%= run.prompt_version.version_number %></span>
|
161
|
+
</td>
|
162
|
+
<td>
|
163
|
+
<span class="table__badge table__badge--<%= run.status %>">
|
164
|
+
<%= run.status.humanize %>
|
165
|
+
</span>
|
166
|
+
</td>
|
167
|
+
<td>
|
168
|
+
<% if run.total_count > 0 %>
|
169
|
+
<div class="table__metric">
|
170
|
+
<span class="table__metric-value"><%= run.passed_count %> / <%= run.total_count %></span>
|
171
|
+
<span class="table__metric-unit">passed</span>
|
172
|
+
</div>
|
173
|
+
<span class="table__secondary">
|
174
|
+
<%= number_to_percentage((run.passed_count.to_f / run.total_count * 100), precision: 0) %> success rate
|
175
|
+
</span>
|
176
|
+
<% else %>
|
177
|
+
<span class="table__secondary text-muted">—</span>
|
178
|
+
<% end %>
|
179
|
+
</td>
|
180
|
+
<td class="table__actions">
|
181
|
+
<%= link_to "View Results", prompt_eval_run_path(@prompt, run),
|
182
|
+
class: "btn btn--secondary btn--small" %>
|
183
|
+
</td>
|
184
|
+
</tr>
|
185
|
+
<% end %>
|
186
|
+
</tbody>
|
187
|
+
</table>
|
188
|
+
</div>
|
189
|
+
<% end %>
|
190
|
+
<% else %>
|
191
|
+
<div class="table-empty">
|
192
|
+
<p class="text-muted">No evaluation runs yet.</p>
|
193
|
+
<% if @test_cases.any? %>
|
194
|
+
<%= button_to "Run Your First Evaluation", run_prompt_eval_set_path(@prompt, @eval_set),
|
195
|
+
method: :post, class: "btn btn--primary btn--medium" %>
|
196
|
+
<% else %>
|
197
|
+
<p class="text-muted">Add test cases before running evaluations.</p>
|
198
|
+
<% end %>
|
199
|
+
</div>
|
200
|
+
<% end %>
|
201
|
+
</div>
|
202
|
+
</div>
|
203
|
+
|
204
|
+
<% if @recent_runs.select { |r| r.status == 'completed' }.count >= 2 %>
|
205
|
+
<script>
|
206
|
+
// Keeps the "Compare Selected" button and the per-run checkboxes in sync:
// the button is enabled only while exactly two runs are ticked, and once two
// are ticked every remaining unticked checkbox is locked.
document.addEventListener('DOMContentLoaded', () => {
  const runCheckboxes = document.querySelectorAll('.compare-checkbox');
  const compareButton = document.getElementById('compare-runs-btn');

  // Recomputes the button label/state and the checkbox locks from the
  // current selection. Runs on every checkbox change and once at start-up.
  const updateCompareButton = () => {
    const count = document.querySelectorAll('.compare-checkbox:checked').length;
    const selectionFull = count === 2;

    compareButton.disabled = !selectionFull;
    compareButton.textContent = `Compare Selected (${count}/2)`;

    // With two runs chosen, only the ticked boxes stay interactive so the
    // user can still untick one of them; otherwise everything is unlocked.
    for (const box of runCheckboxes) {
      box.disabled = selectionFull && !box.checked;
    }
  };

  for (const box of runCheckboxes) {
    box.addEventListener('change', updateCompareButton);
  }

  // Establish the initial button state before any interaction.
  updateCompareButton();
});
|
234
|
+
</script>
|
235
|
+
<% end %>
|
@@ -0,0 +1,194 @@
|
|
1
|
+
<div class="admin-header">
|
2
|
+
<div>
|
3
|
+
<h1>Evaluations</h1>
|
4
|
+
<p class="text-muted">Manage evaluation sets and view results across all prompts</p>
|
5
|
+
</div>
|
6
|
+
</div>
|
7
|
+
|
8
|
+
<!-- Summary Cards -->
|
9
|
+
<div class="metrics-summary mb-lg">
|
10
|
+
<div class="metrics-card">
|
11
|
+
<div class="metrics-card__icon">
|
12
|
+
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
13
|
+
<path d="M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2"></path>
|
14
|
+
</svg>
|
15
|
+
</div>
|
16
|
+
<div class="metrics-card__content">
|
17
|
+
<h3 class="metrics-card__label">Evaluation Sets</h3>
|
18
|
+
<p class="metrics-card__value"><%= @total_eval_sets %></p>
|
19
|
+
</div>
|
20
|
+
</div>
|
21
|
+
|
22
|
+
<div class="metrics-card">
|
23
|
+
<div class="metrics-card__icon">
|
24
|
+
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
25
|
+
<path d="M12 8v4l3 3m6-3a9 9 0 11-18 0 9 9 0 0118 0z"></path>
|
26
|
+
</svg>
|
27
|
+
</div>
|
28
|
+
<div class="metrics-card__content">
|
29
|
+
<h3 class="metrics-card__label">Total Runs</h3>
|
30
|
+
<p class="metrics-card__value"><%= @total_eval_runs %></p>
|
31
|
+
</div>
|
32
|
+
</div>
|
33
|
+
|
34
|
+
<div class="metrics-card">
|
35
|
+
<div class="metrics-card__icon">
|
36
|
+
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
37
|
+
<path d="M9 11l3 3L22 4"></path>
|
38
|
+
<path d="M21 12v7a2 2 0 01-2 2H5a2 2 0 01-2-2V5a2 2 0 012-2h11"></path>
|
39
|
+
</svg>
|
40
|
+
</div>
|
41
|
+
<div class="metrics-card__content">
|
42
|
+
<h3 class="metrics-card__label">Test Cases</h3>
|
43
|
+
<p class="metrics-card__value"><%= @total_test_cases %></p>
|
44
|
+
</div>
|
45
|
+
</div>
|
46
|
+
|
47
|
+
<div class="metrics-card">
|
48
|
+
<div class="metrics-card__icon">
|
49
|
+
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
50
|
+
<path d="M13 7h8m0 0v8m0-8l-8 8-4-4-6 6"></path>
|
51
|
+
</svg>
|
52
|
+
</div>
|
53
|
+
<div class="metrics-card__content">
|
54
|
+
<h3 class="metrics-card__label">Overall Pass Rate</h3>
|
55
|
+
<p class="metrics-card__value"><%= number_to_percentage(@overall_pass_rate, precision: 1) %></p>
|
56
|
+
</div>
|
57
|
+
</div>
|
58
|
+
</div>
|
59
|
+
|
60
|
+
<!-- Prompts with Evaluation Sets -->
|
61
|
+
<div class="card mb-lg">
|
62
|
+
<div class="card__header">
|
63
|
+
<h3 class="card__title">Prompts with Evaluations</h3>
|
64
|
+
</div>
|
65
|
+
<div class="card__body">
|
66
|
+
<% if @prompts_with_eval_sets.any? %>
|
67
|
+
<div class="eval-prompts-list">
|
68
|
+
<% @prompts_with_eval_sets.each do |prompt| %>
|
69
|
+
<div class="eval-prompt-item">
|
70
|
+
<div class="eval-prompt-header">
|
71
|
+
<h4 class="eval-prompt-title">
|
72
|
+
<%= link_to prompt.name, prompt_path(prompt), class: "table__link" %>
|
73
|
+
</h4>
|
74
|
+
<span class="table__badge table__badge--<%= prompt.status %>">
|
75
|
+
<%= prompt.status %>
|
76
|
+
</span>
|
77
|
+
</div>
|
78
|
+
|
79
|
+
<div class="eval-sets-grid">
|
80
|
+
<% prompt.eval_sets.each do |eval_set| %>
|
81
|
+
<div class="eval-set-card">
|
82
|
+
<div class="eval-set-card__header">
|
83
|
+
<h5 class="eval-set-card__title">
|
84
|
+
<%= link_to eval_set.name, prompt_eval_set_path(prompt, eval_set), class: "table__link" %>
|
85
|
+
</h5>
|
86
|
+
<div class="eval-set-card__stats">
|
87
|
+
<span class="eval-set-card__stat">
|
88
|
+
<%= pluralize(eval_set.test_cases.count, 'test') %>
|
89
|
+
</span>
|
90
|
+
<span class="eval-set-card__stat">
|
91
|
+
<%= pluralize(eval_set.eval_runs.count, 'run') %>
|
92
|
+
</span>
|
93
|
+
</div>
|
94
|
+
</div>
|
95
|
+
|
96
|
+
<% if eval_set.description.present? %>
|
97
|
+
<p class="eval-set-card__description">
|
98
|
+
<%= truncate(eval_set.description, length: 100) %>
|
99
|
+
</p>
|
100
|
+
<% end %>
|
101
|
+
|
102
|
+
<div class="eval-set-card__actions">
|
103
|
+
<%= link_to "View", prompt_eval_set_path(prompt, eval_set),
|
104
|
+
class: "btn btn--secondary btn--small" %>
|
105
|
+
<% if eval_set.eval_runs.where(status: 'completed').any? %>
|
106
|
+
<%= link_to "Metrics", metrics_prompt_eval_set_path(prompt, eval_set),
|
107
|
+
class: "btn btn--secondary btn--small" %>
|
108
|
+
<% end %>
|
109
|
+
</div>
|
110
|
+
</div>
|
111
|
+
<% end %>
|
112
|
+
</div>
|
113
|
+
</div>
|
114
|
+
<% end %>
|
115
|
+
</div>
|
116
|
+
<% else %>
|
117
|
+
<div class="table-empty">
|
118
|
+
<p class="text-muted">No evaluation sets created yet.</p>
|
119
|
+
<p class="text-muted">Create evaluation sets from the prompt details page.</p>
|
120
|
+
<%= link_to "View Prompts", prompts_path, class: "btn btn--primary btn--medium mt-md" %>
|
121
|
+
</div>
|
122
|
+
<% end %>
|
123
|
+
</div>
|
124
|
+
</div>
|
125
|
+
|
126
|
+
<!-- Recent Activity -->
|
127
|
+
<div class="card">
|
128
|
+
<div class="card__header">
|
129
|
+
<h3 class="card__title">Recent Evaluation Activity</h3>
|
130
|
+
</div>
|
131
|
+
<div class="card__body">
|
132
|
+
<% if @recent_runs.any? %>
|
133
|
+
<div class="table-container">
|
134
|
+
<table class="table table--simple">
|
135
|
+
<thead>
|
136
|
+
<tr>
|
137
|
+
<th>Date</th>
|
138
|
+
<th>Prompt</th>
|
139
|
+
<th>Evaluation Set</th>
|
140
|
+
<th>Status</th>
|
141
|
+
<th>Results</th>
|
142
|
+
<th class="table__actions">Actions</th>
|
143
|
+
</tr>
|
144
|
+
</thead>
|
145
|
+
<tbody>
|
146
|
+
<% @recent_runs.each do |run| %>
|
147
|
+
<tr>
|
148
|
+
<td>
|
149
|
+
<div class="table__primary">
|
150
|
+
<%= run.created_at.strftime("%b %d, %Y %I:%M %p") %>
|
151
|
+
</div>
|
152
|
+
</td>
|
153
|
+
<td>
|
154
|
+
<%= link_to run.eval_set.prompt.name, prompt_path(run.eval_set.prompt),
|
155
|
+
class: "table__link" %>
|
156
|
+
</td>
|
157
|
+
<td>
|
158
|
+
<%= link_to run.eval_set.name, prompt_eval_set_path(run.eval_set.prompt, run.eval_set),
|
159
|
+
class: "table__link" %>
|
160
|
+
</td>
|
161
|
+
<td>
|
162
|
+
<span class="table__badge table__badge--<%= run.status %>">
|
163
|
+
<%= run.status.humanize %>
|
164
|
+
</span>
|
165
|
+
</td>
|
166
|
+
<td>
|
167
|
+
<% if run.status == 'completed' && run.total_count > 0 %>
|
168
|
+
<% success_rate = (run.passed_count.to_f / run.total_count * 100) %>
|
169
|
+
<div class="table__metric">
|
170
|
+
<span class="table__metric-value
|
171
|
+
<%= success_rate >= 80 ? 'text-success' : success_rate >= 60 ? 'text-warning' : 'text-danger' %>">
|
172
|
+
<%= number_to_percentage(success_rate, precision: 1) %>
|
173
|
+
</span>
|
174
|
+
</div>
|
175
|
+
<% else %>
|
176
|
+
<span class="table__secondary text-muted">—</span>
|
177
|
+
<% end %>
|
178
|
+
</td>
|
179
|
+
<td class="table__actions">
|
180
|
+
<%= link_to "View", prompt_eval_run_path(run.eval_set.prompt, run),
|
181
|
+
class: "table__action" %>
|
182
|
+
</td>
|
183
|
+
</tr>
|
184
|
+
<% end %>
|
185
|
+
</tbody>
|
186
|
+
</table>
|
187
|
+
</div>
|
188
|
+
<% else %>
|
189
|
+
<div class="table-empty">
|
190
|
+
<p class="text-muted">No evaluation runs yet.</p>
|
191
|
+
</div>
|
192
|
+
<% end %>
|
193
|
+
</div>
|
194
|
+
</div>
|
@@ -0,0 +1,58 @@
|
|
1
|
+
<div class="admin-header">
|
2
|
+
<div>
|
3
|
+
<h1>Test Results: <%= @prompt.name %></h1>
|
4
|
+
<p class="text-muted">Prompt execution completed</p>
|
5
|
+
</div>
|
6
|
+
</div>
|
7
|
+
|
8
|
+
<% if @error.present? %>
|
9
|
+
<div class="card mb-lg">
|
10
|
+
<div class="card__header">
|
11
|
+
<h3 class="card__title text-danger">Error</h3>
|
12
|
+
</div>
|
13
|
+
<div class="card__body">
|
14
|
+
<p class="text-danger"><%= @error %></p>
|
15
|
+
</div>
|
16
|
+
</div>
|
17
|
+
<% else %>
|
18
|
+
<div class="card mb-lg">
|
19
|
+
<div class="card__header">
|
20
|
+
<h3 class="card__title">Rendered Prompt</h3>
|
21
|
+
</div>
|
22
|
+
<div class="card__body">
|
23
|
+
<pre class="prompt-content"><%= @rendered_prompt %></pre>
|
24
|
+
</div>
|
25
|
+
</div>
|
26
|
+
|
27
|
+
<div class="card mb-lg">
|
28
|
+
<div class="card__header">
|
29
|
+
<h3 class="card__title">AI Response</h3>
|
30
|
+
</div>
|
31
|
+
<div class="card__body">
|
32
|
+
<pre class="prompt-content"><%= @response %></pre>
|
33
|
+
</div>
|
34
|
+
</div>
|
35
|
+
|
36
|
+
<div class="card mb-lg">
|
37
|
+
<div class="card__header">
|
38
|
+
<h3 class="card__title">Execution Details</h3>
|
39
|
+
</div>
|
40
|
+
<div class="card__body">
|
41
|
+
<div class="detail-grid">
|
42
|
+
<div class="detail-item">
|
43
|
+
<label class="detail-label">Execution Time</label>
|
44
|
+
<div class="detail-value"><%= @execution_time %> seconds</div>
|
45
|
+
</div>
|
46
|
+
<div class="detail-item">
|
47
|
+
<label class="detail-label">Tokens Used</label>
|
48
|
+
<div class="detail-value"><%= @token_count %></div>
|
49
|
+
</div>
|
50
|
+
</div>
|
51
|
+
</div>
|
52
|
+
</div>
|
53
|
+
<% end %>
|
54
|
+
|
55
|
+
<div class="form__actions">
|
56
|
+
<%= link_to "Try Again", playground_prompt_path(@prompt), class: "btn btn--primary btn--medium" %>
|
57
|
+
<%= link_to "Back to Prompt", prompt_path(@prompt), class: "btn btn--secondary btn--medium" %>
|
58
|
+
</div>
|
@@ -0,0 +1,129 @@
|
|
1
|
+
<div class="admin-header">
|
2
|
+
<div>
|
3
|
+
<h1>Test Prompt: <%= @prompt.name %></h1>
|
4
|
+
<p class="text-muted">Test your prompt with real AI providers</p>
|
5
|
+
</div>
|
6
|
+
</div>
|
7
|
+
|
8
|
+
<div class="card">
|
9
|
+
<div class="card__header">
|
10
|
+
<h3 class="card__title">Playground Settings</h3>
|
11
|
+
</div>
|
12
|
+
<div class="card__body">
|
13
|
+
<%= form_with url: playground_prompt_path(@prompt), method: :post, local: true, html: { class: "form" } do |form| %>
|
14
|
+
<div class="form__group">
|
15
|
+
<%= form.label :provider, "AI Provider", class: "form__label form__label--required" %>
|
16
|
+
<%
|
17
|
+
# Determine default provider based on prompt's model
|
18
|
+
default_provider = case @prompt.model
|
19
|
+
when /claude/i, /anthropic/i
|
20
|
+
"anthropic"
|
21
|
+
when /gpt/i, /openai/i
|
22
|
+
"openai"
|
23
|
+
else
|
24
|
+
nil
|
25
|
+
end
|
26
|
+
%>
|
27
|
+
<%= form.select :provider,
|
28
|
+
options_for_select([
|
29
|
+
["Anthropic (claude-3-5-sonnet-20241022)", "anthropic"],
|
30
|
+
["OpenAI (gpt-4o)", "openai"]
|
31
|
+
], default_provider),
|
32
|
+
{ prompt: "Select a provider" },
|
33
|
+
class: "form__select", required: true %>
|
34
|
+
<div class="form__help">
|
35
|
+
<% if @prompt.model.present? %>
|
36
|
+
Prompt is configured for <%= @prompt.model %>
|
37
|
+
<% else %>
|
38
|
+
Select which AI provider to test with
|
39
|
+
<% end %>
|
40
|
+
</div>
|
41
|
+
</div>
|
42
|
+
|
43
|
+
<div class="form__group">
|
44
|
+
<%= form.label :api_key, "API Key", class: "form__label form__label--required" %>
|
45
|
+
<%
|
46
|
+
# Get the appropriate API key based on selected provider
|
47
|
+
default_api_key = nil
|
48
|
+
api_key_configured = false
|
49
|
+
|
50
|
+
if default_provider == "anthropic" && @settings.anthropic_configured?
|
51
|
+
default_api_key = @settings.anthropic_api_key
|
52
|
+
api_key_configured = true
|
53
|
+
elsif default_provider == "openai" && @settings.openai_configured?
|
54
|
+
default_api_key = @settings.openai_api_key
|
55
|
+
api_key_configured = true
|
56
|
+
end
|
57
|
+
%>
|
58
|
+
<%= form.password_field :api_key,
|
59
|
+
class: "form__input",
|
60
|
+
required: true,
|
61
|
+
value: default_api_key,
|
62
|
+
placeholder: api_key_configured ? "Using saved API key" : "Enter your API key",
|
63
|
+
data: {
|
64
|
+
anthropic_key: @settings.anthropic_api_key,
|
65
|
+
openai_key: @settings.openai_api_key
|
66
|
+
} %>
|
67
|
+
<div class="form__help">
|
68
|
+
<% if api_key_configured %>
|
69
|
+
Using saved API key from settings. <%= link_to "Change in settings", edit_settings_path, class: "link" %>
|
70
|
+
<% else %>
|
71
|
+
Your API key will not be stored. <%= link_to "Save in settings", edit_settings_path, class: "link" %>
|
72
|
+
<% end %>
|
73
|
+
</div>
|
74
|
+
</div>
|
75
|
+
|
76
|
+
<% if @parameters.any? %>
|
77
|
+
<h3 class="form__section-title">Prompt Parameters</h3>
|
78
|
+
<div class="form__help mb-md">Fill in the values for each parameter:</div>
|
79
|
+
|
80
|
+
<% @parameters.each do |parameter_name| %>
|
81
|
+
<div class="form__group">
|
82
|
+
<%= form.label "parameters[#{parameter_name}]", parameter_name.humanize.titleize, class: "form__label" %>
|
83
|
+
<%= form.text_area "parameters[#{parameter_name}]",
|
84
|
+
class: "form__textarea",
|
85
|
+
rows: 3,
|
86
|
+
placeholder: "Enter value for #{parameter_name}" %>
|
87
|
+
</div>
|
88
|
+
<% end %>
|
89
|
+
<% end %>
|
90
|
+
|
91
|
+
<div class="form__actions">
|
92
|
+
<%= link_to "Cancel", prompt_path(@prompt), class: "btn btn--secondary btn--medium" %>
|
93
|
+
<%= form.submit "Test Prompt", class: "btn btn--primary btn--medium" %>
|
94
|
+
</div>
|
95
|
+
<% end %>
|
96
|
+
</div>
|
97
|
+
</div>
|
98
|
+
|
99
|
+
<script>
|
100
|
+
// Update API key when provider changes
|
101
|
+
// When the provider dropdown changes, pre-fill the API key field with the
// saved key for that provider (if one was rendered into the field's data
// attributes) and update the placeholder and help text to match. If no saved
// key exists for the chosen provider, the field is cleared so the user can
// type one in.
//
// NOTE(review): the saved keys are read from data-* attributes on the input,
// which means they are present in the page markup — confirm that exposure is
// acceptable for this admin screen.
document.addEventListener('DOMContentLoaded', () => {
  const providerSelect = document.getElementById('provider');
  const apiKeyField = document.getElementById('api_key');

  // Guard before touching either element: looking up the help text goes
  // through apiKeyField, so the check has to come first to be effective.
  if (!providerSelect || !apiKeyField) {
    return;
  }

  // The help text lives next to the input inside the same form group; it is
  // treated as optional so a markup change cannot break key switching.
  const helpText = apiKeyField.parentElement.querySelector('.form__help');

  // Saved key per provider value; entries are undefined/empty when no key
  // was rendered for that provider.
  const savedKeys = new Map([
    ['anthropic', apiKeyField.dataset.anthropicKey],
    ['openai', apiKeyField.dataset.openaiKey],
  ]);

  const savedKeyHelp = 'Using saved API key from settings. <a href="<%= edit_settings_path %>" class="link">Change in settings</a>';
  const manualKeyHelp = 'Your API key will not be stored. <a href="<%= edit_settings_path %>" class="link">Save in settings</a>';

  providerSelect.addEventListener('change', () => {
    const savedKey = savedKeys.get(providerSelect.value);
    const hasSavedKey = Boolean(savedKey);

    apiKeyField.value = hasSavedKey ? savedKey : '';
    apiKeyField.placeholder = hasSavedKey ? 'Using saved API key' : 'Enter your API key';

    if (helpText) {
      // Both messages are fixed, server-rendered strings (no user input),
      // so assigning them as HTML is safe here.
      helpText.innerHTML = hasSavedKey ? savedKeyHelp : manualKeyHelp;
    }
  });
});
|
129
|
+
</script>
|