prompt_engine 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/MIT-LICENSE +20 -0
- data/README.md +67 -0
- data/Rakefile +22 -0
- data/app/assets/stylesheets/prompt_engine/application.css +22 -0
- data/app/assets/stylesheets/prompt_engine/buttons.css +124 -0
- data/app/assets/stylesheets/prompt_engine/cards.css +63 -0
- data/app/assets/stylesheets/prompt_engine/comparison.css +244 -0
- data/app/assets/stylesheets/prompt_engine/components/_test_runs.css +144 -0
- data/app/assets/stylesheets/prompt_engine/dashboard.css +343 -0
- data/app/assets/stylesheets/prompt_engine/evaluations.css +124 -0
- data/app/assets/stylesheets/prompt_engine/forms.css +198 -0
- data/app/assets/stylesheets/prompt_engine/foundation.css +182 -0
- data/app/assets/stylesheets/prompt_engine/layout.css +75 -0
- data/app/assets/stylesheets/prompt_engine/loading.css +229 -0
- data/app/assets/stylesheets/prompt_engine/notifications.css +78 -0
- data/app/assets/stylesheets/prompt_engine/overrides.css +42 -0
- data/app/assets/stylesheets/prompt_engine/prompts.css +237 -0
- data/app/assets/stylesheets/prompt_engine/sidebar.css +90 -0
- data/app/assets/stylesheets/prompt_engine/tables.css +250 -0
- data/app/assets/stylesheets/prompt_engine/utilities.css +52 -0
- data/app/assets/stylesheets/prompt_engine/versions.css +370 -0
- data/app/clients/prompt_engine/open_ai_evals_client.rb +135 -0
- data/app/controllers/prompt_engine/admin/base_controller.rb +7 -0
- data/app/controllers/prompt_engine/application_controller.rb +4 -0
- data/app/controllers/prompt_engine/dashboard_controller.rb +24 -0
- data/app/controllers/prompt_engine/eval_runs_controller.rb +23 -0
- data/app/controllers/prompt_engine/eval_sets_controller.rb +200 -0
- data/app/controllers/prompt_engine/evaluations_controller.rb +32 -0
- data/app/controllers/prompt_engine/playground_controller.rb +57 -0
- data/app/controllers/prompt_engine/playground_run_results_controller.rb +41 -0
- data/app/controllers/prompt_engine/prompts_controller.rb +70 -0
- data/app/controllers/prompt_engine/settings_controller.rb +28 -0
- data/app/controllers/prompt_engine/test_cases_controller.rb +231 -0
- data/app/controllers/prompt_engine/versions_controller.rb +90 -0
- data/app/helpers/prompt_engine/application_helper.rb +4 -0
- data/app/jobs/prompt_engine/application_job.rb +4 -0
- data/app/mailers/prompt_engine/application_mailer.rb +6 -0
- data/app/models/prompt_engine/application_record.rb +5 -0
- data/app/models/prompt_engine/eval_result.rb +19 -0
- data/app/models/prompt_engine/eval_run.rb +40 -0
- data/app/models/prompt_engine/eval_set.rb +97 -0
- data/app/models/prompt_engine/parameter.rb +126 -0
- data/app/models/prompt_engine/parameter_parser.rb +39 -0
- data/app/models/prompt_engine/playground_run_result.rb +20 -0
- data/app/models/prompt_engine/prompt.rb +192 -0
- data/app/models/prompt_engine/prompt_version.rb +72 -0
- data/app/models/prompt_engine/setting.rb +45 -0
- data/app/models/prompt_engine/test_case.rb +29 -0
- data/app/services/prompt_engine/evaluation_runner.rb +258 -0
- data/app/services/prompt_engine/playground_executor.rb +124 -0
- data/app/services/prompt_engine/variable_detector.rb +97 -0
- data/app/views/layouts/prompt_engine/admin.html.erb +65 -0
- data/app/views/layouts/prompt_engine/application.html.erb +17 -0
- data/app/views/prompt_engine/dashboard/index.html.erb +230 -0
- data/app/views/prompt_engine/eval_runs/show.html.erb +204 -0
- data/app/views/prompt_engine/eval_sets/compare.html.erb +229 -0
- data/app/views/prompt_engine/eval_sets/edit.html.erb +111 -0
- data/app/views/prompt_engine/eval_sets/index.html.erb +63 -0
- data/app/views/prompt_engine/eval_sets/metrics.html.erb +371 -0
- data/app/views/prompt_engine/eval_sets/new.html.erb +113 -0
- data/app/views/prompt_engine/eval_sets/show.html.erb +235 -0
- data/app/views/prompt_engine/evaluations/index.html.erb +194 -0
- data/app/views/prompt_engine/playground/result.html.erb +58 -0
- data/app/views/prompt_engine/playground/show.html.erb +129 -0
- data/app/views/prompt_engine/playground_run_results/index.html.erb +99 -0
- data/app/views/prompt_engine/playground_run_results/show.html.erb +123 -0
- data/app/views/prompt_engine/prompts/_form.html.erb +224 -0
- data/app/views/prompt_engine/prompts/edit.html.erb +9 -0
- data/app/views/prompt_engine/prompts/index.html.erb +80 -0
- data/app/views/prompt_engine/prompts/new.html.erb +9 -0
- data/app/views/prompt_engine/prompts/show.html.erb +297 -0
- data/app/views/prompt_engine/settings/edit.html.erb +93 -0
- data/app/views/prompt_engine/shared/_form_errors.html.erb +16 -0
- data/app/views/prompt_engine/test_cases/edit.html.erb +72 -0
- data/app/views/prompt_engine/test_cases/import.html.erb +92 -0
- data/app/views/prompt_engine/test_cases/import_preview.html.erb +103 -0
- data/app/views/prompt_engine/test_cases/new.html.erb +79 -0
- data/app/views/prompt_engine/versions/_version_card.html.erb +56 -0
- data/app/views/prompt_engine/versions/compare.html.erb +82 -0
- data/app/views/prompt_engine/versions/index.html.erb +96 -0
- data/app/views/prompt_engine/versions/show.html.erb +98 -0
- data/config/routes.rb +61 -0
- data/db/migrate/20250124000001_create_eval_tables.rb +43 -0
- data/db/migrate/20250124000002_add_open_ai_fields_to_evals.rb +11 -0
- data/db/migrate/20250125000001_add_grader_fields_to_eval_sets.rb +8 -0
- data/db/migrate/20250723161909_create_prompts.rb +17 -0
- data/db/migrate/20250723184757_create_prompt_engine_versions.rb +24 -0
- data/db/migrate/20250723203838_create_prompt_engine_parameters.rb +20 -0
- data/db/migrate/20250724160623_create_prompt_engine_playground_run_results.rb +30 -0
- data/db/migrate/20250724165118_create_prompt_engine_settings.rb +14 -0
- data/lib/prompt_engine/engine.rb +25 -0
- data/lib/prompt_engine/version.rb +3 -0
- data/lib/prompt_engine.rb +33 -0
- data/lib/tasks/active_prompt_tasks.rake +32 -0
- data/lib/tasks/eval_demo.rake +149 -0
- metadata +293 -0
@@ -0,0 +1,17 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<title>Prompt Engine Admin</title>
|
5
|
+
<%= csrf_meta_tags %>
|
6
|
+
<%= csp_meta_tag %>
|
7
|
+
|
8
|
+
<%= yield :head %>
|
9
|
+
|
10
|
+
<%= stylesheet_link_tag "prompt_engine/application", media: "all" %>
|
11
|
+
</head>
|
12
|
+
<body>
|
13
|
+
|
14
|
+
<%= yield %>
|
15
|
+
|
16
|
+
</body>
|
17
|
+
</html>
|
@@ -0,0 +1,230 @@
|
|
1
|
+
<div class="admin-header">
|
2
|
+
<div>
|
3
|
+
<h1>Dashboard</h1>
|
4
|
+
<p class="text-muted">Overview of your prompt management system</p>
|
5
|
+
</div>
|
6
|
+
</div>
|
7
|
+
|
8
|
+
<div class="dashboard">
|
9
|
+
<div class="dashboard__stats">
|
10
|
+
<div class="stat-card">
|
11
|
+
<div class="stat-card__icon stat-card__icon--primary">
|
12
|
+
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
13
|
+
<path d="M12 20h9"/>
|
14
|
+
<path d="M16.5 3.5a2.121 2.121 0 0 1 3 3L7 19l-4 1 1-4L16.5 3.5z"/>
|
15
|
+
</svg>
|
16
|
+
</div>
|
17
|
+
<div class="stat-card__content">
|
18
|
+
<h3 class="stat-card__title">Total Prompts</h3>
|
19
|
+
<p class="stat-card__value"><%= @total_prompts %></p>
|
20
|
+
</div>
|
21
|
+
</div>
|
22
|
+
|
23
|
+
<div class="stat-card">
|
24
|
+
<div class="stat-card__icon stat-card__icon--success">
|
25
|
+
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
26
|
+
<circle cx="12" cy="12" r="10"/>
|
27
|
+
<path d="m9 12 2 2 4-4"/>
|
28
|
+
</svg>
|
29
|
+
</div>
|
30
|
+
<div class="stat-card__content">
|
31
|
+
<h3 class="stat-card__title">Active Prompts</h3>
|
32
|
+
<p class="stat-card__value"><%= @prompt_engines %></p>
|
33
|
+
</div>
|
34
|
+
</div>
|
35
|
+
|
36
|
+
<div class="stat-card">
|
37
|
+
<div class="stat-card__icon stat-card__icon--info">
|
38
|
+
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
39
|
+
<polygon points="13 2 3 14 12 14 11 22 21 10 12 10 13 2"/>
|
40
|
+
</svg>
|
41
|
+
</div>
|
42
|
+
<div class="stat-card__content">
|
43
|
+
<h3 class="stat-card__title">Test Runs</h3>
|
44
|
+
<p class="stat-card__value"><%= @total_test_runs %></p>
|
45
|
+
</div>
|
46
|
+
</div>
|
47
|
+
|
48
|
+
<div class="stat-card">
|
49
|
+
<div class="stat-card__icon stat-card__icon--warning">
|
50
|
+
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
51
|
+
<path d="M22 2 11 13"/>
|
52
|
+
<path d="m22 2-7 20-4-9-9-4Z"/>
|
53
|
+
</svg>
|
54
|
+
</div>
|
55
|
+
<div class="stat-card__content">
|
56
|
+
<h3 class="stat-card__title">Tokens Used</h3>
|
57
|
+
<p class="stat-card__value"><%= number_with_delimiter(@total_tokens_used) %></p>
|
58
|
+
</div>
|
59
|
+
</div>
|
60
|
+
|
61
|
+
<div class="stat-card">
|
62
|
+
<div class="stat-card__icon stat-card__icon--secondary">
|
63
|
+
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
64
|
+
<path d="M9 11l3 3L22 4"/>
|
65
|
+
<path d="M21 12v7a2 2 0 01-2 2H5a2 2 0 01-2-2V5a2 2 0 012-2h11"/>
|
66
|
+
</svg>
|
67
|
+
</div>
|
68
|
+
<div class="stat-card__content">
|
69
|
+
<h3 class="stat-card__title">Evaluation Sets</h3>
|
70
|
+
<p class="stat-card__value"><%= @total_eval_sets %></p>
|
71
|
+
</div>
|
72
|
+
</div>
|
73
|
+
|
74
|
+
<div class="stat-card">
|
75
|
+
<div class="stat-card__icon stat-card__icon--danger">
|
76
|
+
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
77
|
+
<path d="M12 8v4l3 3m6-3a9 9 0 11-18 0 9 9 0 0118 0z"/>
|
78
|
+
</svg>
|
79
|
+
</div>
|
80
|
+
<div class="stat-card__content">
|
81
|
+
<h3 class="stat-card__title">Evaluation Runs</h3>
|
82
|
+
<p class="stat-card__value"><%= @total_eval_runs %></p>
|
83
|
+
</div>
|
84
|
+
</div>
|
85
|
+
</div>
|
86
|
+
|
87
|
+
<div class="dashboard__content">
|
88
|
+
<div class="dashboard__section">
|
89
|
+
<div class="card">
|
90
|
+
<div class="card__header">
|
91
|
+
<h2 class="card__title">Recent Prompts</h2>
|
92
|
+
<%= link_to "View all", prompts_path, class: "btn btn--sm btn--ghost" %>
|
93
|
+
</div>
|
94
|
+
<div class="card__body">
|
95
|
+
<% if @recent_prompts.any? %>
|
96
|
+
<div class="table-container table-container--compact">
|
97
|
+
<table class="table">
|
98
|
+
<thead>
|
99
|
+
<tr class="table__row">
|
100
|
+
<th class="table__cell table__cell--header">Name</th>
|
101
|
+
<th class="table__cell table__cell--header">Status</th>
|
102
|
+
<th class="table__cell table__cell--header">Parameters</th>
|
103
|
+
<th class="table__cell table__cell--header">Updated</th>
|
104
|
+
<th class="table__cell table__cell--header table__actions-header"></th>
|
105
|
+
</tr>
|
106
|
+
</thead>
|
107
|
+
<tbody>
|
108
|
+
<% @recent_prompts.each do |prompt| %>
|
109
|
+
<tr class="table__row">
|
110
|
+
<td class="table__cell">
|
111
|
+
<%= link_to prompt.name, prompt_path(prompt), class: "table__action" %>
|
112
|
+
</td>
|
113
|
+
<td class="table__cell">
|
114
|
+
<span class="table__badge table__badge--<%= prompt.status %>">
|
115
|
+
<%= prompt.status.humanize %>
|
116
|
+
</span>
|
117
|
+
</td>
|
118
|
+
<td class="table__cell">
|
119
|
+
<span class="table__secondary"><%= prompt.parameters.count %> params</span>
|
120
|
+
</td>
|
121
|
+
<td class="table__cell">
|
122
|
+
<span class="table__secondary"><%= time_ago_in_words(prompt.updated_at) %> ago</span>
|
123
|
+
</td>
|
124
|
+
<td class="table__cell table__cell--actions">
|
125
|
+
<%= link_to "Edit", edit_prompt_path(prompt), class: "btn btn--sm btn--ghost" %>
|
126
|
+
</td>
|
127
|
+
</tr>
|
128
|
+
<% end %>
|
129
|
+
</tbody>
|
130
|
+
</table>
|
131
|
+
</div>
|
132
|
+
<% else %>
|
133
|
+
<div class="empty-state">
|
134
|
+
<p class="text-muted">No prompts created yet</p>
|
135
|
+
<%= link_to "Create your first prompt", new_prompt_path, class: "btn btn--primary" %>
|
136
|
+
</div>
|
137
|
+
<% end %>
|
138
|
+
</div>
|
139
|
+
</div>
|
140
|
+
</div>
|
141
|
+
|
142
|
+
<div class="dashboard__section">
|
143
|
+
<div class="card">
|
144
|
+
<div class="card__header">
|
145
|
+
<h2 class="card__title">Recent Test Runs</h2>
|
146
|
+
<%= link_to "View all", playground_run_results_path, class: "btn btn--sm btn--ghost" %>
|
147
|
+
</div>
|
148
|
+
<div class="card__body">
|
149
|
+
<% if @recent_test_runs.any? %>
|
150
|
+
<div class="test-runs-list">
|
151
|
+
<% @recent_test_runs.each do |run| %>
|
152
|
+
<div class="test-run-item">
|
153
|
+
<div class="test-run-item__main">
|
154
|
+
<div class="test-run-item__header">
|
155
|
+
<h4 class="test-run-item__title">
|
156
|
+
<%= link_to run.prompt_version.prompt.name, playground_run_result_path(run), class: "link" %>
|
157
|
+
</h4>
|
158
|
+
<span class="test-run-item__date text-muted">
|
159
|
+
<%= time_ago_in_words(run.created_at) %> ago
|
160
|
+
</span>
|
161
|
+
</div>
|
162
|
+
<div class="test-run-item__content">
|
163
|
+
<p class="test-run-item__model text-muted">
|
164
|
+
<%= run.model %> •
|
165
|
+
<%= number_with_delimiter(run.token_count || 0) %> tokens •
|
166
|
+
<%= number_with_precision(run.execution_time * 1000, precision: 0) %>ms
|
167
|
+
</p>
|
168
|
+
</div>
|
169
|
+
</div>
|
170
|
+
</div>
|
171
|
+
<% end %>
|
172
|
+
</div>
|
173
|
+
<% else %>
|
174
|
+
<div class="empty-state">
|
175
|
+
<p class="text-muted">No test runs yet</p>
|
176
|
+
<%= link_to "Test a prompt", prompts_path, class: "btn btn--primary" %>
|
177
|
+
</div>
|
178
|
+
<% end %>
|
179
|
+
</div>
|
180
|
+
</div>
|
181
|
+
</div>
|
182
|
+
|
183
|
+
<div class="dashboard__section">
|
184
|
+
<div class="card">
|
185
|
+
<div class="card__header">
|
186
|
+
<h2 class="card__title">Recent Evaluation Runs</h2>
|
187
|
+
<%= link_to "View all", evaluations_path, class: "btn btn--sm btn--ghost" %>
|
188
|
+
</div>
|
189
|
+
<div class="card__body">
|
190
|
+
<% if @recent_eval_runs.any? %>
|
191
|
+
<div class="test-runs-list">
|
192
|
+
<% @recent_eval_runs.each do |run| %>
|
193
|
+
<div class="test-run-item">
|
194
|
+
<div class="test-run-item__main">
|
195
|
+
<div class="test-run-item__header">
|
196
|
+
<h4 class="test-run-item__title">
|
197
|
+
<%= link_to run.eval_set.name, prompt_eval_run_path(run.eval_set.prompt, run), class: "link" %>
|
198
|
+
</h4>
|
199
|
+
<span class="test-run-item__date text-muted">
|
200
|
+
<%= time_ago_in_words(run.created_at) %> ago
|
201
|
+
</span>
|
202
|
+
</div>
|
203
|
+
<div class="test-run-item__content">
|
204
|
+
<p class="test-run-item__model text-muted">
|
205
|
+
<%= run.eval_set.prompt.name %> •
|
206
|
+
<%= pluralize(run.total_count, 'test') %> •
|
207
|
+
<% if run.total_count > 0 %>
|
208
|
+
<span class="<%= (run.passed_count.to_f / run.total_count * 100) >= 80 ? 'text-success' : 'text-danger' %>">
|
209
|
+
<%= number_to_percentage((run.passed_count.to_f / run.total_count * 100), precision: 1) %> passed
|
210
|
+
</span>
|
211
|
+
<% else %>
|
212
|
+
<span class="text-muted">No tests</span>
|
213
|
+
<% end %>
|
214
|
+
</p>
|
215
|
+
</div>
|
216
|
+
</div>
|
217
|
+
</div>
|
218
|
+
<% end %>
|
219
|
+
</div>
|
220
|
+
<% else %>
|
221
|
+
<div class="empty-state">
|
222
|
+
<p class="text-muted">No evaluation runs yet</p>
|
223
|
+
<%= link_to "View evaluations", evaluations_path, class: "btn btn--primary" %>
|
224
|
+
</div>
|
225
|
+
<% end %>
|
226
|
+
</div>
|
227
|
+
</div>
|
228
|
+
</div>
|
229
|
+
</div>
|
230
|
+
</div>
|
@@ -0,0 +1,204 @@
|
|
1
|
+
<div class="admin-header">
|
2
|
+
<div>
|
3
|
+
<h1>Evaluation Run Results</h1>
|
4
|
+
<p class="text-muted">
|
5
|
+
<%= @eval_run.eval_set.name %> •
|
6
|
+
<%= @eval_run.created_at.strftime("%b %d, %Y at %I:%M %p") %>
|
7
|
+
</p>
|
8
|
+
</div>
|
9
|
+
<div class="btn-group">
|
10
|
+
<%= link_to "Back to Eval Set", prompt_eval_set_path(@prompt, @eval_run.eval_set),
|
11
|
+
class: "btn btn--secondary btn--medium" %>
|
12
|
+
<% if @eval_run.report_url.present? %>
|
13
|
+
<%= link_to "View OpenAI Report", @eval_run.report_url,
|
14
|
+
target: "_blank", rel: "noopener", class: "btn btn--secondary btn--medium" %>
|
15
|
+
<% end %>
|
16
|
+
</div>
|
17
|
+
</div>
|
18
|
+
|
19
|
+
<div class="card mb-lg">
|
20
|
+
<div class="card__header">
|
21
|
+
<h3 class="card__title">Evaluation Summary</h3>
|
22
|
+
</div>
|
23
|
+
<div class="card__body">
|
24
|
+
<div class="detail-grid">
|
25
|
+
<div class="detail-item">
|
26
|
+
<label class="detail-label">Status</label>
|
27
|
+
<div class="detail-value">
|
28
|
+
<span class="table__badge table__badge--<%= @eval_run.status %>">
|
29
|
+
<%= @eval_run.status.humanize %>
|
30
|
+
</span>
|
31
|
+
</div>
|
32
|
+
</div>
|
33
|
+
|
34
|
+
<div class="detail-item">
|
35
|
+
<label class="detail-label">Prompt Version</label>
|
36
|
+
<div class="detail-value">
|
37
|
+
<%= link_to "v#{@eval_run.prompt_version.version_number}",
|
38
|
+
prompt_version_path(@prompt, @eval_run.prompt_version),
|
39
|
+
class: "table__link" %>
|
40
|
+
</div>
|
41
|
+
</div>
|
42
|
+
|
43
|
+
<div class="detail-item">
|
44
|
+
<label class="detail-label">Total Tests</label>
|
45
|
+
<div class="detail-value"><%= @eval_run.total_count %></div>
|
46
|
+
</div>
|
47
|
+
|
48
|
+
<div class="detail-item">
|
49
|
+
<label class="detail-label">Passed</label>
|
50
|
+
<div class="detail-value text-success"><%= @eval_run.passed_count %></div>
|
51
|
+
</div>
|
52
|
+
|
53
|
+
<div class="detail-item">
|
54
|
+
<label class="detail-label">Failed</label>
|
55
|
+
<div class="detail-value text-danger"><%= @eval_run.failed_count %></div>
|
56
|
+
</div>
|
57
|
+
|
58
|
+
<div class="detail-item">
|
59
|
+
<label class="detail-label">Success Rate</label>
|
60
|
+
<div class="detail-value">
|
61
|
+
<% if @eval_run.total_count > 0 %>
|
62
|
+
<%= number_to_percentage((@eval_run.passed_count.to_f / @eval_run.total_count * 100),
|
63
|
+
precision: 1) %>
|
64
|
+
<% else %>
|
65
|
+
<span class="text-muted">—</span>
|
66
|
+
<% end %>
|
67
|
+
</div>
|
68
|
+
</div>
|
69
|
+
|
70
|
+
<% if @eval_run.started_at.present? %>
|
71
|
+
<div class="detail-item">
|
72
|
+
<label class="detail-label">Started At</label>
|
73
|
+
<div class="detail-value"><%= @eval_run.started_at.strftime("%b %d, %Y %I:%M %p") %></div>
|
74
|
+
</div>
|
75
|
+
<% end %>
|
76
|
+
|
77
|
+
<% if @eval_run.completed_at.present? %>
|
78
|
+
<div class="detail-item">
|
79
|
+
<label class="detail-label">Completed At</label>
|
80
|
+
<div class="detail-value"><%= @eval_run.completed_at.strftime("%b %d, %Y %I:%M %p") %></div>
|
81
|
+
</div>
|
82
|
+
|
83
|
+
<div class="detail-item">
|
84
|
+
<label class="detail-label">Duration</label>
|
85
|
+
<div class="detail-value">
|
86
|
+
<%# Guard: the enclosing branch only checks completed_at, so started_at may still be blank here; show a dash instead of raising %><%= @eval_run.started_at.present? ? distance_of_time_in_words(@eval_run.started_at, @eval_run.completed_at) : content_tag(:span, "—", class: "text-muted") %>
|
87
|
+
</div>
|
88
|
+
</div>
|
89
|
+
<% end %>
|
90
|
+
</div>
|
91
|
+
</div>
|
92
|
+
</div>
|
93
|
+
|
94
|
+
<% if @eval_run.status == 'pending' %>
|
95
|
+
<div class="card mb-lg">
|
96
|
+
<div class="card__body">
|
97
|
+
<div class="text-center py-lg">
|
98
|
+
<div class="mb-md">
|
99
|
+
<div class="spinner spinner--large"></div>
|
100
|
+
</div>
|
101
|
+
<h3 class="mb-md">Preparing Evaluation</h3>
|
102
|
+
<p class="text-muted">Setting up evaluation environment...</p>
|
103
|
+
<p class="text-muted">This page will refresh automatically.</p>
|
104
|
+
</div>
|
105
|
+
</div>
|
106
|
+
</div>
|
107
|
+
|
108
|
+
<script>
|
109
|
+
setTimeout(function() {
|
110
|
+
location.reload();
|
111
|
+
}, 3000);
|
112
|
+
</script>
|
113
|
+
<% elsif @eval_run.status == 'running' %>
|
114
|
+
<div class="card mb-lg">
|
115
|
+
<div class="card__body">
|
116
|
+
<div class="text-center py-lg">
|
117
|
+
<div class="mb-md">
|
118
|
+
<div class="spinner spinner--large"></div>
|
119
|
+
</div>
|
120
|
+
<h3 class="mb-md">Evaluation in Progress</h3>
|
121
|
+
<p class="text-muted">Running <%= @eval_run.eval_set.test_cases.count %> test cases on OpenAI's infrastructure...</p>
|
122
|
+
<p class="text-muted">This may take a few minutes depending on the number of test cases.</p>
|
123
|
+
<div class="mt-lg">
|
124
|
+
<div class="progress progress--striped progress--animated" style="max-width: 400px; margin: 0 auto;">
|
125
|
+
<div class="progress__bar" style="width: 50%"></div>
|
126
|
+
</div>
|
127
|
+
</div>
|
128
|
+
<p class="text-muted mt-md">This page will refresh automatically every 5 seconds.</p>
|
129
|
+
</div>
|
130
|
+
</div>
|
131
|
+
</div>
|
132
|
+
|
133
|
+
<script>
|
134
|
+
setTimeout(function() {
|
135
|
+
location.reload();
|
136
|
+
}, 5000);
|
137
|
+
</script>
|
138
|
+
<% elsif @eval_run.status == 'failed' %>
|
139
|
+
<div class="card mb-lg">
|
140
|
+
<div class="card__body">
|
141
|
+
<div class="alert alert--danger">
|
142
|
+
<h3 class="mb-md">Evaluation Failed</h3>
|
143
|
+
<%# presence treats an empty or whitespace-only message like nil so the fallback text is shown %><p><%= @eval_run.error_message.presence || "An unknown error occurred during evaluation." %></p>
|
144
|
+
|
145
|
+
<% if @eval_run.error_message&.include?("API key") %>
|
146
|
+
<div class="mt-md">
|
147
|
+
<p><strong>How to fix:</strong></p>
|
148
|
+
<ul>
|
149
|
+
<li>Check your OpenAI API key configuration in <%= link_to "Settings", settings_path %></li>
|
150
|
+
<li>Ensure your API key has access to the Evals API</li>
|
151
|
+
<li>Verify your API key is not expired or revoked</li>
|
152
|
+
</ul>
|
153
|
+
</div>
|
154
|
+
<% elsif @eval_run.error_message&.include?("rate limit") %>
|
155
|
+
<div class="mt-md">
|
156
|
+
<p><strong>How to fix:</strong></p>
|
157
|
+
<ul>
|
158
|
+
<li>Wait a few minutes before trying again</li>
|
159
|
+
<li>Consider reducing the number of test cases</li>
|
160
|
+
<li>Check your OpenAI API usage limits</li>
|
161
|
+
</ul>
|
162
|
+
</div>
|
163
|
+
<% end %>
|
164
|
+
|
165
|
+
<div class="mt-lg">
|
166
|
+
<%= link_to "Back to Evaluation Set", prompt_eval_set_path(@prompt, @eval_run.eval_set),
|
167
|
+
class: "btn btn--secondary btn--medium" %>
|
168
|
+
<%= link_to "View Settings", settings_path, class: "btn btn--secondary btn--medium" %>
|
169
|
+
</div>
|
170
|
+
</div>
|
171
|
+
</div>
|
172
|
+
</div>
|
173
|
+
<% elsif @eval_run.status == 'completed' %>
|
174
|
+
<div class="card">
|
175
|
+
<div class="card__header">
|
176
|
+
<h3 class="card__title">Results Overview</h3>
|
177
|
+
</div>
|
178
|
+
<div class="card__body">
|
179
|
+
<%# Require at least one test so a run with zero tests is not reported as "All tests passed!" %><% if @eval_run.total_count > 0 && @eval_run.passed_count == @eval_run.total_count %>
|
180
|
+
<div class="alert alert--success mb-md">
|
181
|
+
<strong>All tests passed!</strong> Every test case produced the expected output.
|
182
|
+
</div>
|
183
|
+
<% elsif @eval_run.failed_count > 0 %>
|
184
|
+
<div class="alert alert--warning mb-md">
|
185
|
+
<strong><%= pluralize(@eval_run.failed_count, 'test') %> failed.</strong>
|
186
|
+
Some test cases did not produce the expected output.
|
187
|
+
</div>
|
188
|
+
<% end %>
|
189
|
+
|
190
|
+
<p class="text-muted">
|
191
|
+
Individual test results are available in the
|
192
|
+
<% if @eval_run.report_url.present? %>
|
193
|
+
<%= link_to "OpenAI evaluation report", @eval_run.report_url,
|
194
|
+
target: "_blank", rel: "noopener", class: "table__link" %>.
|
195
|
+
<% else %>
|
196
|
+
OpenAI evaluation report.
|
197
|
+
<% end %>
|
198
|
+
The report includes detailed information about each test case, including the actual outputs
|
199
|
+
and exact comparison results.
|
200
|
+
</p>
|
201
|
+
</div>
|
202
|
+
</div>
|
203
|
+
<% end %>
|
204
|
+
|