edsl 0.1.51__py3-none-any.whl → 0.1.53__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__init__.py +45 -34
- edsl/__version__.py +1 -1
- edsl/conversation/Conversation.py +2 -1
- edsl/coop/coop.py +2 -0
- edsl/interviews/answering_function.py +20 -21
- edsl/interviews/exception_tracking.py +4 -3
- edsl/interviews/interview_task_manager.py +5 -2
- edsl/interviews/request_token_estimator.py +104 -2
- edsl/invigilators/invigilators.py +37 -4
- edsl/jobs/html_table_job_logger.py +494 -257
- edsl/jobs/jobs_status_enums.py +1 -0
- edsl/jobs/remote_inference.py +46 -12
- edsl/language_models/language_model.py +148 -146
- edsl/results/results.py +31 -2
- edsl/scenarios/file_store.py +73 -23
- edsl/tasks/task_history.py +45 -8
- edsl/templates/error_reporting/base.html +37 -4
- edsl/templates/error_reporting/exceptions_table.html +105 -33
- edsl/templates/error_reporting/interview_details.html +130 -126
- edsl/templates/error_reporting/overview.html +21 -25
- edsl/templates/error_reporting/report.css +215 -46
- edsl/templates/error_reporting/report.js +122 -20
- {edsl-0.1.51.dist-info → edsl-0.1.53.dist-info}/METADATA +1 -1
- {edsl-0.1.51.dist-info → edsl-0.1.53.dist-info}/RECORD +27 -27
- {edsl-0.1.51.dist-info → edsl-0.1.53.dist-info}/LICENSE +0 -0
- {edsl-0.1.51.dist-info → edsl-0.1.53.dist-info}/WHEEL +0 -0
- {edsl-0.1.51.dist-info → edsl-0.1.53.dist-info}/entry_points.txt +0 -0
@@ -1,130 +1,134 @@
|
|
1
|
-
<
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
color: white;
|
8
|
-
border: none;
|
9
|
-
padding: 10px 20px;
|
10
|
-
text-align: center;
|
11
|
-
text-decoration: none;
|
12
|
-
display: inline-block;
|
13
|
-
font-size: 16px;
|
14
|
-
margin: 4px 2px;
|
15
|
-
cursor: pointer;
|
16
|
-
border-radius: 8px;
|
17
|
-
white-space: nowrap;
|
18
|
-
}
|
19
|
-
.toggle-btn span.collapse {
|
20
|
-
display: none;
|
21
|
-
}
|
22
|
-
.exception-content {
|
23
|
-
max-width: 100%; /* Adjust this value based on your layout */
|
24
|
-
overflow-x: auto; /* Enables horizontal scrolling */
|
25
|
-
}
|
26
|
-
</style>
|
1
|
+
<div class="summary-section">
|
2
|
+
<div class="section-header">
|
3
|
+
<h2>Exception Details</h2>
|
4
|
+
</div>
|
5
|
+
<div class="p-4">
|
6
|
+
<div class="question">Question name: {{ question }}</div>
|
27
7
|
|
28
|
-
|
8
|
+
{% set seen_exceptions = [] %}
|
29
9
|
|
30
|
-
{% for exception_message in exceptions %}
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
<tr>
|
74
|
-
<td>System Prompt</td>
|
75
|
-
<td><pre>{{ exception_message.rendered_prompts['system_prompt'] }}</pre></td>
|
76
|
-
</tr>
|
77
|
-
<tr>
|
78
|
-
<td>Inference service</td>
|
79
|
-
<td>{{ interview.model._inference_service_ }}</td>
|
80
|
-
</tr>
|
81
|
-
<tr>
|
82
|
-
<td>Model name</td>
|
83
|
-
<td>{{ interview.model.model }}</td>
|
84
|
-
</tr>
|
85
|
-
<tr>
|
86
|
-
<td>Model parameters</td>
|
87
|
-
<td>{{ interview.model.__repr__() }}</td>
|
88
|
-
</tr>
|
89
|
-
<tr>
|
90
|
-
<td>Raw model response</td>
|
91
|
-
<td><pre>{{ exception_message.raw_model_response }}</pre>
|
92
|
-
</td>
|
93
|
-
</tr>
|
94
|
-
<tr>
|
95
|
-
<td>Generated token string (at {{ exception_message.key_sequence }}) in raw response</td>
|
96
|
-
<td><pre>{{ exception_message.generated_token_string }}</pre>
|
97
|
-
</td>
|
98
|
-
</tr>
|
99
|
-
<tr>
|
100
|
-
<td>Code likely to reproduce the error</td>
|
101
|
-
<td>
|
102
|
-
<textarea id="codeToCopy" rows="10" cols="90">{{ exception_message.code_to_reproduce }}</textarea>
|
103
|
-
<button onclick="copyCode()">Copy</button>
|
104
|
-
</td>
|
105
|
-
</tr>
|
10
|
+
{% for exception_message in exceptions %}
|
11
|
+
{% set exception_key = exception_message.name ~ '|' ~
|
12
|
+
exception_message.question_type|default('') ~ '|' ~
|
13
|
+
(exception_message.traceback|default(''))[0:100] %}
|
14
|
+
|
15
|
+
{% if exception_key not in seen_exceptions %}
|
16
|
+
{% set _ = seen_exceptions.append(exception_key) %}
|
17
|
+
|
18
|
+
<div class="exception-detail">
|
19
|
+
<div class="exception-header">
|
20
|
+
<span class="exception-exception">
|
21
|
+
{{ exception_message.exception.__class__.__name__ }}
|
22
|
+
</span>
|
23
|
+
<span class="chevron"></span>
|
24
|
+
</div>
|
25
|
+
<div class="exception-content">
|
26
|
+
<!-- Error Summary Section -->
|
27
|
+
<div>
|
28
|
+
<div class="section-header error-header">
|
29
|
+
<h3>Error Summary</h3>
|
30
|
+
</div>
|
31
|
+
|
32
|
+
<table class="details-table">
|
33
|
+
<tbody>
|
34
|
+
<tr>
|
35
|
+
<th>Exception</th>
|
36
|
+
<td>{{ exception_message.name }}</td>
|
37
|
+
</tr>
|
38
|
+
<tr>
|
39
|
+
<th>Model</th>
|
40
|
+
<td>{{ interview.model.model }}</td>
|
41
|
+
</tr>
|
42
|
+
<tr>
|
43
|
+
<th>Question</th>
|
44
|
+
<td>{{ question }} ({{ exception_message.question_type|default('Unknown type') }})</td>
|
45
|
+
</tr>
|
46
|
+
<tr>
|
47
|
+
<th>Time</th>
|
48
|
+
<td>{{ exception_message.time }}</td>
|
49
|
+
</tr>
|
50
|
+
</tbody>
|
51
|
+
</table>
|
52
|
+
</div>
|
106
53
|
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
</
|
54
|
+
<!-- Details Section -->
|
55
|
+
<div>
|
56
|
+
<div class="section-header">
|
57
|
+
<h3>Details</h3>
|
58
|
+
</div>
|
59
|
+
<table class="details-table">
|
60
|
+
<tbody>
|
61
|
+
<tr>
|
62
|
+
<th>Interview ID</th>
|
63
|
+
<td>{{ index }}</td>
|
64
|
+
</tr>
|
65
|
+
<tr>
|
66
|
+
<th>Question name</th>
|
67
|
+
<td>{{ question }}</td>
|
68
|
+
</tr>
|
69
|
+
<tr>
|
70
|
+
<th>Question type</th>
|
71
|
+
<td>{{ exception_message.question_type|default('Unknown') }}</td>
|
72
|
+
</tr>
|
73
|
+
<tr>
|
74
|
+
<th>User Prompt</th>
|
75
|
+
<td><pre>{{ exception_message.rendered_prompts['user_prompt'] }}</pre></td>
|
76
|
+
</tr>
|
77
|
+
<tr>
|
78
|
+
<th>Scenario</th>
|
79
|
+
<td>{{ interview.scenario.__repr__() }}</td>
|
80
|
+
</tr>
|
81
|
+
<tr>
|
82
|
+
<th>Agent</th>
|
83
|
+
<td>{{ interview.agent.__repr__() }}</td>
|
84
|
+
</tr>
|
85
|
+
<tr>
|
86
|
+
<th>System Prompt</th>
|
87
|
+
<td><pre>{{ exception_message.rendered_prompts['system_prompt'] }}</pre></td>
|
88
|
+
</tr>
|
89
|
+
<tr>
|
90
|
+
<th>Inference service</th>
|
91
|
+
<td>{{ interview.model._inference_service_ }}</td>
|
92
|
+
</tr>
|
93
|
+
<tr>
|
94
|
+
<th>Model name</th>
|
95
|
+
<td>{{ interview.model.model }}</td>
|
96
|
+
</tr>
|
97
|
+
<tr>
|
98
|
+
<th>Model parameters</th>
|
99
|
+
<td>{{ interview.model.__repr__() }}</td>
|
100
|
+
</tr>
|
101
|
+
<tr>
|
102
|
+
<th>Raw model response</th>
|
103
|
+
<td><pre>{{ exception_message.raw_model_response }}</pre></td>
|
104
|
+
</tr>
|
105
|
+
<tr>
|
106
|
+
<th>Generated token string</th>
|
107
|
+
<td><pre>{{ exception_message.generated_token_string }}</pre></td>
|
108
|
+
</tr>
|
109
|
+
</tbody>
|
110
|
+
</table>
|
111
|
+
</div>
|
129
112
|
|
130
|
-
|
113
|
+
<!-- Code to reproduce section -->
|
114
|
+
<div>
|
115
|
+
<div class="section-header code-header">
|
116
|
+
<h3>Code to reproduce error</h3>
|
117
|
+
<button class="copy-button">Copy</button>
|
118
|
+
</div>
|
119
|
+
<pre class="code-block"><code>{{ exception_message.code_to_reproduce }}</code></pre>
|
120
|
+
</div>
|
121
|
+
|
122
|
+
<!-- Traceback section -->
|
123
|
+
<div>
|
124
|
+
<div class="section-header">
|
125
|
+
<h3>Traceback</h3>
|
126
|
+
</div>
|
127
|
+
<pre class="traceback">{{ exception_message.traceback }}</pre>
|
128
|
+
</div>
|
129
|
+
</div>
|
130
|
+
</div>
|
131
|
+
{% endif %}
|
132
|
+
{% endfor %}
|
133
|
+
</div>
|
134
|
+
</div>
|
@@ -1,26 +1,22 @@
|
|
1
|
-
<
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
</
|
1
|
+
<div class="overview-section">
|
2
|
+
<h2>Exceptions Report</h2>
|
3
|
+
<p>
|
4
|
+
This report summarizes exceptions encountered in the job that was run.
|
5
|
+
</p>
|
6
6
|
|
7
|
-
<
|
8
|
-
<
|
9
|
-
|
10
|
-
</
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
</
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
</table>
|
24
|
-
<p>
|
25
|
-
An interview is the result of one survey, taken by one agent, with one model and one scenario (if any).
|
26
|
-
</p>
|
7
|
+
<table class="summary-table">
|
8
|
+
<tbody>
|
9
|
+
<tr>
|
10
|
+
<td><strong>Total interviews</strong></td>
|
11
|
+
<td>{{ interviews|length }}</td>
|
12
|
+
</tr>
|
13
|
+
<tr>
|
14
|
+
<td><strong>Interviews with exceptions</strong></td>
|
15
|
+
<td>{{ num_exceptions }}</td>
|
16
|
+
</tr>
|
17
|
+
</tbody>
|
18
|
+
</table>
|
19
|
+
<p>
|
20
|
+
An interview is the result of one survey, taken by one agent, with one model and one scenario (if any).
|
21
|
+
</p>
|
22
|
+
</div>
|
@@ -1,74 +1,243 @@
|
|
1
|
+
/* Base styles */
|
2
|
+
:root {
|
3
|
+
--primary-color: #3f51b5;
|
4
|
+
--secondary-color: #5c6bc0;
|
5
|
+
--success-color: #4caf50;
|
6
|
+
--error-color: #f44336;
|
7
|
+
--warning-color: #ff9800;
|
8
|
+
--text-color: #333;
|
9
|
+
--light-bg: #f5f7fa;
|
10
|
+
--border-color: #e0e0e0;
|
11
|
+
--header-bg: #f9f9f9;
|
12
|
+
--card-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
|
13
|
+
--font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
|
14
|
+
}
|
15
|
+
|
1
16
|
body {
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
17
|
+
font-family: var(--font-family);
|
18
|
+
line-height: 1.6;
|
19
|
+
color: var(--text-color);
|
20
|
+
background-color: var(--light-bg);
|
21
|
+
margin: 0;
|
22
|
+
padding: 20px;
|
7
23
|
}
|
8
24
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
background-color: #e3f2fd;
|
14
|
-
border-left: 5px solid #2196f3;
|
25
|
+
/* Container */
|
26
|
+
.container {
|
27
|
+
max-width: 1200px;
|
28
|
+
margin: 0 auto;
|
15
29
|
}
|
16
30
|
|
31
|
+
/* Headings */
|
32
|
+
h1, h2, h3, h4, h5, h6 {
|
33
|
+
color: var(--primary-color);
|
34
|
+
margin-top: 1em;
|
35
|
+
margin-bottom: 0.5em;
|
36
|
+
}
|
37
|
+
|
38
|
+
/* Question heading */
|
17
39
|
.question {
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
40
|
+
font-size: 18px;
|
41
|
+
font-weight: 600;
|
42
|
+
margin-bottom: 15px;
|
43
|
+
color: #333;
|
44
|
+
background-color: #fef8e2;
|
45
|
+
border: 1px solid #fbebbb;
|
46
|
+
border-radius: 4px;
|
47
|
+
padding: 12px;
|
23
48
|
}
|
24
49
|
|
50
|
+
/* Exception detail card */
|
25
51
|
.exception-detail {
|
26
|
-
|
27
|
-
|
28
|
-
|
52
|
+
background-color: white;
|
53
|
+
border-radius: 8px;
|
54
|
+
margin-bottom: 20px;
|
55
|
+
box-shadow: var(--card-shadow);
|
56
|
+
overflow: hidden;
|
57
|
+
border: 1px solid var(--border-color);
|
29
58
|
}
|
30
59
|
|
31
60
|
.exception-header {
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
61
|
+
display: flex;
|
62
|
+
justify-content: space-between;
|
63
|
+
align-items: center;
|
64
|
+
padding: 15px;
|
65
|
+
background-color: rgba(244, 67, 54, 0.08);
|
66
|
+
border-bottom: 1px solid rgba(244, 67, 54, 0.2);
|
67
|
+
cursor: pointer;
|
68
|
+
transition: background-color 0.2s;
|
69
|
+
}
|
70
|
+
|
71
|
+
.exception-header:hover {
|
72
|
+
background-color: rgba(244, 67, 54, 0.12);
|
73
|
+
}
|
74
|
+
|
75
|
+
.exception-exception {
|
76
|
+
font-weight: 600;
|
77
|
+
color: var(--error-color);
|
37
78
|
}
|
38
79
|
|
39
80
|
.exception-content {
|
40
|
-
|
41
|
-
|
81
|
+
padding: 0;
|
82
|
+
max-height: 0;
|
83
|
+
overflow: hidden;
|
84
|
+
transition: max-height 0.3s ease-out;
|
42
85
|
}
|
43
86
|
|
44
|
-
.exception-content.
|
45
|
-
|
87
|
+
.exception-content.expanded {
|
88
|
+
max-height: 5000px;
|
89
|
+
padding: 20px;
|
90
|
+
transition: max-height 0.5s ease-in;
|
46
91
|
}
|
47
92
|
|
48
|
-
|
49
|
-
|
50
|
-
|
93
|
+
/* Simple chevron indicator */
|
94
|
+
.chevron {
|
95
|
+
display: inline-block;
|
96
|
+
width: 20px;
|
97
|
+
height: 20px;
|
98
|
+
position: relative;
|
99
|
+
margin-left: 8px;
|
51
100
|
}
|
52
101
|
|
53
|
-
.
|
54
|
-
|
55
|
-
|
102
|
+
.chevron::before {
|
103
|
+
content: '';
|
104
|
+
position: absolute;
|
105
|
+
width: 8px;
|
106
|
+
height: 8px;
|
107
|
+
border-style: solid;
|
108
|
+
border-width: 0 2px 2px 0;
|
109
|
+
border-color: #666;
|
110
|
+
transform: rotate(45deg);
|
111
|
+
top: 4px;
|
112
|
+
transition: transform 0.2s;
|
113
|
+
}
|
114
|
+
|
115
|
+
.exception-header[aria-expanded="true"] .chevron::before {
|
116
|
+
transform: rotate(-135deg);
|
117
|
+
top: 8px;
|
118
|
+
}
|
119
|
+
|
120
|
+
/* Section headers */
|
121
|
+
.section-header {
|
122
|
+
border-bottom: 2px solid var(--primary-color);
|
123
|
+
padding-bottom: 8px;
|
124
|
+
margin: 24px 0 16px 0;
|
125
|
+
display: flex;
|
126
|
+
justify-content: space-between;
|
127
|
+
align-items: center;
|
128
|
+
}
|
129
|
+
|
130
|
+
.section-header h3 {
|
131
|
+
color: var(--primary-color);
|
132
|
+
font-weight: 500;
|
133
|
+
font-size: 18px;
|
134
|
+
margin: 0;
|
135
|
+
}
|
136
|
+
|
137
|
+
.error-header {
|
138
|
+
border-bottom-color: var(--error-color);
|
56
139
|
}
|
57
140
|
|
58
|
-
.
|
59
|
-
|
60
|
-
font-style: italic;
|
61
|
-
color: #555;
|
141
|
+
.error-header h3 {
|
142
|
+
color: var(--error-color);
|
62
143
|
}
|
63
144
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
145
|
+
/* Details tables */
|
146
|
+
.details-table {
|
147
|
+
width: 100%;
|
148
|
+
border-collapse: collapse;
|
149
|
+
margin-bottom: 16px;
|
150
|
+
table-layout: fixed;
|
70
151
|
}
|
71
152
|
|
72
|
-
.
|
73
|
-
|
153
|
+
.details-table th {
|
154
|
+
width: 200px;
|
155
|
+
text-align: left;
|
156
|
+
padding: 12px;
|
157
|
+
background-color: #f9f9f9;
|
158
|
+
font-weight: 500;
|
159
|
+
border-bottom: 1px solid var(--border-color);
|
160
|
+
vertical-align: top;
|
161
|
+
}
|
162
|
+
|
163
|
+
.details-table td {
|
164
|
+
padding: 12px;
|
165
|
+
border-bottom: 1px solid var(--border-color);
|
166
|
+
word-wrap: break-word;
|
167
|
+
overflow-wrap: break-word;
|
168
|
+
}
|
169
|
+
|
170
|
+
.details-table tr:last-child th,
|
171
|
+
.details-table tr:last-child td {
|
172
|
+
border-bottom: none;
|
173
|
+
}
|
174
|
+
|
175
|
+
.details-table tr:nth-child(even) {
|
176
|
+
background-color: rgba(0, 0, 0, 0.02);
|
177
|
+
}
|
178
|
+
|
179
|
+
/* Code blocks */
|
180
|
+
pre {
|
181
|
+
background-color: #f8f9fa;
|
182
|
+
border: 1px solid var(--border-color);
|
183
|
+
border-radius: 4px;
|
184
|
+
padding: 12px;
|
185
|
+
overflow: auto;
|
186
|
+
max-height: 300px;
|
187
|
+
font-family: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, monospace;
|
188
|
+
font-size: 13px;
|
189
|
+
white-space: pre-wrap;
|
190
|
+
margin: 0;
|
191
|
+
width: 100%;
|
192
|
+
}
|
193
|
+
|
194
|
+
.code-block {
|
195
|
+
font-family: monospace;
|
196
|
+
padding: 16px;
|
197
|
+
background-color: #f8f9fa;
|
198
|
+
border: 1px solid #ddd;
|
199
|
+
border-radius: 4px;
|
200
|
+
overflow-x: auto;
|
201
|
+
max-height: 300px;
|
202
|
+
margin-top: 0;
|
203
|
+
}
|
204
|
+
|
205
|
+
.traceback {
|
206
|
+
font-style: italic;
|
207
|
+
font-size: 13px;
|
208
|
+
}
|
209
|
+
|
210
|
+
/* Copy button */
|
211
|
+
.copy-button {
|
212
|
+
background-color: var(--primary-color);
|
213
|
+
color: white;
|
214
|
+
border: none;
|
215
|
+
padding: 6px 12px;
|
216
|
+
border-radius: 4px;
|
217
|
+
cursor: pointer;
|
218
|
+
font-size: 14px;
|
219
|
+
}
|
220
|
+
|
221
|
+
.copy-button:hover {
|
222
|
+
background-color: #303f9f;
|
223
|
+
}
|
224
|
+
|
225
|
+
/* Responsive design */
|
226
|
+
@media (max-width: 768px) {
|
227
|
+
.details-table {
|
228
|
+
display: block;
|
229
|
+
overflow-x: auto;
|
230
|
+
}
|
231
|
+
|
232
|
+
.details-table th {
|
233
|
+
width: 140px;
|
234
|
+
}
|
235
|
+
|
236
|
+
.exception-header {
|
237
|
+
padding: 12px;
|
238
|
+
}
|
239
|
+
|
240
|
+
.exception-content.expanded {
|
241
|
+
padding: 15px;
|
242
|
+
}
|
74
243
|
}
|