edsl 0.1.51__py3-none-any.whl → 0.1.52__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
edsl/results/results.py CHANGED
@@ -44,10 +44,10 @@ from typing import Optional, Callable, Any, Union, List, TYPE_CHECKING
44
44
  from bisect import bisect_left
45
45
 
46
46
  from ..base import Base
47
+ from ..caching import Cache, CacheEntry
47
48
 
48
49
  if TYPE_CHECKING:
49
50
  from ..surveys import Survey
50
- from ..caching import Cache
51
51
  from ..agents import AgentList
52
52
  from ..scenarios import ScenarioList
53
53
  from ..results import Result
@@ -707,12 +707,41 @@ class Results(UserList, ResultsOperationsMixin, Base):
707
707
  "b_not_a": [other_results[i] for i in indices_other],
708
708
  }
709
709
 
710
+ def initialize_cache_from_results(self):
711
+ cache = Cache(data={})
712
+
713
+ for result in self.data:
714
+ for key in result.data["prompt"]:
715
+ if key.endswith("_system_prompt"):
716
+ question_name = key.removesuffix("_system_prompt")
717
+ system_prompt = result.data["prompt"][key].text
718
+ user_key = f"{question_name}_user_prompt"
719
+ if user_key in result.data["prompt"]:
720
+ user_prompt = result.data["prompt"][user_key].text
721
+ else:
722
+ user_prompt = ""
723
+
724
+ # Get corresponding model response
725
+ response_key = f"{question_name}_raw_model_response"
726
+ output = result.data["raw_model_response"].get(response_key, "")
727
+
728
+ entry = CacheEntry(
729
+ model=result.model.model,
730
+ parameters=result.model.parameters,
731
+ system_prompt=system_prompt,
732
+ user_prompt=user_prompt,
733
+ output=json.dumps(output),
734
+ iteration=0,
735
+ )
736
+ cache.data[entry.key] = entry
737
+
738
+ self.cache = cache
739
+
710
740
  @property
711
741
  def has_unfixed_exceptions(self) -> bool:
712
742
  return self.task_history.has_unfixed_exceptions
713
743
 
714
744
  def __hash__(self) -> int:
715
-
716
745
  return dict_hash(
717
746
  self.to_dict(sort=True, add_edsl_version=False, include_cache_info=False)
718
747
  )
@@ -302,22 +302,59 @@ class TaskHistory(RepresentationMixin):
302
302
  js = env.joinpath("report.js").read_text()
303
303
  return js
304
304
 
305
+ # @property
306
+ # def exceptions_table(self) -> dict:
307
+ # """Return a dictionary of exceptions organized by type, service, model, and question name."""
308
+ # exceptions_table = {}
309
+ # for interview in self.total_interviews:
310
+ # for question_name, exceptions in interview.exceptions.items():
311
+ # for exception in exceptions:
312
+ # key = (
313
+ # exception.exception.__class__.__name__, # Exception type
314
+ # interview.model._inference_service_, # Service
315
+ # interview.model.model, # Model
316
+ # question_name, # Question name
317
+ # )
318
+ # if key not in exceptions_table:
319
+ # exceptions_table[key] = 0
320
+ # exceptions_table[key] += 1
321
+ # return exceptions_table
322
+
305
323
  @property
306
324
  def exceptions_table(self) -> dict:
307
- """Return a dictionary of exceptions organized by type, service, model, and question name."""
325
+ """Return a dictionary of unique exceptions organized by type, service, model, and question name."""
308
326
  exceptions_table = {}
327
+ seen_exceptions = set()
328
+
309
329
  for interview in self.total_interviews:
310
330
  for question_name, exceptions in interview.exceptions.items():
311
331
  for exception in exceptions:
312
- key = (
332
+ # Create a unique identifier for this exception based on its content
333
+ exception_key = (
313
334
  exception.exception.__class__.__name__, # Exception type
314
- interview.model._inference_service_, # Service
315
- interview.model.model, # Model
316
- question_name, # Question name
335
+ interview.model._inference_service_, # Service
336
+ interview.model.model, # Model
337
+ question_name, # Question name
338
+ exception.name, # Exception name
339
+ str(exception.traceback)[:100] if exception.traceback else "", # Truncated traceback
317
340
  )
318
- if key not in exceptions_table:
319
- exceptions_table[key] = 0
320
- exceptions_table[key] += 1
341
+
342
+ # Only count if we haven't seen this exact exception before
343
+ if exception_key not in seen_exceptions:
344
+ seen_exceptions.add(exception_key)
345
+
346
+ # Add to the summary table
347
+ table_key = (
348
+ exception.exception.__class__.__name__, # Exception type
349
+ interview.model._inference_service_, # Service
350
+ interview.model.model, # Model
351
+ question_name, # Question name
352
+ )
353
+
354
+ if table_key not in exceptions_table:
355
+ exceptions_table[table_key] = 0
356
+ exceptions_table[table_key] += 1
357
+
321
358
  return exceptions_table
322
359
 
323
360
  @property
@@ -5,6 +5,39 @@
5
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
6
  <title>Exceptions Report</title>
7
7
  <style>
8
+ /* Global styles */
9
+ :root {
10
+ --primary-color: #3f51b5;
11
+ --secondary-color: #5c6bc0;
12
+ --success-color: #4caf50;
13
+ --error-color: #f44336;
14
+ --warning-color: #ff9800;
15
+ --text-color: #333;
16
+ --light-bg: #f5f7fa;
17
+ --border-color: #e0e0e0;
18
+ --header-bg: #f9f9f9;
19
+ --card-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
20
+ --font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
21
+ }
22
+
23
+ * {
24
+ box-sizing: border-box;
25
+ }
26
+
27
+ body {
28
+ font-family: var(--font-family);
29
+ background-color: var(--light-bg);
30
+ color: var(--text-color);
31
+ line-height: 1.6;
32
+ margin: 0;
33
+ padding: 20px;
34
+ }
35
+
36
+ .container {
37
+ max-width: 1200px;
38
+ margin: 0 auto;
39
+ }
40
+
8
41
  {{ css }}
9
42
  </style>
10
43
 
@@ -14,9 +47,9 @@
14
47
 
15
48
  </head>
16
49
  <body>
17
- {% include 'overview.html' %}
18
- {% include 'exceptions_table.html' %}
19
- {% include 'interviews.html' %}
20
- {% include 'performance_plot.html' %}
50
+ <div class="container">
51
+ {% include 'exceptions_table.html' %}
52
+ {% include 'interviews.html' %}
53
+ </div>
21
54
  </body>
22
55
  </html>
@@ -1,34 +1,106 @@
1
+ <div class="summary-section">
2
+ <div class="table-container">
3
+ <h2>Exceptions Report</h2>
4
+ <table class="exceptions-table">
5
+ <thead>
6
+ <tr>
7
+ <th>Exception Type</th>
8
+ <th>Service</th>
9
+ <th>Model</th>
10
+ <th>Question Name</th>
11
+ <th class="count-column">Count</th>
12
+ </tr>
13
+ </thead>
14
+ <tbody>
15
+ {% for (exception_type, service, model, question_name), count in exceptions_table.items() %}
16
+ <tr>
17
+ <td>{{ exception_type }}</td>
18
+ <td>{{ service }}</td>
19
+ <td>{{ model }}</td>
20
+ <td>{{ question_name }}</td>
21
+ <td class="count-cell">{{ count }}</td>
22
+ </tr>
23
+ {% endfor %}
24
+ </tbody>
25
+ </table>
26
+ </div>
27
+
28
+ <p class="note">
29
+ Note: Each unique exception is counted only once. You may encounter repeated exceptions where retries were attempted.
30
+ </p>
31
+ </div>
32
+
1
33
  <style>
2
- th, td {
3
- padding: 0 10px; /* This applies the padding uniformly to all td elements */
4
- }
5
- </style>
6
-
7
- <table border="1">
8
- <thead>
9
- <tr>
10
- <th style="text-align: left">Exception Type</th>
11
- <th style="text-align: left">Service</th>
12
- <th style="text-align: left">Model</th>
13
- <th style="text-align: left">Question Name</th>
14
- <th style="text-align: left">Total</th>
15
- </tr>
16
- </thead>
17
- <tbody>
18
- {% for (exception_type, service, model, question_name), count in exceptions_table.items() %}
19
- <tr>
20
- <td>{{ exception_type }}</td>
21
- <td>{{ service }}</td>
22
- <td>{{ model }}</td>
23
- <td>{{ question_name }}</td>
24
- <td>{{ count }}</td>
25
- </tr>
26
- {% endfor %}
27
- </tbody>
28
- </table>
29
- <p>
30
- Note: You may encounter repeated exceptions where retries were attempted.
31
- </p>
32
- <p>
33
- See details about each exception, including code for reproducing it (click to expand).
34
- </p>
34
+ /* Summary section styles */
35
+ .summary-section {
36
+ background-color: white;
37
+ border-radius: 8px;
38
+ margin-bottom: 24px;
39
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
40
+ overflow: hidden;
41
+ border: 1px solid #e0e0e0;
42
+ padding: 0 0 16px 0;
43
+ }
44
+
45
+ .section-header {
46
+ background-color: #f9f9f9;
47
+ padding: 12px 16px;
48
+ border-bottom: 1px solid #e0e0e0;
49
+ }
50
+
51
+ .section-header h2 {
52
+ margin: 0;
53
+ font-size: 18px;
54
+ font-weight: 500;
55
+ color: #3f51b5;
56
+ }
57
+
58
+ .table-container {
59
+ padding: 16px;
60
+ overflow-x: auto;
61
+ }
62
+
63
+ /* Table styles */
64
+ .exceptions-table {
65
+ width: 100%;
66
+ border-collapse: collapse;
67
+ margin-bottom: 16px;
68
+ }
69
+
70
+ .exceptions-table th {
71
+ background-color: #f5f5f5;
72
+ color: #333;
73
+ font-weight: 500;
74
+ text-align: left;
75
+ padding: 12px;
76
+ border-bottom: 2px solid #e0e0e0;
77
+ }
78
+
79
+ .exceptions-table td {
80
+ padding: 10px 12px;
81
+ border-bottom: 1px solid #e0e0e0;
82
+ color: #333;
83
+ }
84
+
85
+ .exceptions-table tr:hover {
86
+ background-color: #f9f9f9;
87
+ }
88
+
89
+ .count-column {
90
+ width: 80px;
91
+ text-align: center;
92
+ }
93
+
94
+ .count-cell {
95
+ text-align: center;
96
+ font-weight: 500;
97
+ }
98
+
99
+ /* Note styles */
100
+ .note {
101
+ font-size: 14px;
102
+ color: #666;
103
+ margin: 0 16px;
104
+ line-height: 1.5;
105
+ }
106
+ </style>
@@ -1,130 +1,134 @@
1
- <style>
2
- td {
3
- padding: 0 10px; /* This applies the padding uniformly to all td elements */
4
- }
5
- .toggle-btn {
6
- background-color: #4CAF50;
7
- color: white;
8
- border: none;
9
- padding: 10px 20px;
10
- text-align: center;
11
- text-decoration: none;
12
- display: inline-block;
13
- font-size: 16px;
14
- margin: 4px 2px;
15
- cursor: pointer;
16
- border-radius: 8px;
17
- white-space: nowrap;
18
- }
19
- .toggle-btn span.collapse {
20
- display: none;
21
- }
22
- .exception-content {
23
- max-width: 100%; /* Adjust this value based on your layout */
24
- overflow-x: auto; /* Enables horizontal scrolling */
25
- }
26
- </style>
1
+ <div class="summary-section">
2
+ <div class="section-header">
3
+ <h2>Exception Details</h2>
4
+ </div>
5
+ <div class="p-4">
6
+ <div class="question">Question name: {{ question }}</div>
27
7
 
28
- <div class="question">Question name: {{ question }}</div>
8
+ {% set seen_exceptions = [] %}
29
9
 
30
- {% for exception_message in exceptions %}
31
- <div class="exception-detail">
32
- <div class="exception-header">
33
- <span class="exception-exception">Exception: {{ exception_message.name }}</span>
34
- <button id="toggleBtn" class="toggle-btn" onclick="toggleButton(this)" aria-expanded="false">
35
- <span class="expand"> </span>
36
- </button>
37
- </div>
38
- <div class="exception-content">
39
- <table border="1">
40
- <tr>
41
- <td>Interview ID (index in results)</td>
42
- <td>{{ index }}</td>
43
- </tr>
44
- <tr>
45
- <td>Question name</td>
46
- <td>{{ question }}</td>
47
- </tr>
48
- <tr>
49
- <td>Question type</td>
50
- <td>{{ exception_message.question_type }}</td>
51
- </tr>
52
- <tr>
53
- <td>Human-readable question</td>
54
- <td>{{ interview.survey._get_question_by_name(question).html(
55
- scenario = interview.scenario,
56
- agent = interview.agent,
57
- answers = exception_message.answers
58
- )
59
- }}</td>
60
- </tr>
61
- <tr>
62
- <td>User Prompt</td>
63
- <td><pre>{{ exception_message.rendered_prompts['user_prompt'] }}</pre></td>
64
- </tr>
65
- <tr>
66
- <td>Scenario</td>
67
- <td>{{ interview.scenario.__repr__() }}</td>
68
- </tr>
69
- <tr>
70
- <td>Agent</td>
71
- <td>{{ interview.agent.__repr__() }}</td>
72
- </tr>
73
- <tr>
74
- <td>System Prompt</td>
75
- <td><pre>{{ exception_message.rendered_prompts['system_prompt'] }}</pre></td>
76
- </tr>
77
- <tr>
78
- <td>Inference service</td>
79
- <td>{{ interview.model._inference_service_ }}</td>
80
- </tr>
81
- <tr>
82
- <td>Model name</td>
83
- <td>{{ interview.model.model }}</td>
84
- </tr>
85
- <tr>
86
- <td>Model parameters</td>
87
- <td>{{ interview.model.__repr__() }}</td>
88
- </tr>
89
- <tr>
90
- <td>Raw model response</td>
91
- <td><pre>{{ exception_message.raw_model_response }}</pre>
92
- </td>
93
- </tr>
94
- <tr>
95
- <td>Generated token string (at {{ exception_message.key_sequence }}) in raw response</td>
96
- <td><pre>{{ exception_message.generated_token_string }}</pre>
97
- </td>
98
- </tr>
99
- <tr>
100
- <td>Code likely to reproduce the error</td>
101
- <td>
102
- <textarea id="codeToCopy" rows="10" cols="90">{{ exception_message.code_to_reproduce }}</textarea>
103
- <button onclick="copyCode()">Copy</button>
104
- </td>
105
- </tr>
10
+ {% for exception_message in exceptions %}
11
+ {% set exception_key = exception_message.name ~ '|' ~
12
+ exception_message.question_type|default('') ~ '|' ~
13
+ (exception_message.traceback|default(''))[0:100] %}
14
+
15
+ {% if exception_key not in seen_exceptions %}
16
+ {% set _ = seen_exceptions.append(exception_key) %}
17
+
18
+ <div class="exception-detail">
19
+ <div class="exception-header">
20
+ <span class="exception-exception">
21
+ {{ exception_message.exception.__class__.__name__ }}
22
+ </span>
23
+ <span class="chevron"></span>
24
+ </div>
25
+ <div class="exception-content">
26
+ <!-- Error Summary Section -->
27
+ <div>
28
+ <div class="section-header error-header">
29
+ <h3>Error Summary</h3>
30
+ </div>
31
+
32
+ <table class="details-table">
33
+ <tbody>
34
+ <tr>
35
+ <th>Exception</th>
36
+ <td>{{ exception_message.name }}</td>
37
+ </tr>
38
+ <tr>
39
+ <th>Model</th>
40
+ <td>{{ interview.model.model }}</td>
41
+ </tr>
42
+ <tr>
43
+ <th>Question</th>
44
+ <td>{{ question }} ({{ exception_message.question_type|default('Unknown type') }})</td>
45
+ </tr>
46
+ <tr>
47
+ <th>Time</th>
48
+ <td>{{ exception_message.time }}</td>
49
+ </tr>
50
+ </tbody>
51
+ </table>
52
+ </div>
106
53
 
107
- </table>
108
-
109
- {% if exception_message.exception.__class__.__name__ == 'QuestionAnswerValidationError' %}
110
- <h3>Answer validation details</h3>
111
- <table border="1">
112
- {% for field, (open_tag, close_tag, value) in exception_message.exception.to_html_dict().items() %}
113
- <tr>
114
- <td>{{ field }}</td>
115
- <td><{{ open_tag }}> {{ value | escape }} <{{ close_tag }}></td>
116
- </tr>
117
- {% endfor %}
118
- </table>
119
- {% endif %}
120
- <br><br>
121
- <div class="exception-time">Time: {{ exception_message.time }}</div>
122
- <div class="exception-traceback">Traceback:
123
- <text>
124
- <pre>{{ exception_message.traceback }}</pre>
125
- </text>
126
- </div>
127
- </div>
128
- </div>
54
+ <!-- Details Section -->
55
+ <div>
56
+ <div class="section-header">
57
+ <h3>Details</h3>
58
+ </div>
59
+ <table class="details-table">
60
+ <tbody>
61
+ <tr>
62
+ <th>Interview ID</th>
63
+ <td>{{ index }}</td>
64
+ </tr>
65
+ <tr>
66
+ <th>Question name</th>
67
+ <td>{{ question }}</td>
68
+ </tr>
69
+ <tr>
70
+ <th>Question type</th>
71
+ <td>{{ exception_message.question_type|default('Unknown') }}</td>
72
+ </tr>
73
+ <tr>
74
+ <th>User Prompt</th>
75
+ <td><pre>{{ exception_message.rendered_prompts['user_prompt'] }}</pre></td>
76
+ </tr>
77
+ <tr>
78
+ <th>Scenario</th>
79
+ <td>{{ interview.scenario.__repr__() }}</td>
80
+ </tr>
81
+ <tr>
82
+ <th>Agent</th>
83
+ <td>{{ interview.agent.__repr__() }}</td>
84
+ </tr>
85
+ <tr>
86
+ <th>System Prompt</th>
87
+ <td><pre>{{ exception_message.rendered_prompts['system_prompt'] }}</pre></td>
88
+ </tr>
89
+ <tr>
90
+ <th>Inference service</th>
91
+ <td>{{ interview.model._inference_service_ }}</td>
92
+ </tr>
93
+ <tr>
94
+ <th>Model name</th>
95
+ <td>{{ interview.model.model }}</td>
96
+ </tr>
97
+ <tr>
98
+ <th>Model parameters</th>
99
+ <td>{{ interview.model.__repr__() }}</td>
100
+ </tr>
101
+ <tr>
102
+ <th>Raw model response</th>
103
+ <td><pre>{{ exception_message.raw_model_response }}</pre></td>
104
+ </tr>
105
+ <tr>
106
+ <th>Generated token string</th>
107
+ <td><pre>{{ exception_message.generated_token_string }}</pre></td>
108
+ </tr>
109
+ </tbody>
110
+ </table>
111
+ </div>
129
112
 
130
- {% endfor %}
113
+ <!-- Code to reproduce section -->
114
+ <div>
115
+ <div class="section-header code-header">
116
+ <h3>Code to reproduce error</h3>
117
+ <button class="copy-button">Copy</button>
118
+ </div>
119
+ <pre class="code-block"><code>{{ exception_message.code_to_reproduce }}</code></pre>
120
+ </div>
121
+
122
+ <!-- Traceback section -->
123
+ <div>
124
+ <div class="section-header">
125
+ <h3>Traceback</h3>
126
+ </div>
127
+ <pre class="traceback">{{ exception_message.traceback }}</pre>
128
+ </div>
129
+ </div>
130
+ </div>
131
+ {% endif %}
132
+ {% endfor %}
133
+ </div>
134
+ </div>
@@ -1,26 +1,22 @@
1
- <style>
2
- td {
3
- padding: 0 10px; /* This applies the padding uniformly to all td elements */
4
- }
5
- </style>
1
+ <div class="overview-section">
2
+ <h2>Exceptions Report</h2>
3
+ <p>
4
+ This report summarizes exceptions encountered in the job that was run.
5
+ </p>
6
6
 
7
- <h3>Exceptions Report</h3>
8
- <p>
9
- This report summarizes exceptions encountered in the job that was run.
10
- </p>
11
-
12
- <table border="1">
13
- <tbody>
14
- <tr>
15
- <td>Total interviews</td>
16
- <td>{{ interviews|length }}</td>
17
- </tr>
18
- <tr>
19
- <td>Interviews with exceptions</td>
20
- <td>{{ num_exceptions }}</td>
21
- </tr>
22
- </tbody>
23
- </table>
24
- <p>
25
- An interview is the result of one survey, taken by one agent, with one model and one scenario (if any).
26
- </p>
7
+ <table class="summary-table">
8
+ <tbody>
9
+ <tr>
10
+ <td><strong>Total interviews</strong></td>
11
+ <td>{{ interviews|length }}</td>
12
+ </tr>
13
+ <tr>
14
+ <td><strong>Interviews with exceptions</strong></td>
15
+ <td>{{ num_exceptions }}</td>
16
+ </tr>
17
+ </tbody>
18
+ </table>
19
+ <p>
20
+ An interview is the result of one survey, taken by one agent, with one model and one scenario (if any).
21
+ </p>
22
+ </div>