edsl 0.1.53__py3-none-any.whl → 0.1.55__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__init__.py +8 -1
- edsl/__init__original.py +134 -0
- edsl/__version__.py +1 -1
- edsl/agents/agent.py +29 -0
- edsl/agents/agent_list.py +36 -1
- edsl/base/base_class.py +281 -151
- edsl/buckets/__init__.py +8 -3
- edsl/buckets/bucket_collection.py +9 -3
- edsl/buckets/model_buckets.py +4 -2
- edsl/buckets/token_bucket.py +2 -2
- edsl/buckets/token_bucket_client.py +5 -3
- edsl/caching/cache.py +131 -62
- edsl/caching/cache_entry.py +70 -58
- edsl/caching/sql_dict.py +17 -0
- edsl/cli.py +99 -0
- edsl/config/config_class.py +16 -0
- edsl/conversation/__init__.py +31 -0
- edsl/coop/coop.py +276 -242
- edsl/coop/coop_jobs_objects.py +59 -0
- edsl/coop/coop_objects.py +29 -0
- edsl/coop/coop_regular_objects.py +26 -0
- edsl/coop/utils.py +24 -19
- edsl/dataset/dataset.py +338 -101
- edsl/db_list/sqlite_list.py +349 -0
- edsl/inference_services/__init__.py +40 -5
- edsl/inference_services/exceptions.py +11 -0
- edsl/inference_services/services/anthropic_service.py +5 -2
- edsl/inference_services/services/aws_bedrock.py +6 -2
- edsl/inference_services/services/azure_ai.py +6 -2
- edsl/inference_services/services/google_service.py +3 -2
- edsl/inference_services/services/mistral_ai_service.py +6 -2
- edsl/inference_services/services/open_ai_service.py +6 -2
- edsl/inference_services/services/perplexity_service.py +6 -2
- edsl/inference_services/services/test_service.py +105 -7
- edsl/interviews/answering_function.py +167 -59
- edsl/interviews/interview.py +124 -72
- edsl/interviews/interview_task_manager.py +10 -0
- edsl/invigilators/invigilators.py +10 -1
- edsl/jobs/async_interview_runner.py +146 -104
- edsl/jobs/data_structures.py +6 -4
- edsl/jobs/decorators.py +61 -0
- edsl/jobs/fetch_invigilator.py +61 -18
- edsl/jobs/html_table_job_logger.py +14 -2
- edsl/jobs/jobs.py +180 -104
- edsl/jobs/jobs_component_constructor.py +2 -2
- edsl/jobs/jobs_interview_constructor.py +2 -0
- edsl/jobs/jobs_pricing_estimation.py +127 -46
- edsl/jobs/jobs_remote_inference_logger.py +4 -0
- edsl/jobs/jobs_runner_status.py +30 -25
- edsl/jobs/progress_bar_manager.py +79 -0
- edsl/jobs/remote_inference.py +35 -1
- edsl/key_management/key_lookup_builder.py +6 -1
- edsl/language_models/language_model.py +102 -12
- edsl/language_models/model.py +10 -3
- edsl/language_models/price_manager.py +45 -75
- edsl/language_models/registry.py +5 -0
- edsl/language_models/utilities.py +2 -1
- edsl/notebooks/notebook.py +77 -10
- edsl/questions/VALIDATION_README.md +134 -0
- edsl/questions/__init__.py +24 -1
- edsl/questions/exceptions.py +21 -0
- edsl/questions/question_check_box.py +171 -149
- edsl/questions/question_dict.py +243 -51
- edsl/questions/question_multiple_choice_with_other.py +624 -0
- edsl/questions/question_registry.py +2 -1
- edsl/questions/templates/multiple_choice_with_other/__init__.py +0 -0
- edsl/questions/templates/multiple_choice_with_other/answering_instructions.jinja +15 -0
- edsl/questions/templates/multiple_choice_with_other/question_presentation.jinja +17 -0
- edsl/questions/validation_analysis.py +185 -0
- edsl/questions/validation_cli.py +131 -0
- edsl/questions/validation_html_report.py +404 -0
- edsl/questions/validation_logger.py +136 -0
- edsl/results/result.py +63 -16
- edsl/results/results.py +702 -171
- edsl/scenarios/construct_download_link.py +16 -3
- edsl/scenarios/directory_scanner.py +226 -226
- edsl/scenarios/file_methods.py +5 -0
- edsl/scenarios/file_store.py +117 -6
- edsl/scenarios/handlers/__init__.py +5 -1
- edsl/scenarios/handlers/mp4_file_store.py +104 -0
- edsl/scenarios/handlers/webm_file_store.py +104 -0
- edsl/scenarios/scenario.py +120 -101
- edsl/scenarios/scenario_list.py +800 -727
- edsl/scenarios/scenario_list_gc_test.py +146 -0
- edsl/scenarios/scenario_list_memory_test.py +214 -0
- edsl/scenarios/scenario_list_source_refactor.md +35 -0
- edsl/scenarios/scenario_selector.py +5 -4
- edsl/scenarios/scenario_source.py +1990 -0
- edsl/scenarios/tests/test_scenario_list_sources.py +52 -0
- edsl/surveys/survey.py +22 -0
- edsl/tasks/__init__.py +4 -2
- edsl/tasks/task_history.py +198 -36
- edsl/tests/scenarios/test_ScenarioSource.py +51 -0
- edsl/tests/scenarios/test_scenario_list_sources.py +51 -0
- edsl/utilities/__init__.py +2 -1
- edsl/utilities/decorators.py +121 -0
- edsl/utilities/memory_debugger.py +1010 -0
- {edsl-0.1.53.dist-info → edsl-0.1.55.dist-info}/METADATA +52 -76
- {edsl-0.1.53.dist-info → edsl-0.1.55.dist-info}/RECORD +102 -78
- edsl/jobs/jobs_runner_asyncio.py +0 -281
- edsl/language_models/unused/fake_openai_service.py +0 -60
- {edsl-0.1.53.dist-info → edsl-0.1.55.dist-info}/LICENSE +0 -0
- {edsl-0.1.53.dist-info → edsl-0.1.55.dist-info}/WHEEL +0 -0
- {edsl-0.1.53.dist-info → edsl-0.1.55.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,52 @@
|
|
1
|
+
"""
|
2
|
+
Tests for all registered source types in the ScenarioSource system.
|
3
|
+
|
4
|
+
This module contains tests that verify the functionality of all registered
|
5
|
+
source types in the system. It uses the Source registry to automatically
|
6
|
+
test all available source types.
|
7
|
+
"""
|
8
|
+
|
9
|
+
import pytest
|
10
|
+
from edsl.scenarios.scenario_source import Source
|
11
|
+
from edsl.scenarios.scenario_list import ScenarioList
|
12
|
+
|
13
|
+
def test_all_source_types():
|
14
|
+
"""
|
15
|
+
Test that all registered source types can create an example instance
|
16
|
+
and convert it to a ScenarioList.
|
17
|
+
"""
|
18
|
+
results = Source.test_all_sources()
|
19
|
+
|
20
|
+
# Print detailed results for debugging
|
21
|
+
for source_type, success in results.items():
|
22
|
+
print(f"Source type {source_type}: {'✓' if success else '✗'}")
|
23
|
+
|
24
|
+
# Assert that all source types succeeded
|
25
|
+
failed_sources = [source_type for source_type, success in results.items() if not success]
|
26
|
+
assert not failed_sources, f"The following source types failed: {failed_sources}"
|
27
|
+
|
28
|
+
def test_source_registry_not_empty():
|
29
|
+
"""Test that the source registry is not empty."""
|
30
|
+
registered_types = Source.get_registered_types()
|
31
|
+
assert len(registered_types) > 0, "No source types are registered"
|
32
|
+
|
33
|
+
def test_each_source_type():
|
34
|
+
"""
|
35
|
+
Test each source type individually with more detailed assertions.
|
36
|
+
"""
|
37
|
+
for source_type in Source.get_registered_types():
|
38
|
+
source_class = Source.get_source_class(source_type)
|
39
|
+
|
40
|
+
# Test example creation
|
41
|
+
example = source_class.example()
|
42
|
+
assert example is not None, f"Example creation failed for {source_type}"
|
43
|
+
|
44
|
+
# Test to_scenario_list
|
45
|
+
scenario_list = example.to_scenario_list()
|
46
|
+
assert isinstance(scenario_list, ScenarioList), f"to_scenario_list did not return a ScenarioList for {source_type}"
|
47
|
+
assert len(scenario_list) > 0, f"Empty ScenarioList returned for {source_type}"
|
48
|
+
|
49
|
+
def test_source_type_uniqueness():
|
50
|
+
"""Test that all source types are unique."""
|
51
|
+
registered_types = Source.get_registered_types()
|
52
|
+
assert len(registered_types) == len(set(registered_types)), "Duplicate source types found in registry"
|
edsl/surveys/survey.py
CHANGED
@@ -1691,6 +1691,28 @@ class Survey(Base):
|
|
1691
1691
|
scenario, filename, return_link, css, cta, include_question_name
|
1692
1692
|
)
|
1693
1693
|
|
1694
|
+
def copy(self) -> "Survey":
|
1695
|
+
"""Create a deep copy of the survey using serialization.
|
1696
|
+
|
1697
|
+
This method creates a completely independent copy of the survey by serializing
|
1698
|
+
and then deserializing it. This ensures all components are properly copied
|
1699
|
+
and maintains consistency with the survey's serialization format.
|
1700
|
+
|
1701
|
+
Returns:
|
1702
|
+
Survey: A new Survey instance that is a deep copy of the original.
|
1703
|
+
|
1704
|
+
Examples:
|
1705
|
+
>>> s = Survey.example()
|
1706
|
+
>>> s2 = s.copy()
|
1707
|
+
>>> s == s2
|
1708
|
+
True
|
1709
|
+
>>> s is s2
|
1710
|
+
False
|
1711
|
+
>>> s.questions[0] is s2.questions[0]
|
1712
|
+
False
|
1713
|
+
"""
|
1714
|
+
return Survey.from_dict(self.to_dict())
|
1715
|
+
|
1694
1716
|
|
1695
1717
|
def main():
|
1696
1718
|
"""Run the example survey."""
|
edsl/tasks/__init__.py
CHANGED
@@ -40,10 +40,12 @@ __all__ = [
|
|
40
40
|
'TaskHistoryError'
|
41
41
|
]
|
42
42
|
|
43
|
+
# Import TaskStatus first to prevent circular imports
|
44
|
+
from .task_status_enum import TaskStatus, TaskStatusDescriptor
|
43
45
|
from .task_history import TaskHistory
|
44
|
-
from .question_task_creator import QuestionTaskCreator
|
45
46
|
from .task_creators import TaskCreators
|
46
|
-
|
47
|
+
# Import question_task_creator last since it depends on other modules
|
48
|
+
from .question_task_creator import QuestionTaskCreator
|
47
49
|
from .exceptions import (
|
48
50
|
TaskError,
|
49
51
|
TaskStatusError,
|
edsl/tasks/task_history.py
CHANGED
@@ -23,20 +23,20 @@ from ..base import RepresentationMixin
|
|
23
23
|
class TaskHistory(RepresentationMixin):
|
24
24
|
"""
|
25
25
|
Records and analyzes the execution history of tasks across multiple interviews.
|
26
|
-
|
26
|
+
|
27
27
|
The TaskHistory class serves as a central repository for tracking task execution
|
28
28
|
across multiple interviews. It provides methods for:
|
29
|
-
|
29
|
+
|
30
30
|
1. Error Analysis - Collecting, categorizing, and reporting exceptions
|
31
31
|
2. Execution Visualization - Generating plots of task status over time
|
32
32
|
3. Performance Metrics - Calculating timing statistics for tasks
|
33
33
|
4. HTML Reports - Creating detailed interactive reports of execution
|
34
|
-
|
34
|
+
|
35
35
|
This class is particularly useful for debugging complex interview workflows,
|
36
36
|
identifying performance bottlenecks, and understanding patterns in task execution.
|
37
37
|
It supports both interactive exploration in notebooks and standalone report
|
38
38
|
generation.
|
39
|
-
|
39
|
+
|
40
40
|
Key features:
|
41
41
|
- Tracks exceptions with optional traceback storage
|
42
42
|
- Provides visualizations of task status transitions
|
@@ -44,7 +44,7 @@ class TaskHistory(RepresentationMixin):
|
|
44
44
|
- Computes statistics across interviews (by model, question type, etc.)
|
45
45
|
- Exports to various formats (HTML, notebook, etc.)
|
46
46
|
"""
|
47
|
-
|
47
|
+
|
48
48
|
def __init__(
|
49
49
|
self,
|
50
50
|
interviews: List["Interview"] = None,
|
@@ -54,13 +54,13 @@ class TaskHistory(RepresentationMixin):
|
|
54
54
|
):
|
55
55
|
"""
|
56
56
|
Initialize a TaskHistory to track execution across multiple interviews.
|
57
|
-
|
57
|
+
|
58
58
|
Parameters:
|
59
59
|
interviews: List of Interview objects to track
|
60
60
|
include_traceback: Whether to include full exception tracebacks
|
61
61
|
max_interviews: Maximum number of interviews to display in reports
|
62
62
|
interviews_with_exceptions_only: If True, only track interviews with exceptions
|
63
|
-
|
63
|
+
|
64
64
|
Example:
|
65
65
|
>>> _ = TaskHistory.example() # Create a sample TaskHistory
|
66
66
|
"""
|
@@ -86,8 +86,64 @@ class TaskHistory(RepresentationMixin):
|
|
86
86
|
if self.interviews_with_exceptions_only and interview.exceptions == {}:
|
87
87
|
return
|
88
88
|
|
89
|
-
|
90
|
-
|
89
|
+
# Store only essential data from the interview to break strong reference
|
90
|
+
# Instead of a deep copy, we create a lightweight reference holder
|
91
|
+
class InterviewReference:
|
92
|
+
def __init__(self, interview: "Interview"):
|
93
|
+
# Store only the data we need for reporting
|
94
|
+
self.exceptions = interview.exceptions
|
95
|
+
self.task_status_logs = interview.task_status_logs
|
96
|
+
self.model = interview.model
|
97
|
+
self.survey = interview.survey
|
98
|
+
|
99
|
+
# Store metadata needed for serialization
|
100
|
+
self._interview_id = id(interview)
|
101
|
+
|
102
|
+
def to_dict(self, add_edsl_version=True):
|
103
|
+
"""Create a serializable representation of the interview reference"""
|
104
|
+
# Create a simplified dict that has the required fields but doesn't
|
105
|
+
# maintain a strong reference to the original interview
|
106
|
+
data = {
|
107
|
+
"id": self._interview_id,
|
108
|
+
"type": "InterviewReference",
|
109
|
+
"exceptions": self.exceptions.to_dict()
|
110
|
+
if hasattr(self.exceptions, "to_dict")
|
111
|
+
else {},
|
112
|
+
"task_status_logs": {
|
113
|
+
name: log.to_dict() if hasattr(log, "to_dict") else {}
|
114
|
+
for name, log in self.task_status_logs.items()
|
115
|
+
},
|
116
|
+
}
|
117
|
+
|
118
|
+
# Add model and survey info if they have to_dict methods
|
119
|
+
if hasattr(self.model, "to_dict"):
|
120
|
+
data["model"] = self.model.to_dict(
|
121
|
+
add_edsl_version=add_edsl_version
|
122
|
+
)
|
123
|
+
|
124
|
+
if hasattr(self.survey, "to_dict"):
|
125
|
+
data["survey"] = self.survey.to_dict(
|
126
|
+
add_edsl_version=add_edsl_version
|
127
|
+
)
|
128
|
+
|
129
|
+
if add_edsl_version:
|
130
|
+
from edsl import __version__
|
131
|
+
|
132
|
+
data["edsl_version"] = __version__
|
133
|
+
|
134
|
+
return data
|
135
|
+
|
136
|
+
def __getattr__(self, name):
|
137
|
+
# Handle any missing attributes by returning None
|
138
|
+
# This provides compatibility with code that might access
|
139
|
+
# other interview attributes we haven't explicitly stored
|
140
|
+
return None
|
141
|
+
|
142
|
+
# Create a reference object instead of keeping the full interview
|
143
|
+
interview_ref = InterviewReference(interview)
|
144
|
+
|
145
|
+
self.total_interviews.append(interview_ref)
|
146
|
+
self._interviews[len(self._interviews)] = interview_ref
|
91
147
|
|
92
148
|
@classmethod
|
93
149
|
def example(cls):
|
@@ -96,7 +152,6 @@ class TaskHistory(RepresentationMixin):
|
|
96
152
|
|
97
153
|
j = Jobs.example(throw_exception_probability=1, test_model=True)
|
98
154
|
|
99
|
-
|
100
155
|
results = j.run(
|
101
156
|
print_exceptions=False,
|
102
157
|
skip_retry=True,
|
@@ -138,13 +193,37 @@ class TaskHistory(RepresentationMixin):
|
|
138
193
|
|
139
194
|
def to_dict(self, add_edsl_version=True):
|
140
195
|
"""Return the TaskHistory as a dictionary."""
|
196
|
+
# Serialize each interview object
|
197
|
+
interview_dicts = []
|
198
|
+
for i in self.total_interviews:
|
199
|
+
# Use to_dict method if available
|
200
|
+
if hasattr(i, "to_dict"):
|
201
|
+
try:
|
202
|
+
interview_dicts.append(i.to_dict(add_edsl_version=add_edsl_version))
|
203
|
+
except Exception:
|
204
|
+
# Fallback if to_dict fails
|
205
|
+
interview_dicts.append(
|
206
|
+
{
|
207
|
+
"type": "InterviewReference",
|
208
|
+
"exceptions": getattr(i, "exceptions", {}),
|
209
|
+
"task_status_logs": getattr(i, "task_status_logs", {}),
|
210
|
+
}
|
211
|
+
)
|
212
|
+
else:
|
213
|
+
# Fallback if no to_dict method
|
214
|
+
interview_dicts.append(
|
215
|
+
{
|
216
|
+
"type": "InterviewReference",
|
217
|
+
"exceptions": getattr(i, "exceptions", {}),
|
218
|
+
"task_status_logs": getattr(i, "task_status_logs", {}),
|
219
|
+
}
|
220
|
+
)
|
221
|
+
|
141
222
|
d = {
|
142
|
-
"interviews":
|
143
|
-
i.to_dict(add_edsl_version=add_edsl_version)
|
144
|
-
for i in self.total_interviews
|
145
|
-
],
|
223
|
+
"interviews": interview_dicts,
|
146
224
|
"include_traceback": self.include_traceback,
|
147
225
|
}
|
226
|
+
|
148
227
|
if add_edsl_version:
|
149
228
|
from .. import __version__
|
150
229
|
|
@@ -158,10 +237,76 @@ class TaskHistory(RepresentationMixin):
|
|
158
237
|
if data is None:
|
159
238
|
return cls([], include_traceback=False)
|
160
239
|
|
161
|
-
|
240
|
+
# Create an instance without interviews
|
241
|
+
instance = cls([], include_traceback=data.get("include_traceback", False))
|
162
242
|
|
163
|
-
|
164
|
-
|
243
|
+
# Create a custom interview-like object for each serialized interview
|
244
|
+
for interview_data in data.get("interviews", []):
|
245
|
+
# Check if this is one of our InterviewReference objects
|
246
|
+
if (
|
247
|
+
isinstance(interview_data, dict)
|
248
|
+
and interview_data.get("type") == "InterviewReference"
|
249
|
+
):
|
250
|
+
# Create our InterviewReference directly
|
251
|
+
class DeserializedInterviewRef:
|
252
|
+
def __init__(self, data):
|
253
|
+
# Convert exceptions dictionary to InterviewExceptionCollection
|
254
|
+
from ..interviews.exception_tracking import (
|
255
|
+
InterviewExceptionCollection,
|
256
|
+
)
|
257
|
+
|
258
|
+
exceptions_data = data.get("exceptions", {})
|
259
|
+
self.exceptions = (
|
260
|
+
InterviewExceptionCollection.from_dict(exceptions_data)
|
261
|
+
if exceptions_data
|
262
|
+
else InterviewExceptionCollection()
|
263
|
+
)
|
264
|
+
self.task_status_logs = data.get("task_status_logs", {})
|
265
|
+
self.model = data.get("model", {})
|
266
|
+
self.survey = data.get("survey", {})
|
267
|
+
|
268
|
+
def to_dict(self, add_edsl_version=True):
|
269
|
+
return {
|
270
|
+
"type": "InterviewReference",
|
271
|
+
"exceptions": self.exceptions.to_dict()
|
272
|
+
if hasattr(self.exceptions, "to_dict")
|
273
|
+
else self.exceptions,
|
274
|
+
"task_status_logs": self.task_status_logs,
|
275
|
+
"model": self.model,
|
276
|
+
"survey": self.survey,
|
277
|
+
}
|
278
|
+
|
279
|
+
# Create the reference and add it directly
|
280
|
+
ref = DeserializedInterviewRef(interview_data)
|
281
|
+
instance.total_interviews.append(ref)
|
282
|
+
instance._interviews[len(instance._interviews)] = ref
|
283
|
+
else:
|
284
|
+
# For backward compatibility, try to use Interview class
|
285
|
+
try:
|
286
|
+
from ..interviews import Interview
|
287
|
+
|
288
|
+
interview = Interview.from_dict(interview_data)
|
289
|
+
# This will make a reference copy through add_interview
|
290
|
+
instance.add_interview(interview)
|
291
|
+
except Exception:
|
292
|
+
# If we can't deserialize properly, add a minimal placeholder
|
293
|
+
class MinimalInterviewRef:
|
294
|
+
def __init__(self):
|
295
|
+
from ..interviews.exception_tracking import (
|
296
|
+
InterviewExceptionCollection,
|
297
|
+
)
|
298
|
+
|
299
|
+
self.exceptions = InterviewExceptionCollection()
|
300
|
+
self.task_status_logs = {}
|
301
|
+
|
302
|
+
def to_dict(self, add_edsl_version=True):
|
303
|
+
return {"type": "MinimalInterviewRef"}
|
304
|
+
|
305
|
+
ref = MinimalInterviewRef()
|
306
|
+
instance.total_interviews.append(ref)
|
307
|
+
instance._interviews[len(instance._interviews)] = ref
|
308
|
+
|
309
|
+
return instance
|
165
310
|
|
166
311
|
@property
|
167
312
|
def has_exceptions(self) -> bool:
|
@@ -187,8 +332,12 @@ class TaskHistory(RepresentationMixin):
|
|
187
332
|
"""Return a list of all the updates."""
|
188
333
|
updates = []
|
189
334
|
for interview in self.total_interviews:
|
190
|
-
|
191
|
-
|
335
|
+
# Check if task_status_logs exists and is a dictionary
|
336
|
+
if hasattr(interview, "task_status_logs") and isinstance(
|
337
|
+
interview.task_status_logs, dict
|
338
|
+
):
|
339
|
+
for question_name, logs in interview.task_status_logs.items():
|
340
|
+
updates.append(logs)
|
192
341
|
return updates
|
193
342
|
|
194
343
|
def print(self):
|
@@ -218,6 +367,14 @@ class TaskHistory(RepresentationMixin):
|
|
218
367
|
def plotting_data(self, num_periods=100):
|
219
368
|
updates = self.get_updates()
|
220
369
|
|
370
|
+
# Handle the case when updates is empty
|
371
|
+
if not updates:
|
372
|
+
# Return a list of dictionaries with all task statuses set to 0
|
373
|
+
return [
|
374
|
+
{task_status: 0 for task_status in TaskStatus}
|
375
|
+
for _ in range(num_periods)
|
376
|
+
]
|
377
|
+
|
221
378
|
min_t = min([update.min_time for update in updates])
|
222
379
|
max_t = max([update.max_time for update in updates])
|
223
380
|
delta_t = (max_t - min_t) / (num_periods * 1.0)
|
@@ -325,36 +482,38 @@ class TaskHistory(RepresentationMixin):
|
|
325
482
|
"""Return a dictionary of unique exceptions organized by type, service, model, and question name."""
|
326
483
|
exceptions_table = {}
|
327
484
|
seen_exceptions = set()
|
328
|
-
|
485
|
+
|
329
486
|
for interview in self.total_interviews:
|
330
487
|
for question_name, exceptions in interview.exceptions.items():
|
331
488
|
for exception in exceptions:
|
332
489
|
# Create a unique identifier for this exception based on its content
|
333
490
|
exception_key = (
|
334
491
|
exception.exception.__class__.__name__, # Exception type
|
335
|
-
interview.model._inference_service_,
|
336
|
-
interview.model.model,
|
337
|
-
question_name,
|
338
|
-
exception.name,
|
339
|
-
str(exception.traceback)[:100]
|
492
|
+
interview.model._inference_service_, # Service
|
493
|
+
interview.model.model, # Model
|
494
|
+
question_name, # Question name
|
495
|
+
exception.name, # Exception name
|
496
|
+
str(exception.traceback)[:100]
|
497
|
+
if exception.traceback
|
498
|
+
else "", # Truncated traceback
|
340
499
|
)
|
341
|
-
|
500
|
+
|
342
501
|
# Only count if we haven't seen this exact exception before
|
343
502
|
if exception_key not in seen_exceptions:
|
344
503
|
seen_exceptions.add(exception_key)
|
345
|
-
|
504
|
+
|
346
505
|
# Add to the summary table
|
347
506
|
table_key = (
|
348
507
|
exception.exception.__class__.__name__, # Exception type
|
349
|
-
interview.model._inference_service_,
|
350
|
-
interview.model.model,
|
351
|
-
question_name,
|
508
|
+
interview.model._inference_service_, # Service
|
509
|
+
interview.model.model, # Model
|
510
|
+
question_name, # Question name
|
352
511
|
)
|
353
|
-
|
512
|
+
|
354
513
|
if table_key not in exceptions_table:
|
355
514
|
exceptions_table[table_key] = 0
|
356
515
|
exceptions_table[table_key] += 1
|
357
|
-
|
516
|
+
|
358
517
|
return exceptions_table
|
359
518
|
|
360
519
|
@property
|
@@ -478,22 +637,22 @@ class TaskHistory(RepresentationMixin):
|
|
478
637
|
) -> Optional[str]:
|
479
638
|
"""
|
480
639
|
Generate and display an interactive HTML report of task execution.
|
481
|
-
|
640
|
+
|
482
641
|
This method creates a comprehensive HTML report showing task execution details,
|
483
642
|
exceptions, timing information, and statistics across all tracked interviews.
|
484
643
|
In notebook environments, it displays an embedded preview with a link to open
|
485
644
|
the full report in a new tab.
|
486
|
-
|
645
|
+
|
487
646
|
Parameters:
|
488
647
|
filename: Path to save the HTML report (if None, a temporary file is created)
|
489
648
|
return_link: If True, return the path to the saved HTML file
|
490
649
|
css: Custom CSS to apply to the report (if None, uses default styling)
|
491
650
|
cta: HTML for the "Call to Action" link text
|
492
651
|
open_in_browser: If True, automatically open the report in the default browser
|
493
|
-
|
652
|
+
|
494
653
|
Returns:
|
495
654
|
If return_link is True, returns the path to the saved HTML file; otherwise None
|
496
|
-
|
655
|
+
|
497
656
|
Notes:
|
498
657
|
- In Jupyter notebooks, displays an embedded preview with a link
|
499
658
|
- In terminal environments, saves the file and prints its location
|
@@ -574,3 +733,6 @@ if __name__ == "__main__":
|
|
574
733
|
import doctest
|
575
734
|
|
576
735
|
doctest.testmod(optionflags=doctest.ELLIPSIS)
|
736
|
+
|
737
|
+
# Run the reference test
|
738
|
+
test_no_strong_reference()
|
@@ -0,0 +1,51 @@
|
|
1
|
+
"""
|
2
|
+
Tests for all registered source types in the ScenarioSource system.
|
3
|
+
|
4
|
+
This module contains tests that verify the functionality of all registered
|
5
|
+
source types in the system. It uses the Source registry to automatically
|
6
|
+
test all available source types.
|
7
|
+
"""
|
8
|
+
|
9
|
+
import pytest
|
10
|
+
from edsl.scenarios.scenario_source import Source, ScenarioList
|
11
|
+
|
12
|
+
def test_all_source_types():
|
13
|
+
"""
|
14
|
+
Test that all registered source types can create an example instance
|
15
|
+
and convert it to a ScenarioList.
|
16
|
+
"""
|
17
|
+
results = Source.test_all_sources()
|
18
|
+
|
19
|
+
# Print detailed results for debugging
|
20
|
+
for source_type, success in results.items():
|
21
|
+
print(f"Source type {source_type}: {'✓' if success else '✗'}")
|
22
|
+
|
23
|
+
# Assert that all source types succeeded
|
24
|
+
failed_sources = [source_type for source_type, success in results.items() if not success]
|
25
|
+
assert not failed_sources, f"The following source types failed: {failed_sources}"
|
26
|
+
|
27
|
+
def test_source_registry_not_empty():
|
28
|
+
"""Test that the source registry is not empty."""
|
29
|
+
registered_types = Source.get_registered_types()
|
30
|
+
assert len(registered_types) > 0, "No source types are registered"
|
31
|
+
|
32
|
+
def test_each_source_type():
|
33
|
+
"""
|
34
|
+
Test each source type individually with more detailed assertions.
|
35
|
+
"""
|
36
|
+
for source_type in Source.get_registered_types():
|
37
|
+
source_class = Source.get_source_class(source_type)
|
38
|
+
|
39
|
+
# Test example creation
|
40
|
+
example = source_class.example()
|
41
|
+
assert example is not None, f"Example creation failed for {source_type}"
|
42
|
+
|
43
|
+
# Test to_scenario_list
|
44
|
+
scenario_list = example.to_scenario_list()
|
45
|
+
assert isinstance(scenario_list, ScenarioList), f"to_scenario_list did not return a ScenarioList for {source_type}"
|
46
|
+
assert len(scenario_list) > 0, f"Empty ScenarioList returned for {source_type}"
|
47
|
+
|
48
|
+
def test_source_type_uniqueness():
|
49
|
+
"""Test that all source types are unique."""
|
50
|
+
registered_types = Source.get_registered_types()
|
51
|
+
assert len(registered_types) == len(set(registered_types)), "Duplicate source types found in registry"
|
@@ -0,0 +1,51 @@
|
|
1
|
+
"""
|
2
|
+
Tests for all registered source types in the ScenarioSource system.
|
3
|
+
|
4
|
+
This module contains tests that verify the functionality of all registered
|
5
|
+
source types in the system. It uses the Source registry to automatically
|
6
|
+
test all available source types.
|
7
|
+
"""
|
8
|
+
|
9
|
+
import pytest
|
10
|
+
from edsl.scenarios.scenario_source import Source, ScenarioList
|
11
|
+
|
12
|
+
def test_all_source_types():
|
13
|
+
"""
|
14
|
+
Test that all registered source types can create an example instance
|
15
|
+
and convert it to a ScenarioList.
|
16
|
+
"""
|
17
|
+
results = Source.test_all_sources()
|
18
|
+
|
19
|
+
# Print detailed results for debugging
|
20
|
+
for source_type, success in results.items():
|
21
|
+
print(f"Source type {source_type}: {'✓' if success else '✗'}")
|
22
|
+
|
23
|
+
# Assert that all source types succeeded
|
24
|
+
failed_sources = [source_type for source_type, success in results.items() if not success]
|
25
|
+
assert not failed_sources, f"The following source types failed: {failed_sources}"
|
26
|
+
|
27
|
+
def test_source_registry_not_empty():
|
28
|
+
"""Test that the source registry is not empty."""
|
29
|
+
registered_types = Source.get_registered_types()
|
30
|
+
assert len(registered_types) > 0, "No source types are registered"
|
31
|
+
|
32
|
+
def test_each_source_type():
|
33
|
+
"""
|
34
|
+
Test each source type individually with more detailed assertions.
|
35
|
+
"""
|
36
|
+
for source_type in Source.get_registered_types():
|
37
|
+
source_class = Source.get_source_class(source_type)
|
38
|
+
|
39
|
+
# Test example creation
|
40
|
+
example = source_class.example()
|
41
|
+
assert example is not None, f"Example creation failed for {source_type}"
|
42
|
+
|
43
|
+
# Test to_scenario_list
|
44
|
+
scenario_list = example.to_scenario_list()
|
45
|
+
assert isinstance(scenario_list, ScenarioList), f"to_scenario_list did not return a ScenarioList for {source_type}"
|
46
|
+
assert len(scenario_list) > 0, f"Empty ScenarioList returned for {source_type}"
|
47
|
+
|
48
|
+
def test_source_type_uniqueness():
|
49
|
+
"""Test that all source types are unique."""
|
50
|
+
registered_types = Source.get_registered_types()
|
51
|
+
assert len(registered_types) == len(set(registered_types)), "Duplicate source types found in registry"
|
edsl/utilities/__init__.py
CHANGED
@@ -18,7 +18,7 @@ from .utilities import (
|
|
18
18
|
)
|
19
19
|
|
20
20
|
# Decorator utilities
|
21
|
-
from .decorators import sync_wrapper, jupyter_nb_handler
|
21
|
+
from .decorators import sync_wrapper, jupyter_nb_handler, memory_profile
|
22
22
|
|
23
23
|
# Standalone utilities
|
24
24
|
from .is_notebook import is_notebook
|
@@ -43,6 +43,7 @@ __all__ = [
|
|
43
43
|
"is_gzipped",
|
44
44
|
"sync_wrapper",
|
45
45
|
"jupyter_nb_handler",
|
46
|
+
"memory_profile",
|
46
47
|
"is_notebook",
|
47
48
|
"is_valid_variable_name",
|
48
49
|
"sanitize_string"
|