edsl 0.1.54__py3-none-any.whl → 0.1.55__py3-none-any.whl
This diff compares the contents of two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
- edsl/__init__.py +8 -1
- edsl/__init__original.py +134 -0
- edsl/__version__.py +1 -1
- edsl/agents/agent.py +29 -0
- edsl/agents/agent_list.py +36 -1
- edsl/base/base_class.py +281 -151
- edsl/buckets/__init__.py +8 -3
- edsl/buckets/bucket_collection.py +9 -3
- edsl/buckets/model_buckets.py +4 -2
- edsl/buckets/token_bucket.py +2 -2
- edsl/buckets/token_bucket_client.py +5 -3
- edsl/caching/cache.py +131 -62
- edsl/caching/cache_entry.py +70 -58
- edsl/caching/sql_dict.py +17 -0
- edsl/cli.py +99 -0
- edsl/config/config_class.py +16 -0
- edsl/conversation/__init__.py +31 -0
- edsl/coop/coop.py +276 -242
- edsl/coop/coop_jobs_objects.py +59 -0
- edsl/coop/coop_objects.py +29 -0
- edsl/coop/coop_regular_objects.py +26 -0
- edsl/coop/utils.py +24 -19
- edsl/dataset/dataset.py +338 -101
- edsl/db_list/sqlite_list.py +349 -0
- edsl/inference_services/__init__.py +40 -5
- edsl/inference_services/exceptions.py +11 -0
- edsl/inference_services/services/anthropic_service.py +5 -2
- edsl/inference_services/services/aws_bedrock.py +6 -2
- edsl/inference_services/services/azure_ai.py +6 -2
- edsl/inference_services/services/google_service.py +3 -2
- edsl/inference_services/services/mistral_ai_service.py +6 -2
- edsl/inference_services/services/open_ai_service.py +6 -2
- edsl/inference_services/services/perplexity_service.py +6 -2
- edsl/inference_services/services/test_service.py +94 -5
- edsl/interviews/answering_function.py +167 -59
- edsl/interviews/interview.py +124 -72
- edsl/interviews/interview_task_manager.py +10 -0
- edsl/invigilators/invigilators.py +9 -0
- edsl/jobs/async_interview_runner.py +146 -104
- edsl/jobs/data_structures.py +6 -4
- edsl/jobs/decorators.py +61 -0
- edsl/jobs/fetch_invigilator.py +61 -18
- edsl/jobs/html_table_job_logger.py +14 -2
- edsl/jobs/jobs.py +180 -104
- edsl/jobs/jobs_component_constructor.py +2 -2
- edsl/jobs/jobs_interview_constructor.py +2 -0
- edsl/jobs/jobs_remote_inference_logger.py +4 -0
- edsl/jobs/jobs_runner_status.py +30 -25
- edsl/jobs/progress_bar_manager.py +79 -0
- edsl/jobs/remote_inference.py +35 -1
- edsl/key_management/key_lookup_builder.py +6 -1
- edsl/language_models/language_model.py +86 -6
- edsl/language_models/model.py +10 -3
- edsl/language_models/price_manager.py +45 -75
- edsl/language_models/registry.py +5 -0
- edsl/notebooks/notebook.py +77 -10
- edsl/questions/VALIDATION_README.md +134 -0
- edsl/questions/__init__.py +24 -1
- edsl/questions/exceptions.py +21 -0
- edsl/questions/question_dict.py +201 -16
- edsl/questions/question_multiple_choice_with_other.py +624 -0
- edsl/questions/question_registry.py +2 -1
- edsl/questions/templates/multiple_choice_with_other/__init__.py +0 -0
- edsl/questions/templates/multiple_choice_with_other/answering_instructions.jinja +15 -0
- edsl/questions/templates/multiple_choice_with_other/question_presentation.jinja +17 -0
- edsl/questions/validation_analysis.py +185 -0
- edsl/questions/validation_cli.py +131 -0
- edsl/questions/validation_html_report.py +404 -0
- edsl/questions/validation_logger.py +136 -0
- edsl/results/result.py +63 -16
- edsl/results/results.py +702 -171
- edsl/scenarios/construct_download_link.py +16 -3
- edsl/scenarios/directory_scanner.py +226 -226
- edsl/scenarios/file_methods.py +5 -0
- edsl/scenarios/file_store.py +117 -6
- edsl/scenarios/handlers/__init__.py +5 -1
- edsl/scenarios/handlers/mp4_file_store.py +104 -0
- edsl/scenarios/handlers/webm_file_store.py +104 -0
- edsl/scenarios/scenario.py +120 -101
- edsl/scenarios/scenario_list.py +800 -727
- edsl/scenarios/scenario_list_gc_test.py +146 -0
- edsl/scenarios/scenario_list_memory_test.py +214 -0
- edsl/scenarios/scenario_list_source_refactor.md +35 -0
- edsl/scenarios/scenario_selector.py +5 -4
- edsl/scenarios/scenario_source.py +1990 -0
- edsl/scenarios/tests/test_scenario_list_sources.py +52 -0
- edsl/surveys/survey.py +22 -0
- edsl/tasks/__init__.py +4 -2
- edsl/tasks/task_history.py +198 -36
- edsl/tests/scenarios/test_ScenarioSource.py +51 -0
- edsl/tests/scenarios/test_scenario_list_sources.py +51 -0
- edsl/utilities/__init__.py +2 -1
- edsl/utilities/decorators.py +121 -0
- edsl/utilities/memory_debugger.py +1010 -0
- {edsl-0.1.54.dist-info → edsl-0.1.55.dist-info}/METADATA +51 -76
- {edsl-0.1.54.dist-info → edsl-0.1.55.dist-info}/RECORD +99 -75
- edsl/jobs/jobs_runner_asyncio.py +0 -281
- edsl/language_models/unused/fake_openai_service.py +0 -60
- {edsl-0.1.54.dist-info → edsl-0.1.55.dist-info}/LICENSE +0 -0
- {edsl-0.1.54.dist-info → edsl-0.1.55.dist-info}/WHEEL +0 -0
- {edsl-0.1.54.dist-info → edsl-0.1.55.dist-info}/entry_points.txt +0 -0
edsl/buckets/__init__.py
CHANGED
@@ -18,9 +18,6 @@ client (token_bucket_client) for distributed rate limiting scenarios where
 multiple processes or machines need to share rate limits.
 """
 
-from .bucket_collection import BucketCollection
-from .model_buckets import ModelBuckets
-from .token_bucket import TokenBucket
 from .exceptions import (
     BucketError,
     TokenLimitError,
@@ -28,10 +25,18 @@ from .exceptions import (
     BucketConfigurationError,
 )
 
+from .token_bucket import TokenBucket
+from .model_buckets import ModelBuckets
+from .token_bucket_client import TokenBucketClient  # Add explicit import for TokenBucketClient
+
+# Import BucketCollection last to avoid circular import issues
+from .bucket_collection import BucketCollection
+
 __all__ = [
     "BucketCollection",
     "ModelBuckets",
     "TokenBucket",
+    "TokenBucketClient",
     "BucketError",
     "TokenLimitError",
     "TokenBucketClientError",
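The reordering above follows the hunk's own comment: bind the leaf modules first and import `bucket_collection` last, since it pulls in `..jobs.decorators` and can re-enter the package while it is still initializing. A minimal sketch of the general pattern (module names here are illustrative, not edsl's actual layout):

```python
# pkg/__init__.py -- sketch of the import-cycle fix, assuming pkg/collection.py
# imports a helper that, directly or indirectly, runs `import pkg` again.
# If collection were imported first, that re-entrant import would see a
# half-initialized package and raise ImportError.

# Bind the dependency-free names first ...
from .exceptions import BucketError
from .token_bucket import TokenBucket

# ... and import the module that participates in the cycle last, once the
# names it may look up on the package already exist.
from .bucket_collection import BucketCollection
```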
edsl/buckets/bucket_collection.py
CHANGED
@@ -10,11 +10,14 @@ share the same rate limit buckets.
 from typing import TYPE_CHECKING, Dict, List, Tuple
 from collections import UserDict
 from threading import RLock
-
+import functools
+import inspect
+
+# Import the synchronized_class decorator directly
+from ..jobs.decorators import synchronized_class
 
 from .token_bucket import TokenBucket
 from .model_buckets import ModelBuckets
-from ..jobs.decorators import synchronized_class
 
 if TYPE_CHECKING:
     from ..language_models import LanguageModel
@@ -262,7 +265,7 @@ class BucketCollection(UserDict):
         )
         self.services_to_buckets[service].tokens_bucket = new_tokens_bucket
 
-    def visualize(self) -> Dict["LanguageModel", Tuple[Figure, Figure]]:
+    def visualize(self) -> Dict["LanguageModel", Tuple["Figure", "Figure"]]:
         """
         Visualize the token and request buckets for all models.
 
@@ -279,6 +282,9 @@
         >>> plots = bucket_collection.visualize()
         >>> # Now you can display or save these plots
         """
+        # Import Figure only for type checking when the function is called
+        from matplotlib.figure import Figure
+
         plots = {}
         for model in self:
             plots[model] = self[model].visualize()
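This hunk, like the two that follow for model_buckets.py and token_bucket_client.py, applies one technique: drop the module-level matplotlib import, quote the `Figure` annotation so it is never evaluated at runtime, and import inside the method body. A minimal sketch of the pattern (class and method names illustrative):

```python
from typing import TYPE_CHECKING, Tuple

if TYPE_CHECKING:
    # Seen only by type checkers; matplotlib is not loaded at module import.
    from matplotlib.figure import Figure


class Buckets:
    def visualize(self) -> Tuple["Figure", "Figure"]:
        # Deferred import: the heavy dependency loads only when plotting.
        from matplotlib import pyplot as plt

        fig1 = plt.figure()
        fig2 = plt.figure()
        return fig1, fig2
```

The payoff is import-time cost: `import edsl` no longer drags in matplotlib unless a visualization is actually requested.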
edsl/buckets/model_buckets.py
CHANGED
@@ -7,7 +7,6 @@ instance contains two TokenBucket instances - one for requests and one for token
 """
 
 from typing import TYPE_CHECKING, Tuple
-from matplotlib.figure import Figure
 
 if TYPE_CHECKING:
     from .token_bucket import TokenBucket
@@ -147,7 +146,7 @@ class ModelBuckets:
             ),
         )
 
-    def visualize(self) -> Tuple[Figure, Figure]:
+    def visualize(self) -> Tuple["Figure", "Figure"]:
         """
         Create visualizations of token usage over time for both buckets.
 
@@ -163,6 +162,9 @@
         >>> ## request_plot, token_plot = buckets.visualize()
         >>> ## Now you can display or save these plots
         """
+        # Import Figure only for type checking when the function is called
+        from matplotlib.figure import Figure
+
         plot1 = self.requests_bucket.visualize()
         plot2 = self.tokens_bucket.visualize()
         return plot1, plot2
edsl/buckets/token_bucket.py
CHANGED
@@ -80,8 +80,8 @@ class TokenBucket:
         'local-rate-limit'
         """
         if remote_url is not None:
-            # Import
-            from
+            # Import the client directly from its module to avoid circular imports
+            from .token_bucket_client import TokenBucketClient
 
             return TokenBucketClient(
                 bucket_name=bucket_name,
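Note that the 0.1.54 wheel shipped a truncated import on this path (a bare `# Import` and `from`); 0.1.55 completes it, so constructing a `TokenBucket` with a `remote_url` now hands back a `TokenBucketClient` that proxies to a shared token-bucket server. A hedged usage sketch; every constructor parameter except `bucket_name` and `remote_url` is an assumption here:

```python
from edsl.buckets import TokenBucket

# Local bucket: a plain TokenBucket instance.
local = TokenBucket(
    bucket_name="local-rate-limit",
    bucket_type="requests",  # assumed parameter
    capacity=60,             # assumed parameter
    refill_rate=1.0,         # assumed parameter
)

# With remote_url, the same constructor transparently returns a
# TokenBucketClient for distributed rate limiting.
shared = TokenBucket(
    bucket_name="shared-rate-limit",
    bucket_type="requests",  # assumed parameter
    capacity=60,             # assumed parameter
    refill_rate=1.0,         # assumed parameter
    remote_url="http://localhost:8000",  # assumed URL
)
```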
edsl/buckets/token_bucket_client.py
CHANGED
@@ -11,8 +11,6 @@ from typing import Union, Optional, Dict, Any
 import asyncio
 import time
 import aiohttp
-from matplotlib import pyplot as plt
-from matplotlib.figure import Figure
 
 from .exceptions import BucketError, TokenBucketClientError
 
@@ -417,7 +415,7 @@ class TokenBucketClient:
     #     """Server-side wait time calculation (future implementation)"""
     #     return 0  # TODO - Need to implement this on the server side
 
-    def visualize(self) -> Figure:
+    def visualize(self) -> "Figure":
         """
         Visualize the token bucket usage over time as a matplotlib figure.
 
@@ -442,6 +440,10 @@
         start_time = times[0]
         times = [t - start_time for t in times]
 
+        # Import here to avoid loading matplotlib until needed
+        from matplotlib import pyplot as plt
+        from matplotlib.figure import Figure
+
         # Create the plot
         fig = plt.figure(figsize=(10, 6))
         plt.plot(times, tokens, label="Tokens Available")
edsl/caching/cache.py
CHANGED
@@ -41,30 +41,31 @@ from .sql_dict import SQLiteDict
 if TYPE_CHECKING:
     from .cache_entry import CacheEntry
 
+
 class Cache(Base):
     """Cache for storing and retrieving language model responses.
-
+
     The Cache class manages a collection of CacheEntry objects, providing methods for
     storing, retrieving, and persisting language model responses. It serves as the core
     component of EDSL's caching infrastructure, helping to reduce redundant API calls,
     save costs, and ensure reproducibility.
-
+
     Cache can use different storage backends:
     - In-memory dictionary (default)
     - SQLite database via SQLiteDict
     - JSON lines file (.jsonl)
-
+
     The cache operates by generating deterministic keys based on the model, parameters,
     prompts, and iteration number. This allows for efficient lookup of cached responses
     when identical requests are made.
-
+
     Attributes:
         data (dict or SQLiteDict): The primary storage for cache entries
         new_entries (dict): Entries added in the current session
        fetched_data (dict): Entries retrieved in the current session
        filename (str, optional): Path for persistence if provided
        immediate_write (bool): Whether to update data immediately (True) or defer (False)
-
+
    Technical Notes:
        - Can be used as a context manager to automatically persist changes on exit
        - Supports serialization/deserialization via to_dict/from_dict methods
@@ -86,25 +87,25 @@ class Cache(Base):
         verbose=False,
     ):
         """Initialize a new Cache instance.
-
-        Creates a new cache for storing language model responses. The cache can be initialized
+
+        Creates a new cache for storing language model responses. The cache can be initialized
         with existing data or connected to a persistent storage file.
-
+
         Args:
             filename: Path to a persistent storage file (.jsonl or .db). If provided, the cache
                 will be initialized from this file and changes will be written back to it.
                 Cannot be used together with data parameter.
-            data: Initial cache data as a dictionary or SQLiteDict. Cannot be used together
+            data: Initial cache data as a dictionary or SQLiteDict. Cannot be used together
                 with filename parameter.
             immediate_write: If True, new entries are immediately added to the main data store.
                 If False, they're kept separate until explicitly written.
             method: Deprecated. Legacy parameter for backward compatibility.
             verbose: If True, prints diagnostic information about cache hits and misses.
-
+
         Raises:
-            CacheError: If both filename and data are provided, or if the filename has an
+            CacheError: If both filename and data are provided, or if the filename has an
                 invalid extension.
-
+
         Implementation Notes:
             - The cache maintains separate dictionaries for tracking:
                 * data: The main persistent storage
@@ -153,12 +154,12 @@ class Cache(Base):
 
     def keys(self):
         """Return a list of all cache keys.
-
+
         Retrieves all cache keys, which are the unique identifiers for each cache entry.
-
+
         Returns:
             list: A list of string keys in the cache
-
+
         Examples:
             >>> from edsl import Cache
             >>> Cache.example().keys()
@@ -168,12 +169,12 @@ class Cache(Base):
 
     def values(self):
         """Return a list of all cache entry values.
-
+
         Retrieves all CacheEntry objects stored in the cache.
-
+
         Returns:
             list: A list of CacheEntry objects
-
+
         Examples:
             >>> from edsl import Cache
             >>> entries = Cache.example().values()
@@ -186,10 +187,10 @@ class Cache(Base):
 
     def items(self):
         """Return an iterator of (key, value) pairs in the cache.
-
+
         Similar to dict.items(), provides an iterator over all key-value pairs
         in the cache for easy iteration.
-
+
         Returns:
             zip: An iterator of (key, CacheEntry) tuples
         """
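Together, `keys()`, `values()`, and `items()` give Cache a read-only dict-like surface. A small usage sketch consistent with the doctests above (`output` is the CacheEntry field visible in the store() hunk below):

```python
from edsl import Cache

cache = Cache.example()
for key, entry in cache.items():
    # Each entry is a CacheEntry; `output` holds the JSON-encoded response.
    print(key, entry.output)
```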
@@ -219,34 +220,35 @@ class Cache(Base):
         system_prompt: str,
         user_prompt: str,
         iteration: int,
+        validated: bool = False,
     ) -> tuple(Union[None, str], str):
         """Retrieve a cached language model response if available.
-
+
         This method attempts to find a cached response matching the exact input parameters.
         The combination of model, parameters, prompts, and iteration creates a unique key
         that identifies a specific language model request.
-
+
         Args:
             model: Language model identifier (e.g., "gpt-3.5-turbo")
             parameters: Model configuration parameters (e.g., temperature, max_tokens)
             system_prompt: The system instructions given to the model
             user_prompt: The user query/prompt given to the model
             iteration: The iteration number for this specific request
-
+
         Returns:
             tuple: (response, key) where:
                 - response: The cached model output as a string, or None if not found
                 - key: The cache key string generated for this request
-
+
         Technical Notes:
             - Uses CacheEntry.gen_key() to generate a consistent hash-based key
             - Updates self.fetched_data when a hit occurs to track cache usage
             - Optionally logs cache hit/miss when verbose=True
             - The response is returned as a JSON string for consistency
-
+
         Examples:
             >>> c = Cache()
-            >>> c.fetch(model="gpt-3", parameters="default", system_prompt="Hello",
+            >>> c.fetch(model="gpt-3", parameters="default", system_prompt="Hello",
             ...         user_prompt="Hi", iteration=1)[0] is None
             True
         """
@@ -278,12 +280,13 @@ class Cache(Base):
         response: dict,
         iteration: int,
         service: str,
+        validated: bool = False,
     ) -> str:
         """Store a new language model response in the cache.
-
+
         Creates a new CacheEntry from the provided parameters and response, then
         adds it to the cache using a deterministic key derived from the input parameters.
-
+
         Args:
             model: Language model identifier (e.g., "gpt-3.5-turbo")
             parameters: Model configuration parameters (e.g., temperature, max_tokens)
@@ -292,29 +295,30 @@ class Cache(Base):
             response: The model's response as a dictionary
             iteration: The iteration number for this specific request
             service: The service provider (e.g., "openai", "anthropic")
-
+            validated: Whether the response has been validated (default: False)
+
         Returns:
             str: The cache key generated for this entry
-
+
         Technical Notes:
             - Creates a new CacheEntry object to encapsulate the response and metadata
             - Adds the entry to self.new_entries to track entries added in this session
             - Adds the entry to the main data store if immediate_write=True
             - Otherwise, stores in new_entries_to_write_later for deferred writing
             - The response is stored as a JSON string for consistency and compatibility
-
+
         Storage Behavior:
             The method's behavior depends on the immediate_write setting:
             - If True: Immediately writes to the main data store (self.data)
             - If False: Stores in a separate dict for writing later (e.g., at context exit)
-
+
         Examples:
             >>> from edsl import Cache, Model, Question
-            >>> m = Model("test")
+            >>> m = Model("test")
             >>> c = Cache()
             >>> len(c)
             0
-            >>> results = Question.example("free_text").by(m).run(cache=c,
+            >>> results = Question.example("free_text").by(m).run(cache=c,
             ...     disable_remote_cache=True, disable_remote_inference=True)
             >>> len(c)
             1
@@ -329,6 +333,7 @@ class Cache(Base):
             output=json.dumps(response),
             iteration=iteration,
             service=service,
+            validated=validated,
         )
         key = entry.key
         self.new_entries[key] = entry
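The new `validated` flag flows from `store()` into the `CacheEntry` itself, letting a cache record whether a response passed answer validation. A minimal round-trip sketch against the signatures above (the diff does not show whether `validated` participates in key generation; this sketch assumes it is metadata only):

```python
from edsl import Cache

c = Cache()
key = c.store(
    model="test",
    parameters="default",
    system_prompt="Hello",
    user_prompt="Hi",
    response={"answer": "ok"},
    iteration=1,
    service="test",
    validated=True,  # new in 0.1.55; defaults to False
)
# fetch() also accepts validated (default False); lookup uses the same
# deterministic key, and the cached output comes back as a JSON string.
response, fetched_key = c.fetch(
    model="test",
    parameters="default",
    system_prompt="Hello",
    user_prompt="Hi",
    iteration=1,
)
assert fetched_key == key and response is not None
```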
@@ -486,20 +491,20 @@ class Cache(Base):
 
     def __floordiv__(self, other: "Cache") -> "Cache":
         """Subtract one cache from another, returning entries unique to this cache.
-
-        This operator implements set difference between two caches, returning a new cache
+
+        This operator implements set difference between two caches, returning a new cache
         containing only entries that exist in this cache but not in the other cache.
         The floor division operator (//) is used as an intuitive alternative to subtraction.
-
+
         Args:
             other: Another Cache object to subtract from this one
-
+
         Returns:
             Cache: A new Cache containing only entries unique to this cache
-
+
         Raises:
             CacheError: If the provided object is not a Cache instance
-
+
         Examples:
             >>> from edsl.caching import CacheEntry
             >>> ce1 = CacheEntry.example(randomize=True)
@@ -511,7 +516,7 @@ class Cache(Base):
             1
             >>> c3.data[ce2.key] == ce2
             True
-
+
         Technical Notes:
             - Comparison is based on cache keys, not the full entry contents
             - Returns a new Cache instance with the same immediate_write setting
@@ -534,14 +539,14 @@ class Cache(Base):
 
     def __enter__(self):
         """Set up the cache when used as a context manager.
-
+
         Enables usage of Cache in a with statement, e.g.:
         ```python
         with Cache(filename="my_cache.db") as cache:
             # Use cache...
         # Changes automatically saved when exiting the context
         ```
-
+
         Returns:
             Cache: The cache instance itself
         """
@@ -549,21 +554,22 @@ class Cache(Base):
 
     def __exit__(self, exc_type, exc_value, traceback):
         """Clean up and persist cache when exiting the context.
-
+
         This method is called automatically when exiting a with block.
         It performs two key operations:
         1. Writes any deferred entries to the main data store
         2. Persists the cache to disk if a filename was provided
-
+
         Args:
             exc_type: Exception type if an exception was raised in the with block
             exc_value: Exception value if an exception was raised
             traceback: Traceback if an exception was raised
-
+
         Technical Notes:
             - Deferred entries (new_entries_to_write_later) are written to the main data store
             - If a filename was provided at initialization, cache is persisted to that file
             - Persistence format is determined by the filename extension (.jsonl or .db)
+            - SQLAlchemy resources are properly disposed when the context is exited
         """
         # Write any deferred entries to the main data store
         for key, entry in self.new_entries_to_write_later.items():
@@ -573,6 +579,9 @@ class Cache(Base):
         if self.filename:
             self.write(self.filename)
 
+        # Clean up SQLAlchemy resources
+        self.close()
+
     def __hash__(self):
         """Return the hash of the Cache."""
 
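With `self.close()` now called from `__exit__`, leaving the `with` block both flushes deferred entries and disposes the SQLAlchemy engine behind a `.db`-backed cache. Usage, per the docstrings above:

```python
from edsl import Cache

# immediate_write=False defers new entries until the context exits.
with Cache(filename="my_cache.db", immediate_write=False) as cache:
    ...  # run jobs against `cache`; new entries accumulate separately
# On exit: deferred entries are merged into cache.data, the cache is
# written to my_cache.db, and the SQLAlchemy engine is disposed.
```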
@@ -580,15 +589,15 @@ class Cache(Base):
 
     def to_dict(self, add_edsl_version=True) -> dict:
         """Serialize the cache to a dictionary for storage or transmission.
-
+
         Converts the Cache object into a plain dictionary format that can be
         easily serialized to JSON or other formats. Each CacheEntry is also
         converted to a dictionary using its to_dict method.
-
+
         Args:
             add_edsl_version: If True, includes the EDSL version and class name
                 in the serialized output for compatibility tracking
-
+
         Returns:
             dict: A dictionary representation of the cache with the structure:
                 {
@@ -598,7 +607,7 @@ class Cache(Base):
                     "edsl_version": "x.x.x",  # if add_edsl_version=True
                     "edsl_class_name": "Cache"  # if add_edsl_version=True
                 }
-
+
         Technical Notes:
             - Used by from_dict for deserialization
             - Used by __hash__ for cache comparison
@@ -635,6 +644,46 @@ class Cache(Base):
     def to_dataset(self):
         return self.to_scenario_list().to_dataset()
 
+    def _repr_html_(self):
+        """Generate an HTML representation for Jupyter notebooks.
+
+        This method is automatically called by Jupyter to render the object
+        as HTML in notebook cells. It handles empty caches gracefully.
+
+        Returns:
+            str: HTML representation of the object
+        """
+        # Get class name and documentation link
+        class_name = self.__class__.__name__
+        docs = getattr(self, "__documentation__", "")
+
+        # Create header with link to documentation
+        header = f"<a href='{docs}'>{class_name}</a>"
+
+        # Add summary if available
+        if hasattr(self, "_summary"):
+            summary_dict = self._summary()
+            summary_line = "".join([f" {k}: {v};" for k, v in summary_dict.items()])
+            header = f"<p>{header}{summary_line}</p>"
+        else:
+            header = f"<p>{header}</p>"
+
+        # Handle empty cache
+        if len(self.data) == 0:
+            return f"{header}<p><em>Empty cache</em></p>"
+
+        # For non-empty caches, render the table as usual
+        from edsl.dataset.display.table_display import TableDisplay
+
+        try:
+            return header + self.table()._repr_html_()
+        except Exception:
+            # Fallback if table() fails - display as dictionary
+            display_dict = {"entries": len(self.data)}
+            return (
+                header + TableDisplay.from_dictionary_wide(display_dict)._repr_html_()
+            )
+
     @classmethod
     @remove_edsl_version
     def from_dict(cls, data) -> Cache:
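The new `_repr_html_` degrades gracefully: a docs-link header, an optional summary line, an explicit notice for empty caches, and a dictionary fallback if `table()` raises. A quick check of the empty-cache path:

```python
# In a Jupyter cell, an empty cache now renders a friendly notice instead
# of failing inside table():
from edsl import Cache

html = Cache()._repr_html_()
assert "<em>Empty cache</em>" in html
```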
@@ -661,26 +710,26 @@ class Cache(Base):
 
     def __add__(self, other: "Cache"):
         """Combine this cache with another, updating in-place.
-
+
         This operator implements a set union operation between two caches, adding all
         entries from the other cache into this one. The operation modifies this cache
         in-place rather than creating a new one.
-
+
         Args:
             other: Another Cache object to merge into this one
-
+
         Returns:
             Cache: Self, with entries from other added
-
+
         Raises:
             CacheError: If the provided object is not a Cache instance
-
+
         Technical Notes:
             - Modifies this cache in-place (unlike __floordiv__ which returns a new cache)
             - If both caches have the same key, this cache's entry will be overwritten
             - Useful for merging caches from different sources
             - No special handling for conflicting entries - last one wins
-
+
         Examples:
             >>> from edsl.caching import CacheEntry
             >>> ce1 = CacheEntry.example(randomize=True)
@@ -697,6 +746,26 @@ class Cache(Base):
         self.data.update(other.data)
         return self
 
+    def close(self):
+        """Explicitly close and clean up resources.
+
+        This method properly disposes of any SQLAlchemy engines and
+        connections to prevent memory leaks.
+        """
+        # Clean up SQLiteDict resources if present
+        if not isinstance(self.data, dict):
+            # Handle SQLiteDict or other database-backed storage
+            if hasattr(self.data, "engine") and self.data.engine:
+                self.data.engine.dispose()
+
+    def __del__(self):
+        """Destructor for proper resource cleanup.
+
+        Ensures SQLAlchemy connections are properly closed when the Cache
+        object is garbage collected.
+        """
+        self.close()
+
     def __repr__(self):
         """
         Return a string representation of the Cache object.
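Note that `close()` is a no-op for dict-backed caches (the `isinstance` guard) and disposes the SQLAlchemy engine for SQLiteDict-backed ones; `__del__` delegates to it so cleanup also happens without the context manager. Combined with the set operators documented above:

```python
from edsl import Cache

c1 = Cache.example(randomize=True)
c2 = Cache.example(randomize=True)

c1 + c2             # in-place union: c2's entries are merged into c1
only_c1 = c1 // c2  # new Cache holding the entries c2 does not have

c1.close()          # no-op here (in-memory dict); disposes the engine
                    # when the cache is backed by a SQLite .db file
```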
@@ -773,23 +842,23 @@ class Cache(Base):
     @classmethod
     def example(cls, randomize: bool = False) -> Cache:
         """Create an example Cache instance for testing and demonstration.
-
+
         Creates a Cache object pre-populated with example CacheEntry objects.
         This method is useful for documentation, testing, and demonstration purposes.
-
+
         Args:
             randomize: If True, creates CacheEntry objects with randomized content
                 for uniqueness. If False, uses consistent example entries.
-
+
         Returns:
             Cache: A new Cache object containing example CacheEntry objects
-
+
         Technical Notes:
             - Uses CacheEntry.example() to create sample entries
             - When randomize=True, generates unique keys for each call
             - When randomize=False, produces consistent examples for doctests
             - Creates an in-memory cache (no persistent file)
-
+
         Examples:
             >>> cache = Cache.example()
             >>> len(cache) > 0
@@ -797,7 +866,7 @@ class Cache(Base):
             >>> from edsl.caching.cache_entry import CacheEntry
             >>> all(isinstance(entry, CacheEntry) for entry in cache.values())
             True
-
+
             >>> # Create examples with randomized content
             >>> cache1 = Cache.example(randomize=True)
             >>> cache2 = Cache.example(randomize=True)