edsl 0.1.54__py3-none-any.whl → 0.1.56__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. edsl/__init__.py +8 -1
  2. edsl/__init__original.py +134 -0
  3. edsl/__version__.py +1 -1
  4. edsl/agents/agent.py +29 -0
  5. edsl/agents/agent_list.py +36 -1
  6. edsl/base/base_class.py +281 -151
  7. edsl/base/data_transfer_models.py +15 -4
  8. edsl/buckets/__init__.py +8 -3
  9. edsl/buckets/bucket_collection.py +9 -3
  10. edsl/buckets/model_buckets.py +4 -2
  11. edsl/buckets/token_bucket.py +2 -2
  12. edsl/buckets/token_bucket_client.py +5 -3
  13. edsl/caching/cache.py +131 -62
  14. edsl/caching/cache_entry.py +70 -58
  15. edsl/caching/sql_dict.py +17 -0
  16. edsl/cli.py +99 -0
  17. edsl/config/config_class.py +16 -0
  18. edsl/conversation/__init__.py +31 -0
  19. edsl/coop/coop.py +276 -242
  20. edsl/coop/coop_jobs_objects.py +59 -0
  21. edsl/coop/coop_objects.py +29 -0
  22. edsl/coop/coop_regular_objects.py +26 -0
  23. edsl/coop/utils.py +24 -19
  24. edsl/dataset/dataset.py +338 -101
  25. edsl/dataset/dataset_operations_mixin.py +216 -180
  26. edsl/db_list/sqlite_list.py +349 -0
  27. edsl/inference_services/__init__.py +40 -5
  28. edsl/inference_services/exceptions.py +11 -0
  29. edsl/inference_services/services/anthropic_service.py +5 -2
  30. edsl/inference_services/services/aws_bedrock.py +6 -2
  31. edsl/inference_services/services/azure_ai.py +6 -2
  32. edsl/inference_services/services/google_service.py +7 -3
  33. edsl/inference_services/services/mistral_ai_service.py +6 -2
  34. edsl/inference_services/services/open_ai_service.py +6 -2
  35. edsl/inference_services/services/perplexity_service.py +6 -2
  36. edsl/inference_services/services/test_service.py +94 -5
  37. edsl/interviews/answering_function.py +167 -59
  38. edsl/interviews/interview.py +124 -72
  39. edsl/interviews/interview_task_manager.py +10 -0
  40. edsl/interviews/request_token_estimator.py +8 -0
  41. edsl/invigilators/invigilators.py +35 -13
  42. edsl/jobs/async_interview_runner.py +146 -104
  43. edsl/jobs/data_structures.py +6 -4
  44. edsl/jobs/decorators.py +61 -0
  45. edsl/jobs/fetch_invigilator.py +61 -18
  46. edsl/jobs/html_table_job_logger.py +14 -2
  47. edsl/jobs/jobs.py +180 -104
  48. edsl/jobs/jobs_component_constructor.py +2 -2
  49. edsl/jobs/jobs_interview_constructor.py +2 -0
  50. edsl/jobs/jobs_pricing_estimation.py +154 -113
  51. edsl/jobs/jobs_remote_inference_logger.py +4 -0
  52. edsl/jobs/jobs_runner_status.py +30 -25
  53. edsl/jobs/progress_bar_manager.py +79 -0
  54. edsl/jobs/remote_inference.py +35 -1
  55. edsl/key_management/key_lookup_builder.py +6 -1
  56. edsl/language_models/language_model.py +110 -12
  57. edsl/language_models/model.py +10 -3
  58. edsl/language_models/price_manager.py +176 -71
  59. edsl/language_models/registry.py +5 -0
  60. edsl/notebooks/notebook.py +77 -10
  61. edsl/questions/VALIDATION_README.md +134 -0
  62. edsl/questions/__init__.py +24 -1
  63. edsl/questions/exceptions.py +21 -0
  64. edsl/questions/question_dict.py +201 -16
  65. edsl/questions/question_multiple_choice_with_other.py +624 -0
  66. edsl/questions/question_registry.py +2 -1
  67. edsl/questions/templates/multiple_choice_with_other/__init__.py +0 -0
  68. edsl/questions/templates/multiple_choice_with_other/answering_instructions.jinja +15 -0
  69. edsl/questions/templates/multiple_choice_with_other/question_presentation.jinja +17 -0
  70. edsl/questions/validation_analysis.py +185 -0
  71. edsl/questions/validation_cli.py +131 -0
  72. edsl/questions/validation_html_report.py +404 -0
  73. edsl/questions/validation_logger.py +136 -0
  74. edsl/results/result.py +115 -46
  75. edsl/results/results.py +702 -171
  76. edsl/scenarios/construct_download_link.py +16 -3
  77. edsl/scenarios/directory_scanner.py +226 -226
  78. edsl/scenarios/file_methods.py +5 -0
  79. edsl/scenarios/file_store.py +150 -9
  80. edsl/scenarios/handlers/__init__.py +5 -1
  81. edsl/scenarios/handlers/mp4_file_store.py +104 -0
  82. edsl/scenarios/handlers/webm_file_store.py +104 -0
  83. edsl/scenarios/scenario.py +120 -101
  84. edsl/scenarios/scenario_list.py +800 -727
  85. edsl/scenarios/scenario_list_gc_test.py +146 -0
  86. edsl/scenarios/scenario_list_memory_test.py +214 -0
  87. edsl/scenarios/scenario_list_source_refactor.md +35 -0
  88. edsl/scenarios/scenario_selector.py +5 -4
  89. edsl/scenarios/scenario_source.py +1990 -0
  90. edsl/scenarios/tests/test_scenario_list_sources.py +52 -0
  91. edsl/surveys/survey.py +22 -0
  92. edsl/tasks/__init__.py +4 -2
  93. edsl/tasks/task_history.py +198 -36
  94. edsl/tests/scenarios/test_ScenarioSource.py +51 -0
  95. edsl/tests/scenarios/test_scenario_list_sources.py +51 -0
  96. edsl/utilities/__init__.py +2 -1
  97. edsl/utilities/decorators.py +121 -0
  98. edsl/utilities/memory_debugger.py +1010 -0
  99. {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/METADATA +51 -76
  100. {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/RECORD +103 -79
  101. edsl/jobs/jobs_runner_asyncio.py +0 -281
  102. edsl/language_models/unused/fake_openai_service.py +0 -60
  103. {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/LICENSE +0 -0
  104. {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/WHEEL +0 -0
  105. {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/entry_points.txt +0 -0
edsl/base/data_transfer_models.py CHANGED
@@ -1,10 +1,11 @@
 from collections import UserDict
-from typing import NamedTuple, Dict, Optional, Any
+from typing import NamedTuple, Dict, Optional, Any, Union
 from dataclasses import dataclass, fields
 
 
 class ModelInputs(NamedTuple):
     "This is what was send by the agent to the model"
+
     user_prompt: str
     system_prompt: str
     encoded_image: Optional[str] = None
@@ -12,6 +13,7 @@ class ModelInputs(NamedTuple):
 
 class EDSLOutput(NamedTuple):
     "This is the edsl dictionary that is returned by the model"
+
     answer: Any
     generated_tokens: str
     comment: Optional[str] = None
@@ -19,11 +21,16 @@ class EDSLOutput(NamedTuple):
 
 class ModelResponse(NamedTuple):
     "This is the metadata that is returned by the model and includes info about the cache"
+
     response: dict
     cache_used: bool
     cache_key: str
     cached_response: Optional[Dict[str, Any]] = None
-    cost: Optional[float] = None
+    input_tokens: Optional[int] = None
+    output_tokens: Optional[int] = None
+    input_price_per_million_tokens: Optional[float] = None
+    output_price_per_million_tokens: Optional[float] = None
+    total_cost: Optional[Union[float, str]] = None
 
 
 class AgentResponseDict(NamedTuple):
@@ -44,7 +51,11 @@ class EDSLResultObjectInput(NamedTuple):
     comment: str
     validated: bool = False
     exception_occurred: Exception = None
-    cost: Optional[float] = None
+    input_tokens: Optional[int] = None
+    output_tokens: Optional[int] = None
+    input_price_per_million_tokens: Optional[float] = None
+    output_price_per_million_tokens: Optional[float] = None
+    total_cost: Optional[Union[float, str]] = None
 
 
 @dataclass
@@ -111,4 +122,4 @@ class Answers(UserDict):
 if __name__ == "__main__":
     import doctest
 
-    doctest.testmod()
+    doctest.testmod()
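
The single `cost` field on `ModelResponse` and `EDSLResultObjectInput` is replaced by token counts and per-million-token prices, so consumers can recompute spend rather than trusting one opaque number. A minimal sketch of the arithmetic these fields imply — the helper name and the per-million convention are assumptions here, not code from the package:

```python
# Hypothetical helper mirroring the new ModelResponse fields; the
# price-per-million-tokens arithmetic is an assumption, not edsl source.
def estimate_total_cost(
    input_tokens: int,
    output_tokens: int,
    input_price_per_million_tokens: float,
    output_price_per_million_tokens: float,
) -> float:
    return (
        input_tokens * input_price_per_million_tokens
        + output_tokens * output_price_per_million_tokens
    ) / 1_000_000


# 1,200 prompt tokens at $0.50/M plus 350 completion tokens at $1.50/M
print(estimate_total_cost(1_200, 350, 0.50, 1.50))  # 0.001125
```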
edsl/buckets/__init__.py CHANGED
@@ -18,9 +18,6 @@ client (token_bucket_client) for distributed rate limiting scenarios where
 multiple processes or machines need to share rate limits.
 """
 
-from .bucket_collection import BucketCollection
-from .model_buckets import ModelBuckets
-from .token_bucket import TokenBucket
 from .exceptions import (
     BucketError,
     TokenLimitError,
@@ -28,10 +25,18 @@ from .exceptions import (
     BucketConfigurationError,
 )
 
+from .token_bucket import TokenBucket
+from .model_buckets import ModelBuckets
+from .token_bucket_client import TokenBucketClient  # Add explicit import for TokenBucketClient
+
+# Import BucketCollection last to avoid circular import issues
+from .bucket_collection import BucketCollection
+
 __all__ = [
     "BucketCollection",
     "ModelBuckets",
     "TokenBucket",
+    "TokenBucketClient",
     "BucketError",
     "TokenLimitError",
     "TokenBucketClientError",
edsl/buckets/bucket_collection.py CHANGED
@@ -10,11 +10,14 @@ share the same rate limit buckets.
 from typing import TYPE_CHECKING, Dict, List, Tuple
 from collections import UserDict
 from threading import RLock
-from matplotlib.figure import Figure
+import functools
+import inspect
+
+# Import the synchronized_class decorator directly
+from ..jobs.decorators import synchronized_class
 
 from .token_bucket import TokenBucket
 from .model_buckets import ModelBuckets
-from ..jobs.decorators import synchronized_class
 
 if TYPE_CHECKING:
     from ..language_models import LanguageModel
@@ -262,7 +265,7 @@ class BucketCollection(UserDict):
         )
         self.services_to_buckets[service].tokens_bucket = new_tokens_bucket
 
-    def visualize(self) -> Dict["LanguageModel", Tuple[Figure, Figure]]:
+    def visualize(self) -> Dict["LanguageModel", Tuple["Figure", "Figure"]]:
         """
         Visualize the token and request buckets for all models.
 
@@ -279,6 +282,9 @@ class BucketCollection(UserDict):
         >>> plots = bucket_collection.visualize()
        >>> # Now you can display or save these plots
        """
+        # Import Figure only for type checking when the function is called
+        from matplotlib.figure import Figure
+
        plots = {}
        for model in self:
            plots[model] = self[model].visualize()
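
The pattern used here — drop the module-level matplotlib import, quote the annotation, and import inside the method — keeps matplotlib off the import path until a plot is actually requested. The general shape, as a standalone sketch rather than edsl's exact code:

```python
from typing import TYPE_CHECKING, Tuple

if TYPE_CHECKING:
    # Seen by type checkers only; never executed at runtime
    from matplotlib.figure import Figure


def make_plots() -> Tuple["Figure", "Figure"]:
    # Pay for the heavy import only when visualization is requested
    import matplotlib.pyplot as plt

    return plt.figure(), plt.figure()
```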
edsl/buckets/model_buckets.py CHANGED
@@ -7,7 +7,6 @@ instance contains two TokenBucket instances - one for requests and one for token
 """
 
 from typing import TYPE_CHECKING, Tuple
-from matplotlib.figure import Figure
 
 if TYPE_CHECKING:
     from .token_bucket import TokenBucket
@@ -147,7 +146,7 @@ class ModelBuckets:
             ),
         )
 
-    def visualize(self) -> Tuple[Figure, Figure]:
+    def visualize(self) -> Tuple["Figure", "Figure"]:
         """
         Create visualizations of token usage over time for both buckets.
 
@@ -163,6 +162,9 @@ class ModelBuckets:
        >>> ## request_plot, token_plot = buckets.visualize()
        >>> ## Now you can display or save these plots
        """
+        # Import Figure only for type checking when the function is called
+        from matplotlib.figure import Figure
+
        plot1 = self.requests_bucket.visualize()
        plot2 = self.tokens_bucket.visualize()
        return plot1, plot2
edsl/buckets/token_bucket.py CHANGED
@@ -80,8 +80,8 @@ class TokenBucket:
         'local-rate-limit'
         """
         if remote_url is not None:
-            # Import here to avoid circular imports
-            from ..buckets import TokenBucketClient
+            # Import the client directly from its module to avoid circular imports
+            from .token_bucket_client import TokenBucketClient
 
             return TokenBucketClient(
                 bucket_name=bucket_name,
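
Importing the class from its defining module rather than from the package `__init__` matters when the package is still mid-initialization; the schematic below illustrates the failure mode the change sidesteps (module names are edsl's, the comments are interpretation):

```python
# If edsl/buckets/__init__.py is still executing its own imports when
# token_bucket.py runs `from ..buckets import TokenBucketClient`, the
# package module exists but the attribute is not bound yet, raising
# ImportError. Importing the defining submodule avoids that dependency:
def _remote_client():
    from edsl.buckets.token_bucket_client import TokenBucketClient

    return TokenBucketClient
```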
edsl/buckets/token_bucket_client.py CHANGED
@@ -11,8 +11,6 @@ from typing import Union, Optional, Dict, Any
 import asyncio
 import time
 import aiohttp
-from matplotlib import pyplot as plt
-from matplotlib.figure import Figure
 
 from .exceptions import BucketError, TokenBucketClientError
 
@@ -417,7 +415,7 @@ class TokenBucketClient:
     # """Server-side wait time calculation (future implementation)"""
     # return 0  # TODO - Need to implement this on the server side
 
-    def visualize(self) -> Figure:
+    def visualize(self) -> "Figure":
         """
         Visualize the token bucket usage over time as a matplotlib figure.
 
@@ -442,6 +440,10 @@ class TokenBucketClient:
         start_time = times[0]
         times = [t - start_time for t in times]
 
+        # Import here to avoid loading matplotlib until needed
+        from matplotlib import pyplot as plt
+        from matplotlib.figure import Figure
+
         # Create the plot
         fig = plt.figure(figsize=(10, 6))
         plt.plot(times, tokens, label="Tokens Available")
edsl/caching/cache.py CHANGED
@@ -41,30 +41,31 @@ from .sql_dict import SQLiteDict
 if TYPE_CHECKING:
     from .cache_entry import CacheEntry
 
+
 class Cache(Base):
     """Cache for storing and retrieving language model responses.
-
+
     The Cache class manages a collection of CacheEntry objects, providing methods for
     storing, retrieving, and persisting language model responses. It serves as the core
     component of EDSL's caching infrastructure, helping to reduce redundant API calls,
     save costs, and ensure reproducibility.
-
+
     Cache can use different storage backends:
     - In-memory dictionary (default)
    - SQLite database via SQLiteDict
    - JSON lines file (.jsonl)
-
+
     The cache operates by generating deterministic keys based on the model, parameters,
     prompts, and iteration number. This allows for efficient lookup of cached responses
     when identical requests are made.
-
+
     Attributes:
         data (dict or SQLiteDict): The primary storage for cache entries
         new_entries (dict): Entries added in the current session
         fetched_data (dict): Entries retrieved in the current session
         filename (str, optional): Path for persistence if provided
         immediate_write (bool): Whether to update data immediately (True) or defer (False)
-
+
     Technical Notes:
         - Can be used as a context manager to automatically persist changes on exit
        - Supports serialization/deserialization via to_dict/from_dict methods
@@ -86,25 +87,25 @@ class Cache(Base):
         verbose=False,
     ):
         """Initialize a new Cache instance.
-
-        Creates a new cache for storing language model responses. The cache can be initialized 
+
+        Creates a new cache for storing language model responses. The cache can be initialized
         with existing data or connected to a persistent storage file.
-
+
         Args:
             filename: Path to a persistent storage file (.jsonl or .db). If provided, the cache
                 will be initialized from this file and changes will be written back to it.
                 Cannot be used together with data parameter.
-            data: Initial cache data as a dictionary or SQLiteDict. Cannot be used together 
+            data: Initial cache data as a dictionary or SQLiteDict. Cannot be used together
                 with filename parameter.
             immediate_write: If True, new entries are immediately added to the main data store.
                 If False, they're kept separate until explicitly written.
             method: Deprecated. Legacy parameter for backward compatibility.
             verbose: If True, prints diagnostic information about cache hits and misses.
-
+
         Raises:
-            CacheError: If both filename and data are provided, or if the filename has an 
+            CacheError: If both filename and data are provided, or if the filename has an
                 invalid extension.
-
+
         Implementation Notes:
             - The cache maintains separate dictionaries for tracking:
               * data: The main persistent storage
@@ -153,12 +154,12 @@ class Cache(Base):
 
     def keys(self):
         """Return a list of all cache keys.
-
+
         Retrieves all cache keys, which are the unique identifiers for each cache entry.
-
+
         Returns:
             list: A list of string keys in the cache
-
+
         Examples:
             >>> from edsl import Cache
             >>> Cache.example().keys()
@@ -168,12 +169,12 @@ class Cache(Base):
 
     def values(self):
         """Return a list of all cache entry values.
-
+
         Retrieves all CacheEntry objects stored in the cache.
-
+
         Returns:
             list: A list of CacheEntry objects
-
+
         Examples:
             >>> from edsl import Cache
             >>> entries = Cache.example().values()
@@ -186,10 +187,10 @@ class Cache(Base):
 
     def items(self):
         """Return an iterator of (key, value) pairs in the cache.
-
+
         Similar to dict.items(), provides an iterator over all key-value pairs
         in the cache for easy iteration.
-
+
         Returns:
             zip: An iterator of (key, CacheEntry) tuples
         """
@@ -219,34 +220,35 @@ class Cache(Base):
         system_prompt: str,
         user_prompt: str,
         iteration: int,
+        validated: bool = False,
     ) -> tuple(Union[None, str], str):
         """Retrieve a cached language model response if available.
-
+
         This method attempts to find a cached response matching the exact input parameters.
         The combination of model, parameters, prompts, and iteration creates a unique key
         that identifies a specific language model request.
-
+
         Args:
             model: Language model identifier (e.g., "gpt-3.5-turbo")
             parameters: Model configuration parameters (e.g., temperature, max_tokens)
             system_prompt: The system instructions given to the model
             user_prompt: The user query/prompt given to the model
             iteration: The iteration number for this specific request
-
+
         Returns:
             tuple: (response, key) where:
                 - response: The cached model output as a string, or None if not found
                 - key: The cache key string generated for this request
-
+
         Technical Notes:
             - Uses CacheEntry.gen_key() to generate a consistent hash-based key
             - Updates self.fetched_data when a hit occurs to track cache usage
             - Optionally logs cache hit/miss when verbose=True
            - The response is returned as a JSON string for consistency
-
+
         Examples:
             >>> c = Cache()
-            >>> c.fetch(model="gpt-3", parameters="default", system_prompt="Hello", 
+            >>> c.fetch(model="gpt-3", parameters="default", system_prompt="Hello",
             ... user_prompt="Hi", iteration=1)[0] is None
             True
         """
@@ -278,12 +280,13 @@ class Cache(Base):
         response: dict,
         iteration: int,
         service: str,
+        validated: bool = False,
     ) -> str:
         """Store a new language model response in the cache.
-
+
         Creates a new CacheEntry from the provided parameters and response, then
         adds it to the cache using a deterministic key derived from the input parameters.
-
+
         Args:
             model: Language model identifier (e.g., "gpt-3.5-turbo")
             parameters: Model configuration parameters (e.g., temperature, max_tokens)
@@ -292,29 +295,30 @@ class Cache(Base):
             response: The model's response as a dictionary
             iteration: The iteration number for this specific request
             service: The service provider (e.g., "openai", "anthropic")
-
+            validated: Whether the response has been validated (default: False)
+
         Returns:
             str: The cache key generated for this entry
-
+
         Technical Notes:
             - Creates a new CacheEntry object to encapsulate the response and metadata
             - Adds the entry to self.new_entries to track entries added in this session
             - Adds the entry to the main data store if immediate_write=True
             - Otherwise, stores in new_entries_to_write_later for deferred writing
            - The response is stored as a JSON string for consistency and compatibility
-
+
         Storage Behavior:
             The method's behavior depends on the immediate_write setting:
             - If True: Immediately writes to the main data store (self.data)
            - If False: Stores in a separate dict for writing later (e.g., at context exit)
-
+
         Examples:
             >>> from edsl import Cache, Model, Question
-            >>> m = Model("test") 
+            >>> m = Model("test")
             >>> c = Cache()
             >>> len(c)
             0
-            >>> results = Question.example("free_text").by(m).run(cache=c, 
+            >>> results = Question.example("free_text").by(m).run(cache=c,
             ... disable_remote_cache=True, disable_remote_inference=True)
             >>> len(c)
             1
@@ -329,6 +333,7 @@ class Cache(Base):
             output=json.dumps(response),
             iteration=iteration,
             service=service,
+            validated=validated,
         )
         key = entry.key
         self.new_entries[key] = entry
@@ -486,20 +491,20 @@ class Cache(Base):
 
     def __floordiv__(self, other: "Cache") -> "Cache":
         """Subtract one cache from another, returning entries unique to this cache.
-
-        This operator implements set difference between two caches, returning a new cache 
+
+        This operator implements set difference between two caches, returning a new cache
         containing only entries that exist in this cache but not in the other cache.
         The floor division operator (//) is used as an intuitive alternative to subtraction.
-
+
         Args:
             other: Another Cache object to subtract from this one
-
+
         Returns:
             Cache: A new Cache containing only entries unique to this cache
-
+
         Raises:
             CacheError: If the provided object is not a Cache instance
-
+
         Examples:
             >>> from edsl.caching import CacheEntry
             >>> ce1 = CacheEntry.example(randomize=True)
@@ -511,7 +516,7 @@ class Cache(Base):
             1
             >>> c3.data[ce2.key] == ce2
             True
-
+
         Technical Notes:
             - Comparison is based on cache keys, not the full entry contents
            - Returns a new Cache instance with the same immediate_write setting
@@ -534,14 +539,14 @@ class Cache(Base):
 
     def __enter__(self):
         """Set up the cache when used as a context manager.
-
+
         Enables usage of Cache in a with statement, e.g.:
         ```python
         with Cache(filename="my_cache.db") as cache:
             # Use cache...
         # Changes automatically saved when exiting the context
         ```
-
+
         Returns:
             Cache: The cache instance itself
         """
@@ -549,21 +554,22 @@ class Cache(Base):
 
     def __exit__(self, exc_type, exc_value, traceback):
         """Clean up and persist cache when exiting the context.
-
+
         This method is called automatically when exiting a with block.
         It performs two key operations:
         1. Writes any deferred entries to the main data store
         2. Persists the cache to disk if a filename was provided
-
+
         Args:
             exc_type: Exception type if an exception was raised in the with block
             exc_value: Exception value if an exception was raised
             traceback: Traceback if an exception was raised
-
+
         Technical Notes:
             - Deferred entries (new_entries_to_write_later) are written to the main data store
             - If a filename was provided at initialization, cache is persisted to that file
            - Persistence format is determined by the filename extension (.jsonl or .db)
+            - SQLAlchemy resources are properly disposed when the context is exited
         """
         # Write any deferred entries to the main data store
         for key, entry in self.new_entries_to_write_later.items():
@@ -573,6 +579,9 @@ class Cache(Base):
         if self.filename:
             self.write(self.filename)
 
+        # Clean up SQLAlchemy resources
+        self.close()
+
     def __hash__(self):
         """Return the hash of the Cache."""
 
@@ -580,15 +589,15 @@ class Cache(Base):
 
     def to_dict(self, add_edsl_version=True) -> dict:
         """Serialize the cache to a dictionary for storage or transmission.
-
+
         Converts the Cache object into a plain dictionary format that can be
         easily serialized to JSON or other formats. Each CacheEntry is also
         converted to a dictionary using its to_dict method.
-
+
         Args:
             add_edsl_version: If True, includes the EDSL version and class name
                 in the serialized output for compatibility tracking
-
+
         Returns:
             dict: A dictionary representation of the cache with the structure:
                 {
@@ -598,7 +607,7 @@ class Cache(Base):
                     "edsl_version": "x.x.x", # if add_edsl_version=True
                     "edsl_class_name": "Cache" # if add_edsl_version=True
                 }
-
+
         Technical Notes:
             - Used by from_dict for deserialization
            - Used by __hash__ for cache comparison
@@ -635,6 +644,46 @@ class Cache(Base):
     def to_dataset(self):
         return self.to_scenario_list().to_dataset()
 
+    def _repr_html_(self):
+        """Generate an HTML representation for Jupyter notebooks.
+
+        This method is automatically called by Jupyter to render the object
+        as HTML in notebook cells. It handles empty caches gracefully.
+
+        Returns:
+            str: HTML representation of the object
+        """
+        # Get class name and documentation link
+        class_name = self.__class__.__name__
+        docs = getattr(self, "__documentation__", "")
+
+        # Create header with link to documentation
+        header = f"<a href='{docs}'>{class_name}</a>"
+
+        # Add summary if available
+        if hasattr(self, "_summary"):
+            summary_dict = self._summary()
+            summary_line = "".join([f" {k}: {v};" for k, v in summary_dict.items()])
+            header = f"<p>{header}{summary_line}</p>"
+        else:
+            header = f"<p>{header}</p>"
+
+        # Handle empty cache
+        if len(self.data) == 0:
+            return f"{header}<p><em>Empty cache</em></p>"
+
+        # For non-empty caches, render the table as usual
+        from edsl.dataset.display.table_display import TableDisplay
+
+        try:
+            return header + self.table()._repr_html_()
+        except Exception:
+            # Fallback if table() fails - display as dictionary
+            display_dict = {"entries": len(self.data)}
+            return (
+                header + TableDisplay.from_dictionary_wide(display_dict)._repr_html_()
+            )
+
     @classmethod
     @remove_edsl_version
     def from_dict(cls, data) -> Cache:
@@ -661,26 +710,26 @@ class Cache(Base):
 
     def __add__(self, other: "Cache"):
         """Combine this cache with another, updating in-place.
-
+
         This operator implements a set union operation between two caches, adding all
         entries from the other cache into this one. The operation modifies this cache
         in-place rather than creating a new one.
-
+
         Args:
             other: Another Cache object to merge into this one
-
+
         Returns:
             Cache: Self, with entries from other added
-
+
         Raises:
             CacheError: If the provided object is not a Cache instance
-
+
         Technical Notes:
             - Modifies this cache in-place (unlike __floordiv__ which returns a new cache)
             - If both caches have the same key, this cache's entry will be overwritten
             - Useful for merging caches from different sources
            - No special handling for conflicting entries - last one wins
-
+
         Examples:
             >>> from edsl.caching import CacheEntry
             >>> ce1 = CacheEntry.example(randomize=True)
@@ -697,6 +746,26 @@ class Cache(Base):
         self.data.update(other.data)
         return self
 
+    def close(self):
+        """Explicitly close and clean up resources.
+
+        This method properly disposes of any SQLAlchemy engines and
+        connections to prevent memory leaks.
+        """
+        # Clean up SQLiteDict resources if present
+        if not isinstance(self.data, dict):
+            # Handle SQLiteDict or other database-backed storage
+            if hasattr(self.data, "engine") and self.data.engine:
+                self.data.engine.dispose()
+
+    def __del__(self):
+        """Destructor for proper resource cleanup.
+
+        Ensures SQLAlchemy connections are properly closed when the Cache
+        object is garbage collected.
+        """
+        self.close()
+
     def __repr__(self):
         """
         Return a string representation of the Cache object.
@@ -773,23 +842,23 @@ class Cache(Base):
     @classmethod
     def example(cls, randomize: bool = False) -> Cache:
         """Create an example Cache instance for testing and demonstration.
-
+
         Creates a Cache object pre-populated with example CacheEntry objects.
         This method is useful for documentation, testing, and demonstration purposes.
-
+
         Args:
             randomize: If True, creates CacheEntry objects with randomized content
                 for uniqueness. If False, uses consistent example entries.
-
+
         Returns:
             Cache: A new Cache object containing example CacheEntry objects
-
+
         Technical Notes:
             - Uses CacheEntry.example() to create sample entries
             - When randomize=True, generates unique keys for each call
             - When randomize=False, produces consistent examples for doctests
            - Creates an in-memory cache (no persistent file)
-
+
         Examples:
             >>> cache = Cache.example()
            >>> len(cache) > 0
@@ -797,7 +866,7 @@ class Cache(Base):
             >>> from edsl.caching.cache_entry import CacheEntry
             >>> all(isinstance(entry, CacheEntry) for entry in cache.values())
             True
-
+
             >>> # Create examples with randomized content
             >>> cache1 = Cache.example(randomize=True)
             >>> cache2 = Cache.example(randomize=True)
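
Taken together, the cache changes mean the context-manager form now flushes deferred writes and disposes the SQLAlchemy engine on exit, and `store()`/`fetch()` gained a `validated` flag. A usage sketch based on the signatures shown above (argument values are illustrative):

```python
from edsl import Cache

# Deferred writes: entries land in new_entries_to_write_later and are
# flushed to my_cache.db when the block exits; close() then disposes
# any SQLAlchemy engine behind the store.
with Cache(filename="my_cache.db", immediate_write=False) as cache:
    key = cache.store(
        model="test",
        parameters={"temperature": 0.5},
        system_prompt="Hello",
        user_prompt="Hi",
        response={"answer": "ok"},
        iteration=0,
        service="test",
        validated=True,  # new flag in this release
    )
    response, _ = cache.fetch(
        model="test",
        parameters={"temperature": 0.5},
        system_prompt="Hello",
        user_prompt="Hi",
        iteration=0,
    )
```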