edsl 0.1.49__py3-none-any.whl → 0.1.51__py3-none-any.whl

This diff compares the contents of two package versions publicly released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in that registry.
Files changed (257)
  1. edsl/__init__.py +124 -53
  2. edsl/__version__.py +1 -1
  3. edsl/agents/agent.py +21 -21
  4. edsl/agents/agent_list.py +2 -5
  5. edsl/agents/exceptions.py +119 -5
  6. edsl/base/__init__.py +10 -35
  7. edsl/base/base_class.py +71 -36
  8. edsl/base/base_exception.py +204 -0
  9. edsl/base/data_transfer_models.py +1 -1
  10. edsl/base/exceptions.py +94 -0
  11. edsl/buckets/__init__.py +15 -1
  12. edsl/buckets/bucket_collection.py +3 -4
  13. edsl/buckets/exceptions.py +107 -0
  14. edsl/buckets/model_buckets.py +1 -2
  15. edsl/buckets/token_bucket.py +11 -6
  16. edsl/buckets/token_bucket_api.py +27 -12
  17. edsl/buckets/token_bucket_client.py +9 -7
  18. edsl/caching/cache.py +12 -4
  19. edsl/caching/cache_entry.py +10 -9
  20. edsl/caching/exceptions.py +113 -7
  21. edsl/caching/remote_cache_sync.py +6 -7
  22. edsl/caching/sql_dict.py +20 -14
  23. edsl/cli.py +43 -0
  24. edsl/config/__init__.py +1 -1
  25. edsl/config/config_class.py +32 -6
  26. edsl/conversation/Conversation.py +8 -4
  27. edsl/conversation/car_buying.py +1 -3
  28. edsl/conversation/exceptions.py +58 -0
  29. edsl/conversation/mug_negotiation.py +2 -8
  30. edsl/coop/__init__.py +28 -6
  31. edsl/coop/coop.py +120 -29
  32. edsl/coop/coop_functions.py +1 -1
  33. edsl/coop/ep_key_handling.py +1 -1
  34. edsl/coop/exceptions.py +188 -9
  35. edsl/coop/price_fetcher.py +5 -8
  36. edsl/coop/utils.py +4 -6
  37. edsl/dataset/__init__.py +5 -4
  38. edsl/dataset/dataset.py +177 -86
  39. edsl/dataset/dataset_operations_mixin.py +98 -76
  40. edsl/dataset/dataset_tree.py +11 -7
  41. edsl/dataset/display/table_display.py +0 -2
  42. edsl/dataset/display/table_renderers.py +6 -4
  43. edsl/dataset/exceptions.py +125 -0
  44. edsl/dataset/file_exports.py +18 -11
  45. edsl/dataset/r/ggplot.py +13 -6
  46. edsl/display/__init__.py +27 -0
  47. edsl/display/core.py +147 -0
  48. edsl/display/plugin.py +189 -0
  49. edsl/display/utils.py +52 -0
  50. edsl/inference_services/__init__.py +9 -1
  51. edsl/inference_services/available_model_cache_handler.py +1 -1
  52. edsl/inference_services/available_model_fetcher.py +5 -6
  53. edsl/inference_services/data_structures.py +10 -7
  54. edsl/inference_services/exceptions.py +132 -1
  55. edsl/inference_services/inference_service_abc.py +2 -2
  56. edsl/inference_services/inference_services_collection.py +2 -6
  57. edsl/inference_services/registry.py +4 -3
  58. edsl/inference_services/service_availability.py +4 -3
  59. edsl/inference_services/services/anthropic_service.py +4 -1
  60. edsl/inference_services/services/aws_bedrock.py +13 -12
  61. edsl/inference_services/services/azure_ai.py +12 -10
  62. edsl/inference_services/services/deep_infra_service.py +1 -4
  63. edsl/inference_services/services/deep_seek_service.py +1 -5
  64. edsl/inference_services/services/google_service.py +7 -3
  65. edsl/inference_services/services/groq_service.py +1 -1
  66. edsl/inference_services/services/mistral_ai_service.py +4 -2
  67. edsl/inference_services/services/ollama_service.py +1 -1
  68. edsl/inference_services/services/open_ai_service.py +7 -5
  69. edsl/inference_services/services/perplexity_service.py +6 -2
  70. edsl/inference_services/services/test_service.py +8 -7
  71. edsl/inference_services/services/together_ai_service.py +2 -3
  72. edsl/inference_services/services/xai_service.py +1 -1
  73. edsl/instructions/__init__.py +1 -1
  74. edsl/instructions/change_instruction.py +7 -5
  75. edsl/instructions/exceptions.py +61 -0
  76. edsl/instructions/instruction.py +6 -2
  77. edsl/instructions/instruction_collection.py +6 -4
  78. edsl/instructions/instruction_handler.py +12 -15
  79. edsl/interviews/ReportErrors.py +0 -3
  80. edsl/interviews/__init__.py +9 -2
  81. edsl/interviews/answering_function.py +11 -13
  82. edsl/interviews/exception_tracking.py +15 -8
  83. edsl/interviews/exceptions.py +79 -0
  84. edsl/interviews/interview.py +33 -30
  85. edsl/interviews/interview_status_dictionary.py +4 -2
  86. edsl/interviews/interview_status_log.py +2 -1
  87. edsl/interviews/interview_task_manager.py +5 -5
  88. edsl/interviews/request_token_estimator.py +5 -2
  89. edsl/interviews/statistics.py +3 -4
  90. edsl/invigilators/__init__.py +7 -1
  91. edsl/invigilators/exceptions.py +79 -0
  92. edsl/invigilators/invigilator_base.py +0 -1
  93. edsl/invigilators/invigilators.py +9 -13
  94. edsl/invigilators/prompt_constructor.py +1 -5
  95. edsl/invigilators/prompt_helpers.py +8 -4
  96. edsl/invigilators/question_instructions_prompt_builder.py +1 -1
  97. edsl/invigilators/question_option_processor.py +9 -5
  98. edsl/invigilators/question_template_replacements_builder.py +3 -2
  99. edsl/jobs/__init__.py +42 -5
  100. edsl/jobs/async_interview_runner.py +25 -23
  101. edsl/jobs/check_survey_scenario_compatibility.py +11 -10
  102. edsl/jobs/data_structures.py +8 -5
  103. edsl/jobs/exceptions.py +177 -8
  104. edsl/jobs/fetch_invigilator.py +1 -1
  105. edsl/jobs/jobs.py +74 -69
  106. edsl/jobs/jobs_checks.py +6 -7
  107. edsl/jobs/jobs_component_constructor.py +4 -4
  108. edsl/jobs/jobs_pricing_estimation.py +4 -3
  109. edsl/jobs/jobs_remote_inference_logger.py +5 -4
  110. edsl/jobs/jobs_runner_asyncio.py +3 -4
  111. edsl/jobs/jobs_runner_status.py +8 -9
  112. edsl/jobs/remote_inference.py +27 -24
  113. edsl/jobs/results_exceptions_handler.py +10 -7
  114. edsl/key_management/__init__.py +3 -1
  115. edsl/key_management/exceptions.py +62 -0
  116. edsl/key_management/key_lookup.py +1 -1
  117. edsl/key_management/key_lookup_builder.py +37 -14
  118. edsl/key_management/key_lookup_collection.py +2 -0
  119. edsl/language_models/__init__.py +1 -1
  120. edsl/language_models/exceptions.py +302 -14
  121. edsl/language_models/language_model.py +9 -8
  122. edsl/language_models/model.py +4 -4
  123. edsl/language_models/model_list.py +1 -1
  124. edsl/language_models/price_manager.py +1 -1
  125. edsl/language_models/raw_response_handler.py +14 -9
  126. edsl/language_models/registry.py +17 -21
  127. edsl/language_models/repair.py +0 -6
  128. edsl/language_models/unused/fake_openai_service.py +0 -1
  129. edsl/load_plugins.py +69 -0
  130. edsl/logger.py +146 -0
  131. edsl/notebooks/__init__.py +24 -1
  132. edsl/notebooks/exceptions.py +82 -0
  133. edsl/notebooks/notebook.py +7 -3
  134. edsl/notebooks/notebook_to_latex.py +1 -2
  135. edsl/plugins/__init__.py +63 -0
  136. edsl/plugins/built_in/export_example.py +50 -0
  137. edsl/plugins/built_in/pig_latin.py +67 -0
  138. edsl/plugins/cli.py +372 -0
  139. edsl/plugins/cli_typer.py +283 -0
  140. edsl/plugins/exceptions.py +31 -0
  141. edsl/plugins/hookspec.py +51 -0
  142. edsl/plugins/plugin_host.py +128 -0
  143. edsl/plugins/plugin_manager.py +633 -0
  144. edsl/plugins/plugins_registry.py +168 -0
  145. edsl/prompts/__init__.py +24 -1
  146. edsl/prompts/exceptions.py +107 -5
  147. edsl/prompts/prompt.py +15 -7
  148. edsl/questions/HTMLQuestion.py +5 -11
  149. edsl/questions/Quick.py +0 -1
  150. edsl/questions/__init__.py +6 -4
  151. edsl/questions/answer_validator_mixin.py +318 -323
  152. edsl/questions/compose_questions.py +3 -3
  153. edsl/questions/descriptors.py +11 -50
  154. edsl/questions/exceptions.py +278 -22
  155. edsl/questions/loop_processor.py +7 -5
  156. edsl/questions/prompt_templates/question_list.jinja +3 -0
  157. edsl/questions/question_base.py +46 -19
  158. edsl/questions/question_base_gen_mixin.py +2 -2
  159. edsl/questions/question_base_prompts_mixin.py +13 -7
  160. edsl/questions/question_budget.py +503 -98
  161. edsl/questions/question_check_box.py +660 -160
  162. edsl/questions/question_dict.py +345 -194
  163. edsl/questions/question_extract.py +401 -61
  164. edsl/questions/question_free_text.py +80 -14
  165. edsl/questions/question_functional.py +119 -9
  166. edsl/questions/{derived/question_likert_five.py → question_likert_five.py} +2 -2
  167. edsl/questions/{derived/question_linear_scale.py → question_linear_scale.py} +3 -4
  168. edsl/questions/question_list.py +275 -28
  169. edsl/questions/question_matrix.py +643 -96
  170. edsl/questions/question_multiple_choice.py +219 -51
  171. edsl/questions/question_numerical.py +361 -32
  172. edsl/questions/question_rank.py +401 -124
  173. edsl/questions/question_registry.py +7 -5
  174. edsl/questions/{derived/question_top_k.py → question_top_k.py} +3 -3
  175. edsl/questions/{derived/question_yes_no.py → question_yes_no.py} +3 -4
  176. edsl/questions/register_questions_meta.py +2 -2
  177. edsl/questions/response_validator_abc.py +13 -15
  178. edsl/questions/response_validator_factory.py +10 -12
  179. edsl/questions/templates/dict/answering_instructions.jinja +1 -0
  180. edsl/questions/templates/rank/question_presentation.jinja +1 -1
  181. edsl/results/__init__.py +1 -1
  182. edsl/results/exceptions.py +141 -7
  183. edsl/results/report.py +1 -2
  184. edsl/results/result.py +11 -9
  185. edsl/results/results.py +480 -321
  186. edsl/results/results_selector.py +8 -4
  187. edsl/scenarios/PdfExtractor.py +2 -2
  188. edsl/scenarios/construct_download_link.py +69 -35
  189. edsl/scenarios/directory_scanner.py +33 -14
  190. edsl/scenarios/document_chunker.py +1 -1
  191. edsl/scenarios/exceptions.py +238 -14
  192. edsl/scenarios/file_methods.py +1 -1
  193. edsl/scenarios/file_store.py +7 -3
  194. edsl/scenarios/handlers/__init__.py +17 -0
  195. edsl/scenarios/handlers/docx_file_store.py +0 -5
  196. edsl/scenarios/handlers/pdf_file_store.py +0 -1
  197. edsl/scenarios/handlers/pptx_file_store.py +0 -5
  198. edsl/scenarios/handlers/py_file_store.py +0 -1
  199. edsl/scenarios/handlers/sql_file_store.py +1 -4
  200. edsl/scenarios/handlers/sqlite_file_store.py +0 -1
  201. edsl/scenarios/handlers/txt_file_store.py +1 -1
  202. edsl/scenarios/scenario.py +1 -3
  203. edsl/scenarios/scenario_list.py +179 -27
  204. edsl/scenarios/scenario_list_pdf_tools.py +1 -0
  205. edsl/scenarios/scenario_selector.py +0 -1
  206. edsl/surveys/__init__.py +3 -4
  207. edsl/surveys/dag/__init__.py +4 -2
  208. edsl/surveys/descriptors.py +1 -1
  209. edsl/surveys/edit_survey.py +1 -0
  210. edsl/surveys/exceptions.py +165 -9
  211. edsl/surveys/memory/__init__.py +5 -3
  212. edsl/surveys/memory/memory_management.py +1 -0
  213. edsl/surveys/memory/memory_plan.py +6 -15
  214. edsl/surveys/rules/__init__.py +5 -3
  215. edsl/surveys/rules/rule.py +1 -2
  216. edsl/surveys/rules/rule_collection.py +1 -1
  217. edsl/surveys/survey.py +12 -24
  218. edsl/surveys/survey_css.py +3 -3
  219. edsl/surveys/survey_export.py +6 -3
  220. edsl/surveys/survey_flow_visualization.py +10 -1
  221. edsl/surveys/survey_simulator.py +2 -1
  222. edsl/tasks/__init__.py +23 -1
  223. edsl/tasks/exceptions.py +72 -0
  224. edsl/tasks/question_task_creator.py +3 -3
  225. edsl/tasks/task_creators.py +1 -3
  226. edsl/tasks/task_history.py +8 -10
  227. edsl/tasks/task_status_log.py +1 -2
  228. edsl/tokens/__init__.py +29 -1
  229. edsl/tokens/exceptions.py +37 -0
  230. edsl/tokens/interview_token_usage.py +3 -2
  231. edsl/tokens/token_usage.py +4 -3
  232. edsl/utilities/__init__.py +21 -1
  233. edsl/utilities/decorators.py +1 -2
  234. edsl/utilities/markdown_to_docx.py +2 -2
  235. edsl/utilities/markdown_to_pdf.py +1 -1
  236. edsl/utilities/repair_functions.py +0 -1
  237. edsl/utilities/restricted_python.py +0 -1
  238. edsl/utilities/template_loader.py +2 -3
  239. edsl/utilities/utilities.py +8 -29
  240. {edsl-0.1.49.dist-info → edsl-0.1.51.dist-info}/METADATA +32 -2
  241. edsl-0.1.51.dist-info/RECORD +365 -0
  242. edsl-0.1.51.dist-info/entry_points.txt +3 -0
  243. edsl/dataset/smart_objects.py +0 -96
  244. edsl/exceptions/BaseException.py +0 -21
  245. edsl/exceptions/__init__.py +0 -54
  246. edsl/exceptions/configuration.py +0 -16
  247. edsl/exceptions/general.py +0 -34
  248. edsl/questions/derived/__init__.py +0 -0
  249. edsl/study/ObjectEntry.py +0 -173
  250. edsl/study/ProofOfWork.py +0 -113
  251. edsl/study/SnapShot.py +0 -80
  252. edsl/study/Study.py +0 -520
  253. edsl/study/__init__.py +0 -6
  254. edsl/utilities/interface.py +0 -135
  255. edsl-0.1.49.dist-info/RECORD +0 -347
  256. {edsl-0.1.49.dist-info → edsl-0.1.51.dist-info}/LICENSE +0 -0
  257. {edsl-0.1.49.dist-info → edsl-0.1.51.dist-info}/WHEEL +0 -0
@@ -12,16 +12,18 @@ ScenarioList, AgentList) to share the same data manipulation interface, enabling
  fluid operations across different parts of the EDSL ecosystem.
  """
 
- from abc import ABC, abstractmethod
  import io
  import warnings
  import textwrap
- from typing import Optional, Tuple, Union, List, TYPE_CHECKING
+ from typing import Optional, Tuple, Union, List, TYPE_CHECKING  # Callable not used
+ from functools import wraps
  from .r.ggplot import GGPlotMethod
+ from .exceptions import DatasetKeyError, DatasetValueError, DatasetTypeError, DatasetExportError
 
  if TYPE_CHECKING:
      from docx import Document
      from .dataset import Dataset
+     from ..jobs import Job  # noqa: F401
 
  class DataOperationsBase:
      """
@@ -135,10 +137,7 @@ class DataOperationsBase:
          >>> sorted(Results.example().select().relevant_columns(data_type = "model"))
          ['model.frequency_penalty', ...]
 
-         >>> Results.example().relevant_columns(data_type = "flimflam")
-         Traceback (most recent call last):
-         ...
-         ValueError: No columns found for data type: flimflam. Available data types are: ...
+         >>> # Testing relevant_columns with invalid data_type raises DatasetValueError - tested in unit tests
          """
          columns = [list(x.keys())[0] for x in self]
          if remove_prefix:
@@ -159,7 +158,7 @@
              all_data_types = sorted(
                  list(set(get_data_type(column) for column in all_columns))
              )
-             raise ValueError(
+             raise DatasetValueError(
                  f"No columns found for data type: {data_type}. Available data types are: {all_data_types}."
              )
 
@@ -179,12 +178,19 @@
              _num_observations = len(values)
          else:
              if len(values) != _num_observations:
-                 raise ValueError(
+                 raise DatasetValueError(
                      f"The number of observations is not consistent across columns. "
                      f"Column '{key}' has {len(values)} observations, but previous columns had {_num_observations} observations."
                  )
 
          return _num_observations
+
+     def chart(self):
+         """
+         Create a chart from the results.
+         """
+         import altair as alt
+         return alt.Chart(self.to_pandas(remove_prefix=True))
 
      def make_tabular(
          self, remove_prefix: bool, pretty_labels: Optional[dict] = None
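The new `chart()` helper above just wraps the unprefixed pandas DataFrame in a bare Altair chart, leaving marks and encodings to the caller. A minimal usage sketch (the `how_feeling` column comes from the doctests in this file; the bar mark and encodings are illustrative and assume `altair` is installed):

```python
from edsl.results import Results

# chart() returns alt.Chart(self.to_pandas(remove_prefix=True)),
# so the usual Altair layering applies on top of it.
chart = (
    Results.example()
    .select("how_feeling")
    .chart()
    .mark_bar()
    .encode(x="how_feeling", y="count()")
)
chart.save("how_feeling.html")  # or render inline in a notebook
```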
@@ -262,8 +268,9 @@
              remove_prefix=remove_prefix, pretty_labels=pretty_labels
          )
 
-     def to_jsonl(self, filename: Optional[str] = None) -> Optional["FileStore"]:
+     def to_jsonl(self, filename: Optional[str] = None):
          """Export the results to a FileStore instance containing JSONL data."""
+         from .file_exports import JSONLExport
          exporter = JSONLExport(data=self, filename=filename)
          return exporter.export()
 
@@ -274,8 +281,9 @@
          pretty_labels: Optional[dict] = None,
          table_name: str = "results",
          if_exists: str = "replace",
-     ) -> Optional["FileStore"]:
+     ):
          """Export the results to a SQLite database file."""
+         from .file_exports import SQLiteExport
          exporter = SQLiteExport(
              data=self,
              filename=filename,
@@ -291,7 +299,7 @@
          filename: Optional[str] = None,
          remove_prefix: bool = False,
          pretty_labels: Optional[dict] = None,
-     ) -> Optional["FileStore"]:
+     ):
          """Export the results to a FileStore instance containing CSV data."""
          from .file_exports import CSVExport
 
@@ -309,9 +317,9 @@
          remove_prefix: bool = False,
          pretty_labels: Optional[dict] = None,
          sheet_name: Optional[str] = None,
-     ) -> Optional["FileStore"]:
+     ):
          """Export the results to a FileStore instance containing Excel data."""
-         from .file_exports import ExcelExport
+         from .file_exports import ExcelExport
 
          exporter = ExcelExport(
              data=self,
@@ -324,25 +332,28 @@
 
      def _db(
          self, remove_prefix: bool = True, shape: str = "wide"
-     ) -> "sqlalchemy.engine.Engine":
+     ):
          """Create a SQLite database in memory and return the connection.
 
          Args:
              remove_prefix: Whether to remove the prefix from the column names
              shape: The shape of the data in the database ("wide" or "long")
-
+
          Returns:
              A database connection
-         >>> from sqlalchemy import text
-         >>> from edsl import Results
-         >>> engine = Results.example()._db()
-         >>> len(engine.execute(text("SELECT * FROM self")).fetchall())
-         4
-         >>> engine = Results.example()._db(shape = "long")
-         >>> len(engine.execute(text("SELECT * FROM self")).fetchall())
-         172
+
+         Examples:
+             >>> from sqlalchemy import text
+             >>> from edsl import Results
+             >>> engine = Results.example()._db()
+             >>> len(engine.execute(text("SELECT * FROM self")).fetchall())
+             4
+             >>> engine = Results.example()._db(shape = "long")
+             >>> len(engine.execute(text("SELECT * FROM self")).fetchall())
+             172
          """
-         from sqlalchemy import create_engine, text
+         # Import needed for database connection
+         from sqlalchemy import create_engine
 
          engine = create_engine("sqlite:///:memory:")
          if remove_prefix and shape == "wide":
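Per the doctest above, `_db()` materializes the data as a single table named `self` in an in-memory SQLite engine; the long shape melts the wide table into key/value rows (172 rows versus 4 in the example). A sketch mirroring the doctest (note it uses the pre-2.0 `Engine.execute` style, so it assumes a SQLAlchemy version where that API is still available):

```python
from sqlalchemy import text
from edsl import Results

engine = Results.example()._db()  # wide: one row per observation
rows = engine.execute(text("SELECT * FROM self")).fetchall()

long_engine = Results.example()._db(shape="long")  # melted key/value rows
long_rows = long_engine.execute(text("SELECT * FROM self")).fetchall()
```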
@@ -445,29 +456,35 @@
 
      def to_pandas(
          self, remove_prefix: bool = False, lists_as_strings=False
-     ) -> "DataFrame":
+     ):
          """Convert the results to a pandas DataFrame, ensuring that lists remain as lists.
 
-         :param remove_prefix: Whether to remove the prefix from the column names.
-
+         Args:
+             remove_prefix: Whether to remove the prefix from the column names.
+             lists_as_strings: Whether to convert lists to strings.
+
+         Returns:
+             A pandas DataFrame.
          """
+         # pandas is imported in _to_pandas_strings
          return self._to_pandas_strings(remove_prefix)
 
-     def _to_pandas_strings(self, remove_prefix: bool = False) -> "pd.DataFrame":
+     def _to_pandas_strings(self, remove_prefix: bool = False):
          """Convert the results to a pandas DataFrame.
 
-         :param remove_prefix: Whether to remove the prefix from the column names.
+         Args:
+             remove_prefix: Whether to remove the prefix from the column names.
 
-         >>> from edsl.results import Results
-         >>> r = Results.example()
-         >>> r.select('how_feeling').to_pandas()
-         answer.how_feeling
-         0 OK
-         1 Great
-         2 Terrible
-         3 OK
+         Examples:
+             >>> from edsl.results import Results
+             >>> r = Results.example()
+             >>> r.select('how_feeling').to_pandas()
+             answer.how_feeling
+             0 OK
+             1 Great
+             2 Terrible
+             3 OK
          """
-
          import pandas as pd
 
          csv_string = self.to_csv(remove_prefix=remove_prefix).text
@@ -478,17 +495,27 @@
 
      def to_polars(
          self, remove_prefix: bool = False, lists_as_strings=False
-     ) -> "pl.DataFrame":
+     ):
          """Convert the results to a Polars DataFrame.
 
-         :param remove_prefix: Whether to remove the prefix from the column names.
+         Args:
+             remove_prefix: Whether to remove the prefix from the column names.
+             lists_as_strings: Whether to convert lists to strings.
+
+         Returns:
+             A Polars DataFrame.
          """
+         # polars is imported in _to_polars_strings
          return self._to_polars_strings(remove_prefix)
 
-     def _to_polars_strings(self, remove_prefix: bool = False) -> "pl.DataFrame":
+     def _to_polars_strings(self, remove_prefix: bool = False):
          """Convert the results to a Polars DataFrame.
 
-         :param remove_prefix: Whether to remove the prefix from the column names.
+         Args:
+             remove_prefix: Whether to remove the prefix from the column names.
+
+         Returns:
+             A Polars DataFrame.
          """
          import polars as pl
 
@@ -496,10 +523,14 @@
          df = pl.read_csv(io.StringIO(csv_string))
          return df
 
-     def tree(self, node_order: Optional[List[str]] = None) -> "Tree":
+     def tree(self, node_order: Optional[List[str]] = None):
          """Convert the results to a Tree.
 
-         :param node_order: The order of the nodes.
+         Args:
+             node_order: The order of the nodes.
+
+         Returns:
+             A Tree object.
          """
          from .dataset_tree import Tree
          return Tree(self, node_order=node_order)
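`tree()` defers to `dataset_tree.Tree`, which nests the rows level by level following `node_order`; keys that are not columns of the dataset now raise `DatasetValueError` (see the dataset_tree.py hunks below). A sketch (the second question name and the prefixed keys are illustrative):

```python
from edsl.results import Results

ds = Results.example().select("how_feeling", "how_feeling_yesterday")
# node_order must be a subset of the dataset's column names
tree = ds.tree(node_order=["answer.how_feeling", "answer.how_feeling_yesterday"])
```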
@@ -514,13 +545,14 @@
          >>> r.select('how_feeling').to_scenario_list()
          ScenarioList([Scenario({'how_feeling': 'OK'}), Scenario({'how_feeling': 'Great'}), Scenario({'how_feeling': 'Terrible'}), Scenario({'how_feeling': 'OK'})])
          """
-         from edsl.scenarios import ScenarioList, Scenario
+         from ..scenarios import ScenarioList, Scenario
 
          list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
          scenarios = []
          for d in list_of_dicts:
              scenarios.append(Scenario(d))
          return ScenarioList(scenarios)
+
 
      def to_agent_list(self, remove_prefix: bool = True):
          """Convert the results to a list of dictionaries, one per agent.
@@ -532,7 +564,7 @@
          >>> r.select('how_feeling').to_agent_list()
          AgentList([Agent(traits = {'how_feeling': 'OK'}), Agent(traits = {'how_feeling': 'Great'}), Agent(traits = {'how_feeling': 'Terrible'}), Agent(traits = {'how_feeling': 'OK'})])
          """
-         from edsl.agents import Agent, AgentList
+         from ..agents import Agent, AgentList
 
          list_of_dicts = self.to_dicts(remove_prefix=remove_prefix)
          agents = []
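Taken together with `to_scenario_list()`, this gives a direct round trip from results back into EDSL inputs, exactly as the doctests show:

```python
from edsl.results import Results

r = Results.example()
r.select("how_feeling").to_agent_list()
# AgentList([Agent(traits = {'how_feeling': 'OK'}), ...])
r.select("how_feeling").to_scenario_list()
# ScenarioList([Scenario({'how_feeling': 'OK'}), ...])
```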
@@ -598,15 +630,12 @@
          [1, 9, 2, 3, 4]
 
          >>> from edsl.dataset import Dataset
-         >>> Dataset([{'a.b': [[1, 9], 2, 3, 4]}, {'c': [6, 2, 3, 4]}]).select('a.b', 'c').to_list(flatten = True)
-         Traceback (most recent call last):
-         ...
-         ValueError: Cannot flatten a list of lists when there are multiple columns selected.
+         >>> # Testing to_list flatten with multiple columns raises DatasetValueError - tested in unit tests
 
 
          """
          if len(self.relevant_columns()) > 1 and flatten:
-             raise ValueError(
+             raise DatasetValueError(
                  "Cannot flatten a list of lists when there are multiple columns selected."
              )
 
@@ -632,7 +661,6 @@
              new_list.append(item)
          list_to_return = new_list
 
-         from edsl.utilities.PrettyList import PrettyList
 
          #return PrettyList(list_to_return)
          return list_to_return
@@ -645,9 +673,8 @@
      ):
          import os
          import tempfile
-         from edsl.utilities.utilities import is_notebook
+         from ..utilities.utilities import is_notebook
          from IPython.display import HTML, display
-         from edsl.utilities.utilities import is_notebook
 
          df = self.to_pandas()
 
@@ -698,7 +725,7 @@
          all_fields = list(fields) + [f for f in header_fields if f not in fields]
          for field in all_fields:
              if field not in self.relevant_columns():
-                 raise ValueError(f"Field '{field}' not found in dataset")
+                 raise DatasetKeyError(f"Field '{field}' not found in dataset")
 
          # Get data for each field
          field_data = {}
@@ -780,7 +807,8 @@
              from docx.shared import Pt
              import json
          except ImportError:
-             raise ImportError("The python-docx package is required for DOCX export. Install it with 'pip install python-docx'.")
+             from .exceptions import DatasetImportError
+             raise DatasetImportError("The python-docx package is required for DOCX export. Install it with 'pip install python-docx'.")
 
          doc = Document()
 
@@ -797,7 +825,7 @@
          if header_parts:
              header_text += f" ({', '.join(header_parts)})"
 
-         heading = doc.add_heading(header_text, level=1)
+         doc.add_heading(header_text, level=1)
 
          # Add the remaining fields
          for field in fields:
@@ -823,7 +851,7 @@
      def report(self, *fields: Optional[str], top_n: Optional[int] = None,
                 header_fields: Optional[List[str]] = None, divider: bool = True,
                 return_string: bool = False, format: str = "markdown",
-                filename: Optional[str] = None) -> Optional[Union[str, "docx.Document"]]:
+                filename: Optional[str] = None) -> Optional[Union[str, "Document"]]:
          """Generates a report of the results by iterating through rows.
 
          Args:
@@ -851,7 +879,7 @@
          >>> isinstance(doc, object)
          True
          """
-         from edsl.utilities.utilities import is_notebook
+         from ..utilities.utilities import is_notebook
 
          # Prepare the data for the report
          field_data, num_obs, fields, header_fields = self._prepare_report_data(
@@ -886,7 +914,7 @@
              return doc
 
          else:
-             raise ValueError(f"Unsupported format: {format}. Use 'markdown' or 'docx'.")
+             raise DatasetExportError(f"Unsupported format: {format}. Use 'markdown' or 'docx'.")
 
      def tally(
          self, *fields: Optional[str], top_n: Optional[int] = None, output="Dataset"
@@ -945,7 +973,7 @@
              f in self.relevant_columns() or f in relevant_columns_without_prefix
              for f in fields
          ):
-             raise ValueError("One or more specified fields are not in the dataset."
+             raise DatasetKeyError("One or more specified fields are not in the dataset."
                  f"The available fields are: {self.relevant_columns()}"
              )
 
@@ -963,7 +991,7 @@
          except TypeError:
              tally = dict(Counter([str(v) for v in values]))
          except Exception as e:
-             raise ValueError(f"Error tallying values: {e}")
+             raise DatasetValueError(f"Error tallying values: {e}")
 
          sorted_tally = dict(sorted(tally.items(), key=lambda item: -item[1]))
          if top_n is not None:
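`tally()` accepts fields with or without their type prefix (both are checked above), counts value frequencies, falls back to string keys for unhashable values, and wraps any other failure in the new `DatasetValueError`. A usage sketch (field name from the doctests; the default `output="Dataset"` returns the counts as a new Dataset):

```python
from edsl.results import Results

counts = Results.example().select("how_feeling").tally("how_feeling")
top_two = Results.example().select("how_feeling").tally("how_feeling", top_n=2)
```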
@@ -1056,7 +1084,8 @@
          # Check if the field is ambiguous
          if len(matching_entries) > 1:
              matching_cols = [next(iter(entry.keys())) for entry in matching_entries]
-             raise ValueError(
+             from .exceptions import DatasetValueError
+             raise DatasetValueError(
                  f"Ambiguous field name '{field}'. It matches multiple columns: {matching_cols}. "
                  f"Please specify the full column name to flatten."
              )
@@ -1159,13 +1188,13 @@
                  break
 
          if field_index is None:
-             raise ValueError(f"Field '{field}' not found in dataset")
+             raise DatasetKeyError(f"Field '{field}' not found in dataset")
 
          field_data = result.data[field_index][field]
 
          # Check if values are lists
          if not all(isinstance(v, list) for v in field_data):
-             raise ValueError(f"Field '{field}' does not contain lists in all entries")
+             raise DatasetTypeError(f"Field '{field}' does not contain lists in all entries")
 
          # Get the maximum length of lists
          max_len = max(len(v) for v in field_data)
@@ -1209,16 +1238,13 @@
          >>> d.drop('a')
          Dataset([{'b': [4, 5, 6]}])
 
-         >>> d.drop('c')
-         Traceback (most recent call last):
-         ...
-         KeyError: "Field 'c' not found in dataset"
+         >>> # Testing drop with nonexistent field raises DatasetKeyError - tested in unit tests
          """
          from .dataset import Dataset
 
          # Check if field exists in the dataset
          if field_name not in self.relevant_columns():
-             raise KeyError(f"Field '{field_name}' not found in dataset")
+             raise DatasetKeyError(f"Field '{field_name}' not found in dataset")
 
          # Create a new dataset without the specified field
          new_data = [entry for entry in self.data if field_name not in entry]
@@ -1248,9 +1274,7 @@
          >>> d = Dataset([{'a.x': [1, 2, 3]}, {'b.x': [4, 5, 6]}])
          >>> # d.remove_prefix()
 
-         Traceback (most recent call last):
-         ...
-         ValueError: Removing prefixes would result in duplicate column names: ['x']
+         # Testing remove_prefix with duplicate column names raises DatasetValueError - tested in unit tests
          """
          from .dataset import Dataset
 
@@ -1273,7 +1297,7 @@
 
          # Check for duplicates
          if duplicates:
-             raise ValueError(f"Removing prefixes would result in duplicate column names: {sorted(list(duplicates))}")
+             raise DatasetValueError(f"Removing prefixes would result in duplicate column names: {sorted(list(duplicates))}")
 
          # Create a new dataset with unprefixed column names
          new_data = []
@@ -1288,8 +1312,6 @@
          return Dataset(new_data)
 
 
- from functools import wraps
-
  def to_dataset(func):
      """
      Decorator that ensures functions receive a Dataset object as their first argument.
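The decorator's body is cut off by the hunk boundary here; a minimal sketch of the pattern the docstring describes, not the actual implementation (the coercion hook is hypothetical):

```python
from functools import wraps

def to_dataset(func):
    """Sketch: hand func a Dataset as its first argument."""
    @wraps(func)
    def wrapper(self, *args, **kwargs):
        from .dataset import Dataset
        if not isinstance(self, Dataset):
            self = self.to_dataset()  # hypothetical coercion hook
        return func(self, *args, **kwargs)
    return wrapper
```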
@@ -1,4 +1,7 @@
- from typing import Dict, List, Any, Optional, List
+ from typing import Optional, List, TYPE_CHECKING
+
+ if TYPE_CHECKING:
+     from .dataset import Dataset
 
 
  def is_hashable(v):
@@ -16,8 +19,10 @@ class TreeNode:
          self.children = {}
 
 
+
  class Tree:
      def __init__(self, data: "Dataset", node_order: Optional[List[str]] = None):
+         """Initialize the tree with a Dataset."""
          d = {}
          for entry in data:
              d.update(entry)
@@ -46,7 +51,8 @@
          else:
              if not set(node_order).issubset(set(self.data.keys())):
                  invalid_keys = set(node_order) - set(self.data.keys())
-                 raise ValueError(f"Invalid keys in node_order: {invalid_keys}")
+                 from .exceptions import DatasetValueError
+                 raise DatasetValueError(f"Invalid keys in node_order: {invalid_keys}")
 
          self.root = TreeNode()
 
@@ -95,8 +101,7 @@
              filename = "tree_structure.docx"
 
          from docx import Document
-         from docx.shared import Inches, Pt
-         from docx.enum.text import WD_ALIGN_PARAGRAPH
+         from docx.shared import Pt
          from docx.enum.style import WD_STYLE_TYPE
 
          doc = Document()
@@ -118,7 +123,6 @@
          self._add_to_docx(doc, self.root, 0)
          import base64
          from io import BytesIO
-         import base64
 
          # Save document to bytes buffer
          doc_buffer = BytesIO()
@@ -126,7 +130,7 @@
          doc_buffer.seek(0)
 
          base64_string = base64.b64encode(doc_buffer.getvalue()).decode("utf-8")
-         from edsl.scenarios.FileStore import FileStore
+         from ..scenarios.file_store import FileStore
 
          # Create and return FileStore instance
          return FileStore(
@@ -331,7 +335,7 @@
          Returns:
              A string containing the markdown document, or renders markdown in notebooks.
          """
-         from edsl.utilities.utilities import is_notebook
+         from ..utilities.utilities import is_notebook
          from IPython.display import Markdown, display
 
          if node is None:
@@ -1,7 +1,5 @@
  from typing import (
      Protocol,
-     List,
-     Any,
      Optional,
      TYPE_CHECKING,
      Sequence,
@@ -1,5 +1,4 @@
  from abc import ABC, abstractmethod
- import os
  from pathlib import Path
  from .table_data_class import TableData
 
@@ -104,9 +103,12 @@ class PandasStyleRenderer(DataTablesRendererABC):
          else:
              df = pd.DataFrame(self.table_data.data, columns=self.table_data.headers)
 
-         styled_df = df.style.set_properties(
-             **{"text-align": "left"}
-         ).background_gradient()
+         styled_df = df.style.set_properties(**{
+             "text-align": "left",
+             "white-space": "pre-wrap",  # Allows text wrapping
+             "max-width": "300px",  # Maximum width before wrapping
+             "word-wrap": "break-word"  # Breaks words that exceed max-width
+         }).background_gradient()
 
          return f"""
          <div style="max-height: 500px; overflow-y: auto;">
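The widened `set_properties` call is plain pandas Styler API; standalone, the same styling looks like this (`background_gradient` needs matplotlib installed for its colormap):

```python
import pandas as pd

df = pd.DataFrame({"question": ["How are you feeling today?"], "answer": ["OK"]})
styled = df.style.set_properties(**{
    "text-align": "left",
    "white-space": "pre-wrap",   # wrap long cell text
    "max-width": "300px",        # cap column width before wrapping
    "word-wrap": "break-word",   # break words that exceed max-width
}).background_gradient()
html = styled.to_html()
```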
@@ -0,0 +1,125 @@
+ """
+ Exceptions module for dataset-related operations.
+
+ This module defines custom exception classes for all dataset-related error conditions
+ in the EDSL framework, ensuring consistent error handling for data manipulation,
+ transformation, and analysis operations.
+ """
+
+ from ..base import BaseException
+
+
+ class DatasetError(BaseException):
+     """
+     Base exception class for all dataset-related errors.
+
+     This is the parent class for exceptions related to Dataset operations
+     in the EDSL framework, including data creation, manipulation, validation,
+     and serialization.
+
+     Examples:
+         ```python
+         # Usually not raised directly, but through subclasses:
+         dataset = Dataset([])
+         dataset["missing_key"]  # Would raise DatasetKeyError
+         ```
+     """
+     relevant_doc = "https://docs.expectedparrot.com/en/latest/dataset.html"
+
+
+ class DatasetKeyError(DatasetError):
+     """
+     Exception raised when a key is not found in a dataset.
+
+     This exception occurs when attempting to access a field or column
+     that doesn't exist in the dataset.
+
+     Examples:
+         ```python
+         dataset = Dataset([{"a": 1}])
+         dataset["b"]  # Raises DatasetKeyError
+         ```
+     """
+     relevant_doc = "https://docs.expectedparrot.com/en/latest/dataset.html"
+
+
+ class DatasetValueError(DatasetError):
+     """
+     Exception raised when there's an issue with dataset values.
+
+     This exception occurs when dataset values are invalid, incompatible
+     with an operation, or otherwise problematic.
+
+     Examples:
+         ```python
+         dataset = Dataset([{"a": 1}, {"b": 2}])
+         dataset.select(["c"])  # Raises DatasetValueError for missing field
+         ```
+     """
+     relevant_doc = "https://docs.expectedparrot.com/en/latest/dataset.html"
+
+
+ class DatasetTypeError(DatasetError):
+     """
+     Exception raised when there's a type mismatch in dataset operations.
+
+     This exception occurs when trying to perform operations with
+     incompatible data types.
+
+     Examples:
+         ```python
+         dataset = Dataset([{"a": 1}])
+         dataset + "not a dataset"  # Raises DatasetTypeError
+         ```
+     """
+     relevant_doc = "https://docs.expectedparrot.com/en/latest/dataset.html"
+
+
+ class DatasetExportError(DatasetError):
+     """
+     Exception raised when exporting a dataset to a different format fails.
+
+     This exception occurs when trying to export a dataset to a file format
+     (like CSV, SQLite, etc.) and the operation fails.
+
+     Examples:
+         ```python
+         dataset = Dataset([{"a": complex(1, 2)}])
+         dataset.to_csv("file.csv")  # Raises DatasetExportError (complex not serializable)
+         ```
+     """
+     relevant_doc = "https://docs.expectedparrot.com/en/latest/dataset.html"
+
+
+ class DatasetImportError(DatasetError):
+     """
+     Exception raised when importing data from an external source fails.
+
+     This exception occurs when trying to import data from an external source or format
+     (like CSV, JSON, etc.) and the operation fails, often due to missing dependencies
+     or format issues.
+
+     Examples:
+         ```python
+         # Trying to export to DOCX without python-docx package
+         dataset.to_docx("file.docx")  # Raises DatasetImportError
+         ```
+     """
+     relevant_doc = "https://docs.expectedparrot.com/en/latest/dataset.html"
+
+
+ class DatasetRuntimeError(DatasetError):
+     """
+     Exception raised when an operation fails during runtime.
+
+     This exception is used for runtime errors in dataset operations,
+     typically for operations that depend on external systems or libraries
+     like R integration.
+
+     Examples:
+         ```python
+         # Plotting with ggplot when R is not installed
+         dataset.ggplot()  # Raises DatasetRuntimeError
+         ```
+     """
+     relevant_doc = "https://docs.expectedparrot.com/en/latest/dataset.html"
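Since every class above derives from `DatasetError` (itself a subclass of the `BaseException` exported by `edsl.base`), callers can catch narrowly or broadly; a sketch (the missing field name is illustrative):

```python
from edsl.dataset import Dataset
from edsl.dataset.exceptions import DatasetError, DatasetKeyError

d = Dataset([{"a": [1, 2, 3]}])
try:
    d.drop("missing_field")  # raises DatasetKeyError per the drop() hunk above
except DatasetKeyError as exc:
    print(exc.relevant_doc)  # each class carries a pointer to the docs
except DatasetError:
    pass  # umbrella for any other dataset failure
```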