edsl 0.1.49__py3-none-any.whl → 0.1.51__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (257)
  1. edsl/__init__.py +124 -53
  2. edsl/__version__.py +1 -1
  3. edsl/agents/agent.py +21 -21
  4. edsl/agents/agent_list.py +2 -5
  5. edsl/agents/exceptions.py +119 -5
  6. edsl/base/__init__.py +10 -35
  7. edsl/base/base_class.py +71 -36
  8. edsl/base/base_exception.py +204 -0
  9. edsl/base/data_transfer_models.py +1 -1
  10. edsl/base/exceptions.py +94 -0
  11. edsl/buckets/__init__.py +15 -1
  12. edsl/buckets/bucket_collection.py +3 -4
  13. edsl/buckets/exceptions.py +107 -0
  14. edsl/buckets/model_buckets.py +1 -2
  15. edsl/buckets/token_bucket.py +11 -6
  16. edsl/buckets/token_bucket_api.py +27 -12
  17. edsl/buckets/token_bucket_client.py +9 -7
  18. edsl/caching/cache.py +12 -4
  19. edsl/caching/cache_entry.py +10 -9
  20. edsl/caching/exceptions.py +113 -7
  21. edsl/caching/remote_cache_sync.py +6 -7
  22. edsl/caching/sql_dict.py +20 -14
  23. edsl/cli.py +43 -0
  24. edsl/config/__init__.py +1 -1
  25. edsl/config/config_class.py +32 -6
  26. edsl/conversation/Conversation.py +8 -4
  27. edsl/conversation/car_buying.py +1 -3
  28. edsl/conversation/exceptions.py +58 -0
  29. edsl/conversation/mug_negotiation.py +2 -8
  30. edsl/coop/__init__.py +28 -6
  31. edsl/coop/coop.py +120 -29
  32. edsl/coop/coop_functions.py +1 -1
  33. edsl/coop/ep_key_handling.py +1 -1
  34. edsl/coop/exceptions.py +188 -9
  35. edsl/coop/price_fetcher.py +5 -8
  36. edsl/coop/utils.py +4 -6
  37. edsl/dataset/__init__.py +5 -4
  38. edsl/dataset/dataset.py +177 -86
  39. edsl/dataset/dataset_operations_mixin.py +98 -76
  40. edsl/dataset/dataset_tree.py +11 -7
  41. edsl/dataset/display/table_display.py +0 -2
  42. edsl/dataset/display/table_renderers.py +6 -4
  43. edsl/dataset/exceptions.py +125 -0
  44. edsl/dataset/file_exports.py +18 -11
  45. edsl/dataset/r/ggplot.py +13 -6
  46. edsl/display/__init__.py +27 -0
  47. edsl/display/core.py +147 -0
  48. edsl/display/plugin.py +189 -0
  49. edsl/display/utils.py +52 -0
  50. edsl/inference_services/__init__.py +9 -1
  51. edsl/inference_services/available_model_cache_handler.py +1 -1
  52. edsl/inference_services/available_model_fetcher.py +5 -6
  53. edsl/inference_services/data_structures.py +10 -7
  54. edsl/inference_services/exceptions.py +132 -1
  55. edsl/inference_services/inference_service_abc.py +2 -2
  56. edsl/inference_services/inference_services_collection.py +2 -6
  57. edsl/inference_services/registry.py +4 -3
  58. edsl/inference_services/service_availability.py +4 -3
  59. edsl/inference_services/services/anthropic_service.py +4 -1
  60. edsl/inference_services/services/aws_bedrock.py +13 -12
  61. edsl/inference_services/services/azure_ai.py +12 -10
  62. edsl/inference_services/services/deep_infra_service.py +1 -4
  63. edsl/inference_services/services/deep_seek_service.py +1 -5
  64. edsl/inference_services/services/google_service.py +7 -3
  65. edsl/inference_services/services/groq_service.py +1 -1
  66. edsl/inference_services/services/mistral_ai_service.py +4 -2
  67. edsl/inference_services/services/ollama_service.py +1 -1
  68. edsl/inference_services/services/open_ai_service.py +7 -5
  69. edsl/inference_services/services/perplexity_service.py +6 -2
  70. edsl/inference_services/services/test_service.py +8 -7
  71. edsl/inference_services/services/together_ai_service.py +2 -3
  72. edsl/inference_services/services/xai_service.py +1 -1
  73. edsl/instructions/__init__.py +1 -1
  74. edsl/instructions/change_instruction.py +7 -5
  75. edsl/instructions/exceptions.py +61 -0
  76. edsl/instructions/instruction.py +6 -2
  77. edsl/instructions/instruction_collection.py +6 -4
  78. edsl/instructions/instruction_handler.py +12 -15
  79. edsl/interviews/ReportErrors.py +0 -3
  80. edsl/interviews/__init__.py +9 -2
  81. edsl/interviews/answering_function.py +11 -13
  82. edsl/interviews/exception_tracking.py +15 -8
  83. edsl/interviews/exceptions.py +79 -0
  84. edsl/interviews/interview.py +33 -30
  85. edsl/interviews/interview_status_dictionary.py +4 -2
  86. edsl/interviews/interview_status_log.py +2 -1
  87. edsl/interviews/interview_task_manager.py +5 -5
  88. edsl/interviews/request_token_estimator.py +5 -2
  89. edsl/interviews/statistics.py +3 -4
  90. edsl/invigilators/__init__.py +7 -1
  91. edsl/invigilators/exceptions.py +79 -0
  92. edsl/invigilators/invigilator_base.py +0 -1
  93. edsl/invigilators/invigilators.py +9 -13
  94. edsl/invigilators/prompt_constructor.py +1 -5
  95. edsl/invigilators/prompt_helpers.py +8 -4
  96. edsl/invigilators/question_instructions_prompt_builder.py +1 -1
  97. edsl/invigilators/question_option_processor.py +9 -5
  98. edsl/invigilators/question_template_replacements_builder.py +3 -2
  99. edsl/jobs/__init__.py +42 -5
  100. edsl/jobs/async_interview_runner.py +25 -23
  101. edsl/jobs/check_survey_scenario_compatibility.py +11 -10
  102. edsl/jobs/data_structures.py +8 -5
  103. edsl/jobs/exceptions.py +177 -8
  104. edsl/jobs/fetch_invigilator.py +1 -1
  105. edsl/jobs/jobs.py +74 -69
  106. edsl/jobs/jobs_checks.py +6 -7
  107. edsl/jobs/jobs_component_constructor.py +4 -4
  108. edsl/jobs/jobs_pricing_estimation.py +4 -3
  109. edsl/jobs/jobs_remote_inference_logger.py +5 -4
  110. edsl/jobs/jobs_runner_asyncio.py +3 -4
  111. edsl/jobs/jobs_runner_status.py +8 -9
  112. edsl/jobs/remote_inference.py +27 -24
  113. edsl/jobs/results_exceptions_handler.py +10 -7
  114. edsl/key_management/__init__.py +3 -1
  115. edsl/key_management/exceptions.py +62 -0
  116. edsl/key_management/key_lookup.py +1 -1
  117. edsl/key_management/key_lookup_builder.py +37 -14
  118. edsl/key_management/key_lookup_collection.py +2 -0
  119. edsl/language_models/__init__.py +1 -1
  120. edsl/language_models/exceptions.py +302 -14
  121. edsl/language_models/language_model.py +9 -8
  122. edsl/language_models/model.py +4 -4
  123. edsl/language_models/model_list.py +1 -1
  124. edsl/language_models/price_manager.py +1 -1
  125. edsl/language_models/raw_response_handler.py +14 -9
  126. edsl/language_models/registry.py +17 -21
  127. edsl/language_models/repair.py +0 -6
  128. edsl/language_models/unused/fake_openai_service.py +0 -1
  129. edsl/load_plugins.py +69 -0
  130. edsl/logger.py +146 -0
  131. edsl/notebooks/__init__.py +24 -1
  132. edsl/notebooks/exceptions.py +82 -0
  133. edsl/notebooks/notebook.py +7 -3
  134. edsl/notebooks/notebook_to_latex.py +1 -2
  135. edsl/plugins/__init__.py +63 -0
  136. edsl/plugins/built_in/export_example.py +50 -0
  137. edsl/plugins/built_in/pig_latin.py +67 -0
  138. edsl/plugins/cli.py +372 -0
  139. edsl/plugins/cli_typer.py +283 -0
  140. edsl/plugins/exceptions.py +31 -0
  141. edsl/plugins/hookspec.py +51 -0
  142. edsl/plugins/plugin_host.py +128 -0
  143. edsl/plugins/plugin_manager.py +633 -0
  144. edsl/plugins/plugins_registry.py +168 -0
  145. edsl/prompts/__init__.py +24 -1
  146. edsl/prompts/exceptions.py +107 -5
  147. edsl/prompts/prompt.py +15 -7
  148. edsl/questions/HTMLQuestion.py +5 -11
  149. edsl/questions/Quick.py +0 -1
  150. edsl/questions/__init__.py +6 -4
  151. edsl/questions/answer_validator_mixin.py +318 -323
  152. edsl/questions/compose_questions.py +3 -3
  153. edsl/questions/descriptors.py +11 -50
  154. edsl/questions/exceptions.py +278 -22
  155. edsl/questions/loop_processor.py +7 -5
  156. edsl/questions/prompt_templates/question_list.jinja +3 -0
  157. edsl/questions/question_base.py +46 -19
  158. edsl/questions/question_base_gen_mixin.py +2 -2
  159. edsl/questions/question_base_prompts_mixin.py +13 -7
  160. edsl/questions/question_budget.py +503 -98
  161. edsl/questions/question_check_box.py +660 -160
  162. edsl/questions/question_dict.py +345 -194
  163. edsl/questions/question_extract.py +401 -61
  164. edsl/questions/question_free_text.py +80 -14
  165. edsl/questions/question_functional.py +119 -9
  166. edsl/questions/{derived/question_likert_five.py → question_likert_five.py} +2 -2
  167. edsl/questions/{derived/question_linear_scale.py → question_linear_scale.py} +3 -4
  168. edsl/questions/question_list.py +275 -28
  169. edsl/questions/question_matrix.py +643 -96
  170. edsl/questions/question_multiple_choice.py +219 -51
  171. edsl/questions/question_numerical.py +361 -32
  172. edsl/questions/question_rank.py +401 -124
  173. edsl/questions/question_registry.py +7 -5
  174. edsl/questions/{derived/question_top_k.py → question_top_k.py} +3 -3
  175. edsl/questions/{derived/question_yes_no.py → question_yes_no.py} +3 -4
  176. edsl/questions/register_questions_meta.py +2 -2
  177. edsl/questions/response_validator_abc.py +13 -15
  178. edsl/questions/response_validator_factory.py +10 -12
  179. edsl/questions/templates/dict/answering_instructions.jinja +1 -0
  180. edsl/questions/templates/rank/question_presentation.jinja +1 -1
  181. edsl/results/__init__.py +1 -1
  182. edsl/results/exceptions.py +141 -7
  183. edsl/results/report.py +1 -2
  184. edsl/results/result.py +11 -9
  185. edsl/results/results.py +480 -321
  186. edsl/results/results_selector.py +8 -4
  187. edsl/scenarios/PdfExtractor.py +2 -2
  188. edsl/scenarios/construct_download_link.py +69 -35
  189. edsl/scenarios/directory_scanner.py +33 -14
  190. edsl/scenarios/document_chunker.py +1 -1
  191. edsl/scenarios/exceptions.py +238 -14
  192. edsl/scenarios/file_methods.py +1 -1
  193. edsl/scenarios/file_store.py +7 -3
  194. edsl/scenarios/handlers/__init__.py +17 -0
  195. edsl/scenarios/handlers/docx_file_store.py +0 -5
  196. edsl/scenarios/handlers/pdf_file_store.py +0 -1
  197. edsl/scenarios/handlers/pptx_file_store.py +0 -5
  198. edsl/scenarios/handlers/py_file_store.py +0 -1
  199. edsl/scenarios/handlers/sql_file_store.py +1 -4
  200. edsl/scenarios/handlers/sqlite_file_store.py +0 -1
  201. edsl/scenarios/handlers/txt_file_store.py +1 -1
  202. edsl/scenarios/scenario.py +1 -3
  203. edsl/scenarios/scenario_list.py +179 -27
  204. edsl/scenarios/scenario_list_pdf_tools.py +1 -0
  205. edsl/scenarios/scenario_selector.py +0 -1
  206. edsl/surveys/__init__.py +3 -4
  207. edsl/surveys/dag/__init__.py +4 -2
  208. edsl/surveys/descriptors.py +1 -1
  209. edsl/surveys/edit_survey.py +1 -0
  210. edsl/surveys/exceptions.py +165 -9
  211. edsl/surveys/memory/__init__.py +5 -3
  212. edsl/surveys/memory/memory_management.py +1 -0
  213. edsl/surveys/memory/memory_plan.py +6 -15
  214. edsl/surveys/rules/__init__.py +5 -3
  215. edsl/surveys/rules/rule.py +1 -2
  216. edsl/surveys/rules/rule_collection.py +1 -1
  217. edsl/surveys/survey.py +12 -24
  218. edsl/surveys/survey_css.py +3 -3
  219. edsl/surveys/survey_export.py +6 -3
  220. edsl/surveys/survey_flow_visualization.py +10 -1
  221. edsl/surveys/survey_simulator.py +2 -1
  222. edsl/tasks/__init__.py +23 -1
  223. edsl/tasks/exceptions.py +72 -0
  224. edsl/tasks/question_task_creator.py +3 -3
  225. edsl/tasks/task_creators.py +1 -3
  226. edsl/tasks/task_history.py +8 -10
  227. edsl/tasks/task_status_log.py +1 -2
  228. edsl/tokens/__init__.py +29 -1
  229. edsl/tokens/exceptions.py +37 -0
  230. edsl/tokens/interview_token_usage.py +3 -2
  231. edsl/tokens/token_usage.py +4 -3
  232. edsl/utilities/__init__.py +21 -1
  233. edsl/utilities/decorators.py +1 -2
  234. edsl/utilities/markdown_to_docx.py +2 -2
  235. edsl/utilities/markdown_to_pdf.py +1 -1
  236. edsl/utilities/repair_functions.py +0 -1
  237. edsl/utilities/restricted_python.py +0 -1
  238. edsl/utilities/template_loader.py +2 -3
  239. edsl/utilities/utilities.py +8 -29
  240. {edsl-0.1.49.dist-info → edsl-0.1.51.dist-info}/METADATA +32 -2
  241. edsl-0.1.51.dist-info/RECORD +365 -0
  242. edsl-0.1.51.dist-info/entry_points.txt +3 -0
  243. edsl/dataset/smart_objects.py +0 -96
  244. edsl/exceptions/BaseException.py +0 -21
  245. edsl/exceptions/__init__.py +0 -54
  246. edsl/exceptions/configuration.py +0 -16
  247. edsl/exceptions/general.py +0 -34
  248. edsl/questions/derived/__init__.py +0 -0
  249. edsl/study/ObjectEntry.py +0 -173
  250. edsl/study/ProofOfWork.py +0 -113
  251. edsl/study/SnapShot.py +0 -80
  252. edsl/study/Study.py +0 -520
  253. edsl/study/__init__.py +0 -6
  254. edsl/utilities/interface.py +0 -135
  255. edsl-0.1.49.dist-info/RECORD +0 -347
  256. {edsl-0.1.49.dist-info → edsl-0.1.51.dist-info}/LICENSE +0 -0
  257. {edsl-0.1.49.dist-info → edsl-0.1.51.dist-info}/WHEEL +0 -0
edsl/results/results.py CHANGED
@@ -1,5 +1,4 @@
1
- """
2
- The Results module provides tools for working with collections of Result objects.
1
+ """The Results module provides tools for working with collections of Result objects.
3
2
 
4
3
  The Results class is the primary container for analyzing and manipulating data obtained
5
4
  from running surveys with language models. It implements a powerful data analysis interface
@@ -41,16 +40,15 @@ import json
41
40
  import random
42
41
  import warnings
43
42
  from collections import UserList, defaultdict
44
- from typing import Optional, Callable, Any, Type, Union, List, TYPE_CHECKING
43
+ from typing import Optional, Callable, Any, Union, List, TYPE_CHECKING
45
44
  from bisect import bisect_left
46
45
 
47
46
  from ..base import Base
48
47
 
49
48
  if TYPE_CHECKING:
50
49
  from ..surveys import Survey
51
- from ..data import Cache
50
+ from ..caching import Cache
52
51
  from ..agents import AgentList
53
- from ..language_models import Model
54
52
  from ..scenarios import ScenarioList
55
53
  from ..results import Result
56
54
  from ..tasks import TaskHistory
@@ -71,23 +69,43 @@ from .exceptions import (
71
69
  ResultsDeserializationError,
72
70
  )
73
71
 
72
+
74
73
  def ensure_fetched(method):
75
- """A decorator that checks if remote data is loaded, and if not, attempts to fetch it."""
74
+ """A decorator that checks if remote data is loaded, and if not, attempts to fetch it.
75
+
76
+ Args:
77
+ method: The method to decorate.
78
+
79
+ Returns:
80
+ The wrapped method that will ensure data is fetched before execution.
81
+ """
82
+
76
83
  def wrapper(self, *args, **kwargs):
77
84
  if not self._fetched:
78
85
  # If not fetched, try fetching now.
79
86
  # (If you know you have job info stored in self.job_info)
80
87
  self.fetch_remote(self.job_info)
81
88
  return method(self, *args, **kwargs)
89
+
82
90
  return wrapper
83
91
 
92
+
84
93
  def ensure_ready(method):
85
- """
86
- Decorator for Results methods.
87
-
94
+ """Decorator for Results methods to handle not-ready state.
95
+
88
96
  If the Results object is not ready, for most methods we return a NotReadyObject.
89
97
  However, for __repr__ (and other methods that need to return a string), we return
90
98
  the string representation of NotReadyObject.
99
+
100
+ Args:
101
+ method: The method to decorate.
102
+
103
+ Returns:
104
+ The wrapped method that will handle not-ready Results objects appropriately.
105
+
106
+ Raises:
107
+ Exception: Any exception from fetch_remote will be caught and printed.
108
+
91
109
  """
92
110
  from functools import wraps
93
111
 
@@ -102,7 +120,7 @@ def ensure_ready(method):
102
120
  except Exception as e:
103
121
  print(f"Error during fetch_remote in {method.__name__}: {e}")
104
122
  if not self.completed:
105
- not_ready = NotReadyObject(name = method.__name__, job_info = self.job_info)
123
+ not_ready = NotReadyObject(name=method.__name__, job_info=self.job_info)
106
124
  # For __repr__, ensure we return a string
107
125
  if method.__name__ == "__repr__" or method.__name__ == "__str__":
108
126
  return not_ready.__repr__()
@@ -111,59 +129,115 @@ def ensure_ready(method):
111
129
 
112
130
  return wrapper
113
131
 
132
+
114
133
  class NotReadyObject:
115
- """A placeholder object that prints a message when any attribute is accessed."""
116
- def __init__(self, name: str, job_info: 'RemoteJobInfo'):
134
+ """A placeholder object that indicates results are not ready yet.
135
+
136
+ This class returns itself for all attribute accesses and method calls,
137
+ displaying a message about the job's running status when represented as a string.
138
+
139
+ Attributes:
140
+ name: The name of the method that was originally called.
141
+ job_info: Information about the running job.
142
+
143
+ """
144
+
145
+ def __init__(self, name: str, job_info: "Any"):
146
+ """Initialize a NotReadyObject.
147
+
148
+ Args:
149
+ name: The name of the method that was attempted to be called.
150
+ job_info: Information about the running job.
151
+ """
117
152
  self.name = name
118
153
  self.job_info = job_info
119
- #print(f"Not ready to call {name}")
154
+ # print(f"Not ready to call {name}")
120
155
 
121
156
  def __repr__(self):
122
- message = f"""Results not ready - job still running on server."""
157
+ """Generate a string representation showing the job is still running.
158
+
159
+ Returns:
160
+ str: A message indicating the job is still running, along with job details.
161
+ """
162
+ message = """Results not ready - job still running on server."""
123
163
  for key, value in self.job_info.creation_data.items():
124
164
  message += f"\n{key}: {value}"
125
165
  return message
126
166
 
127
167
  def __getattr__(self, _):
168
+ """Return self for any attribute access.
169
+
170
+ Args:
171
+ _: The attribute name (ignored).
172
+
173
+ Returns:
174
+ NotReadyObject: Returns self for chaining.
175
+ """
128
176
  return self
129
-
177
+
130
178
  def __call__(self, *args, **kwargs):
179
+ """Return self when called as a function.
180
+
181
+ Args:
182
+ *args: Positional arguments (ignored).
183
+ **kwargs: Keyword arguments (ignored).
184
+
185
+ Returns:
186
+ NotReadyObject: Returns self for chaining.
187
+ """
131
188
  return self
132
189
 
133
190
 
134
191
  class Results(UserList, ResultsOperationsMixin, Base):
135
- """
136
- A collection of Result objects with powerful data analysis capabilities.
137
-
192
+ """A collection of Result objects with powerful data analysis capabilities.
193
+
138
194
  The Results class is the primary container for working with data from EDSL surveys.
139
195
  It provides a rich set of methods for data analysis, transformation, and visualization
140
- inspired by data manipulation libraries like dplyr and pandas. The Results class
141
- implements a functional, fluent interface for data manipulation where each method
196
+ inspired by data manipulation libraries like dplyr and pandas. The Results class
197
+ implements a functional, fluent interface for data manipulation where each method
142
198
  returns a new Results object, allowing method chaining.
143
-
199
+
200
+ Attributes:
201
+ survey: The Survey object containing the questions used to generate results.
202
+ data: A list of Result objects containing the responses.
203
+ created_columns: A list of column names created through transformations.
204
+ cache: A Cache object for storing model responses.
205
+ completed: Whether the Results object is ready for use.
206
+ task_history: A TaskHistory object containing information about the tasks.
207
+ known_data_types: List of valid data type strings for accessing data.
208
+
144
209
  Key features:
145
-
146
- - List-like interface for accessing individual Result objects
147
- - Selection of specific data columns with `select()`
148
- - Filtering results with boolean expressions using `filter()`
149
- - Creating new derived columns with `mutate()`
150
- - Recoding values with `recode()` and `answer_truncate()`
151
- - Sorting results with `order_by()`
152
- - Converting to other formats (dataset, table, pandas DataFrame)
153
- - Serialization for storage and retrieval
154
- - Support for remote execution and result retrieval
155
-
210
+ - List-like interface for accessing individual Result objects
211
+ - Selection of specific data columns with `select()`
212
+ - Filtering results with boolean expressions using `filter()`
213
+ - Creating new derived columns with `mutate()`
214
+ - Recoding values with `recode()` and `answer_truncate()`
215
+ - Sorting results with `order_by()`
216
+ - Converting to other formats (dataset, table, pandas DataFrame)
217
+ - Serialization for storage and retrieval
218
+ - Support for remote execution and result retrieval
219
+
156
220
  Results objects have a hierarchical structure with the following components:
157
-
158
- 1. Each Results object contains multiple Result objects
159
- 2. Each Result object contains data organized by type (agent, scenario, model, answer, etc.)
160
- 3. Each data type contains multiple attributes (e.g., "how_feeling" in the answer type)
161
-
221
+ 1. Each Results object contains multiple Result objects
222
+ 2. Each Result object contains data organized by type (agent, scenario, model, answer, etc.)
223
+ 3. Each data type contains multiple attributes (e.g., "how_feeling" in the answer type)
224
+
162
225
  You can access data in a Results object using dot notation (`answer.how_feeling`) or
163
226
  using just the attribute name if it's not ambiguous (`how_feeling`).
164
-
227
+
165
228
  The Results class also tracks "created columns" - new derived values that aren't
166
229
  part of the original data but were created through transformations.
230
+
231
+ Examples:
232
+ >>> # Create a simple Results object from example data
233
+ >>> r = Results.example()
234
+ >>> len(r) > 0 # Contains Result objects
235
+ True
236
+ >>> # Filter and transform data
237
+ >>> filtered = r.filter("how_feeling == 'Great'")
238
+ >>> # Access hierarchical data
239
+ >>> 'agent' in r.known_data_types
240
+ True
167
241
  """
168
242
 
169
243
  __documentation__ = "https://docs.expectedparrot.com/en/latest/results.html"
@@ -186,9 +260,28 @@ class Results(UserList, ResultsOperationsMixin, Base):
186
260
  ]
187
261
 
188
262
  @classmethod
189
- def from_job_info(cls, job_info: dict) -> Results:
190
- """
191
- Instantiate a `Results` object from a job info dictionary.
263
+ def from_job_info(cls, job_info: dict) -> "Results":
264
+ """Instantiate a Results object from a job info dictionary.
265
+
266
+ This method creates a Results object in a not-ready state that will
267
+ fetch its data from a remote source when methods are called on it.
268
+
269
+ Args:
270
+ job_info: Dictionary containing information about a remote job.
271
+
272
+ Returns:
273
+ Results: A new Results instance with completed=False that will
274
+ fetch remote data when needed.
275
+
276
+ Examples:
277
+ >>> # Create a job info dictionary
278
+ >>> job_info = {'job_uuid': '12345', 'creation_data': {'model': 'gpt-4'}}
279
+ >>> # Create a Results object from the job info
280
+ >>> results = Results.from_job_info(job_info)
281
+ >>> results.completed
282
+ False
283
+ >>> hasattr(results, 'job_info')
284
+ True
192
285
  """
193
286
  results = cls()
194
287
  results.completed = False
@@ -205,14 +298,37 @@ class Results(UserList, ResultsOperationsMixin, Base):
205
298
  total_results: Optional[int] = None,
206
299
  task_history: Optional[TaskHistory] = None,
207
300
  ):
208
- """Instantiate a `Results` object with a survey and a list of `Result` objects.
209
-
210
- :param survey: A Survey object.
211
- :param data: A list of Result objects.
212
- :param created_columns: A list of strings that are created columns.
213
- :param job_uuid: A string representing the job UUID.
214
- :param total_results: An integer representing the total number of results.
215
- :cache: A Cache object.
301
+ """Instantiate a Results object with a survey and a list of Result objects.
302
+
303
+ This initializes a completed Results object with the provided data.
304
+ For creating a not-ready Results object from a job info dictionary,
305
+ use the from_job_info class method instead.
306
+
307
+ Args:
308
+ survey: A Survey object containing the questions used to generate results.
309
+ data: A list of Result objects containing the responses.
310
+ created_columns: A list of column names created through transformations.
311
+ cache: A Cache object for storing model responses.
312
+ job_uuid: A string representing the job UUID.
313
+ total_results: An integer representing the total number of results.
314
+ task_history: A TaskHistory object containing information about the tasks.
315
+
316
+ Examples:
317
+ >>> from ..results import Result
318
+ >>> # Create an empty Results object
319
+ >>> r = Results()
320
+ >>> r.completed
321
+ True
322
+ >>> len(r.created_columns)
323
+ 0
324
+
325
+ >>> # Create a Results object with data
326
+ >>> from unittest.mock import Mock
327
+ >>> mock_survey = Mock()
328
+ >>> mock_result = Mock(spec=Result)
329
+ >>> r = Results(survey=mock_survey, data=[mock_result])
330
+ >>> len(r)
331
+ 1
216
332
  """
217
333
  self.completed = True
218
334
  self._fetching = False
@@ -231,39 +347,26 @@ class Results(UserList, ResultsOperationsMixin, Base):
231
347
  if hasattr(self, "_add_output_functions"):
232
348
  self._add_output_functions()
233
349
 
234
- def long(self):
235
- return self.table().long()
236
-
237
- def print_long(self, max_rows: int = None) -> None:
238
- """Print the results in long format.
239
-
240
- >>> from edsl.results import Results
241
- >>> r = Results.example()
242
- >>> r.select('how_feeling').print_long(max_rows = 2)
243
- ┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━┓
244
- ┃ Result index ┃ Key ┃ Value ┃
245
- ┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━┩
246
- │ 0 │ how_feeling │ OK │
247
- │ 1 │ how_feeling │ Great │
248
- └──────────────┴─────────────┴───────┘
249
- """
250
- from edsl.utilities.interface import print_results_long
251
-
252
- print_results_long(self, max_rows=max_rows)
253
-
254
-
255
350
  def _fetch_list(self, data_type: str, key: str) -> list:
256
- """
257
- Return a list of values from the data for a given data type and key.
351
+ """Return a list of values from the data for a given data type and key.
258
352
 
259
353
  Uses the filtered data, not the original data.
260
354
 
261
- Example:
355
+ Args:
356
+ data_type: The type of data to fetch (e.g., 'answer', 'agent', 'scenario').
357
+ key: The key to fetch from each data type dictionary.
262
358
 
263
- >>> from edsl.results import Results
264
- >>> r = Results.example()
265
- >>> r._fetch_list('answer', 'how_feeling')
266
- ['OK', 'Great', 'Terrible', 'OK']
359
+ Returns:
360
+ list: A list of values, one from each result in the data.
361
+
362
+ Examples:
363
+ >>> from edsl.results import Results
364
+ >>> r = Results.example()
365
+ >>> values = r._fetch_list('answer', 'how_feeling')
366
+ >>> len(values) == len(r)
367
+ True
368
+ >>> all(isinstance(v, (str, type(None))) for v in values)
369
+ True
267
370
  """
268
371
  returned_list = []
269
372
  for row in self.data:
@@ -271,6 +374,25 @@ class Results(UserList, ResultsOperationsMixin, Base):
271
374
 
272
375
  return returned_list
273
376
 
377
+ def get_answers(self, question_name: str) -> list:
378
+ """Get the answers for a given question name.
379
+
380
+ Args:
381
+ question_name: The name of the question to fetch answers for.
382
+
383
+ Returns:
384
+ list: A list of answers, one from each result in the data.
385
+
386
+ Examples:
387
+ >>> from edsl.results import Results
388
+ >>> r = Results.example()
389
+ >>> answers = r.get_answers('how_feeling')
390
+ >>> isinstance(answers, list)
391
+ True
392
+ >>> len(answers) == len(r)
393
+ True
394
+ """
395
+ return self._fetch_list("answer", question_name)
274
396
 
275
397
  def _summary(self) -> dict:
276
398
  import reprlib
@@ -322,8 +444,23 @@ class Results(UserList, ResultsOperationsMixin, Base):
322
444
  self.insert(item)
323
445
 
324
446
  def compute_job_cost(self, include_cached_responses_in_cost: bool = False) -> float:
325
- """
326
- Computes the cost of a completed job in USD.
447
+ """Compute the cost of a completed job in USD.
448
+
449
+ This method calculates the total cost of all model responses in the results.
450
+ By default, it only counts the cost of responses that were not cached.
451
+
452
+ Args:
453
+ include_cached_responses_in_cost: Whether to include the cost of cached
454
+ responses in the total. Defaults to False.
455
+
456
+ Returns:
457
+ float: The total cost in USD.
458
+
459
+ Examples:
460
+ >>> from edsl.results import Results
461
+ >>> r = Results.example()
462
+ >>> r.compute_job_cost()
463
+ 0
327
464
  """
328
465
  total_cost = 0
329
466
  for result in self:
@@ -342,88 +479,55 @@ class Results(UserList, ResultsOperationsMixin, Base):
342
479
 
343
480
  return total_cost
344
481
 
345
- # def leaves(self):
346
- # leaves = []
347
- # for result in self:
348
- # leaves.extend(result.leaves())
349
- # return leaves
350
-
351
- # def tree(self, node_list: Optional[List[str]] = None):
352
- # return self.to_scenario_list().tree(node_list)
353
-
354
- # def interactive_tree(
355
- # self,
356
- # fold_attributes: Optional[List[str]] = None,
357
- # drop: Optional[List[str]] = None,
358
- # open_file=True,
359
- # ) -> dict:
360
- # """Return the results as a tree."""
361
- # from edsl.results.tree_explore import FoldableHTMLTableGenerator
362
-
363
- # if drop is None:
364
- # drop = []
365
-
366
- # valid_attributes = [
367
- # "model",
368
- # "scenario",
369
- # "agent",
370
- # "answer",
371
- # "question",
372
- # "iteration",
373
- # ]
374
- # if fold_attributes is None:
375
- # fold_attributes = []
376
-
377
- # for attribute in fold_attributes:
378
- # if attribute not in valid_attributes:
379
- # raise ValueError(
380
- # f"Invalid fold attribute: {attribute}; must be in {valid_attributes}"
381
- # )
382
- # data = self.leaves()
383
- # generator = FoldableHTMLTableGenerator(data)
384
- # tree = generator.tree(fold_attributes=fold_attributes, drop=drop)
385
- # html_content = generator.generate_html(tree, fold_attributes)
386
- # import tempfile
387
- # from edsl.utilities.utilities import is_notebook
388
-
389
- # from IPython.display import display, HTML
390
-
391
- # if is_notebook():
392
- # import html
393
- # from IPython.display import display, HTML
394
-
395
- # height = 1000
396
- # width = 1000
397
- # escaped_output = html.escape(html_content)
398
- # # escaped_output = rendered_html
399
- # iframe = f""""
400
- # <iframe srcdoc="{ escaped_output }" style="width: {width}px; height: {height}px;"></iframe>
401
- # """
402
- # display(HTML(iframe))
403
- # return None
404
-
405
- # with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as f:
406
- # f.write(html_content.encode())
407
- # print(f"HTML file has been generated: {f.name}")
408
-
409
- # if open_file:
410
- # import webbrowser
411
- # import time
412
-
413
- # time.sleep(1) # Wait for 1 second
414
- # # webbrowser.open(f.name)
415
- # import os
416
-
417
- # filename = f.name
418
- # webbrowser.open(f"file://{os.path.abspath(filename)}")
419
-
420
- # else:
421
- # return html_content
422
-
423
482
  def code(self):
424
- raise NotImplementedError
483
+ """Method for generating code representations.
484
+
485
+ Raises:
486
+ ResultsError: This method is not implemented for Results objects.
487
+
488
+ Examples:
489
+ >>> from edsl.results import Results
490
+ >>> r = Results.example()
491
+ >>> try:
492
+ ... r.code()
493
+ ... except ResultsError as e:
494
+ ... str(e).startswith("The code() method is not implemented")
495
+ True
496
+ """
497
+ raise ResultsError("The code() method is not implemented for Results objects")
425
498
 
426
499
  def __getitem__(self, i):
500
+ """Get an item from the Results object by index, slice, or key.
501
+
502
+ Args:
503
+ i: An integer index, a slice, or a string key.
504
+
505
+ Returns:
506
+ The requested item, slice of results, or dictionary value.
507
+
508
+ Raises:
509
+ ResultsError: If the argument type is invalid for indexing.
510
+
511
+ Examples:
512
+ >>> from edsl.results import Results
513
+ >>> r = Results.example()
514
+ >>> # Get by integer index
515
+ >>> result = r[0]
516
+ >>> # Get by slice
517
+ >>> subset = r[0:2]
518
+ >>> len(subset) == 2
519
+ True
520
+ >>> # Get by string key
521
+ >>> data = r["data"]
522
+ >>> isinstance(data, list)
523
+ True
524
+ >>> # Invalid index type
525
+ >>> try:
526
+ ... r[1.5]
527
+ ... except ResultsError:
528
+ ... True
529
+ True
530
+ """
427
531
  if isinstance(i, int):
428
532
  return self.data[i]
429
533
 
@@ -433,18 +537,40 @@ class Results(UserList, ResultsOperationsMixin, Base):
433
537
  if isinstance(i, str):
434
538
  return self.to_dict()[i]
435
539
 
436
- raise TypeError("Invalid argument type")
540
+ raise ResultsError("Invalid argument type for indexing Results object")
437
541
 
438
542
  def __add__(self, other: Results) -> Results:
439
543
  """Add two Results objects together.
440
- They must have the same survey and created columns.
441
- :param other: A Results object.
442
544
 
443
- Example:
545
+ Combines two Results objects into a new one. Both objects must have the same
546
+ survey and created columns.
444
547
 
445
- >>> r = Results.example()
446
- >>> r2 = Results.example()
447
- >>> r3 = r + r2
548
+ Args:
549
+ other: A Results object to add to this one.
550
+
551
+ Returns:
552
+ A new Results object containing data from both objects.
553
+
554
+ Raises:
555
+ ResultsError: If the surveys or created columns of the two objects don't match.
556
+
557
+ Examples:
558
+ >>> from edsl.results import Results
559
+ >>> r1 = Results.example()
560
+ >>> r2 = Results.example()
561
+ >>> # Combine two Results objects
562
+ >>> r3 = r1 + r2
563
+ >>> len(r3) == len(r1) + len(r2)
564
+ True
565
+
566
+ >>> # Attempting to add incompatible Results
567
+ >>> from unittest.mock import Mock
568
+ >>> r4 = Results(survey=Mock()) # Different survey
569
+ >>> try:
570
+ ... r1 + r4
571
+ ... except ResultsError:
572
+ ... True
573
+ True
448
574
  """
449
575
  if self.survey != other.survey:
450
576
  raise ResultsError(
@@ -460,21 +586,17 @@ class Results(UserList, ResultsOperationsMixin, Base):
460
586
  data=self.data + other.data,
461
587
  created_columns=self.created_columns,
462
588
  )
463
-
589
+
464
590
  def _repr_html_(self):
465
591
  if not self.completed:
466
592
  if hasattr(self, "job_info"):
467
593
  self.fetch_remote(self.job_info)
468
-
594
+
469
595
  if not self.completed:
470
- return f"Results not ready to call"
471
-
596
+ return "Results not ready to call"
597
+
472
598
  return super()._repr_html_()
473
599
 
474
- # @ensure_ready
475
- # def __str__(self):
476
- # super().__str__()
477
-
478
600
  @ensure_ready
479
601
  def __repr__(self) -> str:
480
602
  return f"Results(data = {self.data}, survey = {repr(self.survey)}, created_columns = {self.created_columns})"
@@ -482,7 +604,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
482
604
  def table(
483
605
  self,
484
606
  *fields,
485
- tablefmt: Optional[str] = None,
607
+ tablefmt: Optional[str] = "rich",
486
608
  pretty_labels: Optional[dict] = None,
487
609
  print_parameters: Optional[dict] = None,
488
610
  ):
@@ -516,8 +638,8 @@ class Results(UserList, ResultsOperationsMixin, Base):
516
638
  print_parameters=print_parameters,
517
639
  )
518
640
  )
519
-
520
- def to_dataset(self) -> 'Dataset':
641
+
642
+ def to_dataset(self) -> "Dataset":
521
643
  return self.select()
522
644
 
523
645
  def to_dict(
@@ -561,7 +683,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
561
683
  d.update({"task_history": self.task_history.to_dict()})
562
684
 
563
685
  if add_edsl_version:
564
- from edsl import __version__
686
+ from .. import __version__
565
687
 
566
688
  d["edsl_version"] = __version__
567
689
  d["edsl_class_name"] = "Results"
@@ -590,7 +712,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
590
712
  return self.task_history.has_unfixed_exceptions
591
713
 
592
714
  def __hash__(self) -> int:
593
-
715
+
594
716
  return dict_hash(
595
717
  self.to_dict(sort=True, add_edsl_version=False, include_cache_info=False)
596
718
  )
@@ -599,7 +721,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
599
721
  def hashes(self) -> set:
600
722
  return set(hash(result) for result in self.data)
601
723
 
602
- def sample(self, n: int) -> Results:
724
+ def _sample_legacy(self, n: int) -> Results:
603
725
  """Return a random sample of the results.
604
726
 
605
727
  :param n: The number of samples to return.
@@ -643,7 +765,6 @@ class Results(UserList, ResultsOperationsMixin, Base):
643
765
  from ..caching import Cache
644
766
  from ..results import Result
645
767
  from ..tasks import TaskHistory
646
- from ..agents import Agent
647
768
 
648
769
  survey = Survey.from_dict(data["survey"])
649
770
  results_data = [Result.from_dict(r) for r in data["data"]]
@@ -717,7 +838,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
717
838
  ['agent.agent_index', ...]
718
839
  """
719
840
  column_names = [f"{v}.{k}" for k, v in self._key_to_data_type.items()]
720
- from edsl.utilities.PrettyList import PrettyList
841
+ from ..utilities.PrettyList import PrettyList
721
842
 
722
843
  return PrettyList(sorted(column_names))
723
844
 
@@ -731,7 +852,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
731
852
  >>> r.answer_keys
732
853
  {'how_feeling': 'How are you this {{ period }}?', 'how_feeling_yesterday': 'How were you feeling yesterday {{ period }}?'}
733
854
  """
734
- from edsl.utilities.utilities import shorten_string
855
+ from ..utilities.utilities import shorten_string
735
856
 
736
857
  if not self.survey:
737
858
  raise ResultsError("Survey is not defined so no answer keys are available.")
@@ -756,7 +877,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
756
877
  >>> r.agents
757
878
  AgentList([Agent(traits = {'status': 'Joyful'}), Agent(traits = {'status': 'Joyful'}), Agent(traits = {'status': 'Sad'}), Agent(traits = {'status': 'Sad'})])
758
879
  """
759
- from edsl.agents import AgentList
880
+ from ..agents import AgentList
760
881
 
761
882
  return AgentList([r.agent for r in self.data])
762
883
 
@@ -867,7 +988,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
867
988
  return self.data[0]
868
989
 
869
990
  def answer_truncate(
870
- self, column: str, top_n: int = 5, new_var_name: str = None
991
+ self, column: str, top_n: int = 5, new_var_name: Optional[str] = None
871
992
  ) -> Results:
872
993
  """Create a new variable that truncates the answers to the top_n.
873
994
 
@@ -998,24 +1119,23 @@ class Results(UserList, ResultsOperationsMixin, Base):
998
1119
  def mutate(
999
1120
  self, new_var_string: str, functions_dict: Optional[dict] = None
1000
1121
  ) -> Results:
1001
- """
1002
- Create a new column based on a computational expression.
1003
-
1122
+ """Create a new column based on a computational expression.
1123
+
1004
1124
  The mutate method allows you to create new derived variables based on existing data.
1005
1125
  You provide an assignment expression where the left side is the new column name
1006
1126
  and the right side is a Python expression that computes the value. The expression
1007
1127
  can reference any existing columns in the Results object.
1008
-
1009
- Parameters:
1010
- new_var_string: A string containing an assignment expression in the form
1011
- "new_column_name = expression". The expression can reference
1012
- any existing column and use standard Python syntax.
1013
- functions_dict: Optional dictionary of custom functions that can be used in
1014
- the expression. Keys are function names, values are function objects.
1015
-
1128
+
1129
+ Args:
1130
+ new_var_string: A string containing an assignment expression in the form
1131
+ "new_column_name = expression". The expression can reference
1132
+ any existing column and use standard Python syntax.
1133
+ functions_dict: Optional dictionary of custom functions that can be used in
1134
+ the expression. Keys are function names, values are function objects.
1135
+
1016
1136
  Returns:
1017
1137
  A new Results object with the additional column.
1018
-
1138
+
1019
1139
  Notes:
1020
1140
  - The expression must contain an equals sign (=) separating the new column name
1021
1141
  from the computation expression
@@ -1024,22 +1144,22 @@ class Results(UserList, ResultsOperationsMixin, Base):
1024
1144
  - The expression can access any data in the Result object using the column names
1025
1145
  - New columns are added to the "answer" data type
1026
1146
  - Created columns are tracked in the `created_columns` property
1027
-
1147
+
1028
1148
  Examples:
1029
1149
  >>> r = Results.example()
1030
-
1031
- # Create a simple derived column
1150
+
1151
+ >>> # Create a simple derived column
1032
1152
  >>> r.mutate('how_feeling_x = how_feeling + "x"').select('how_feeling_x')
1033
1153
  Dataset([{'answer.how_feeling_x': ['OKx', 'Greatx', 'Terriblex', 'OKx']}])
1034
-
1035
- # Create a binary indicator column
1154
+
1155
+ >>> # Create a binary indicator column
1036
1156
  >>> r.mutate('is_great = 1 if how_feeling == "Great" else 0').select('is_great')
1037
1157
  Dataset([{'answer.is_great': [0, 1, 0, 0]}])
1038
-
1039
- # Create a column with custom functions
1158
+
1159
+ >>> # Create a column with custom functions
1040
1160
  >>> def sentiment(text):
1041
1161
  ... return len(text) > 5
1042
- >>> r.mutate('is_long = sentiment(how_feeling)',
1162
+ >>> r.mutate('is_long = sentiment(how_feeling)',
1043
1163
  ... functions_dict={'sentiment': sentiment}).select('is_long')
1044
1164
  Dataset([{'answer.is_long': [False, False, True, False]}])
1045
1165
  """
@@ -1050,7 +1170,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
1050
1170
  )
1051
1171
  raw_var_name, expression = new_var_string.split("=", 1)
1052
1172
  var_name = raw_var_name.strip()
1053
- from edsl.utilities.utilities import is_valid_variable_name
1173
+ from ..utilities.utilities import is_valid_variable_name
1054
1174
 
1055
1175
  if not is_valid_variable_name(var_name):
1056
1176
  raise ResultsInvalidNameError(f"{var_name} is not a valid variable name.")
@@ -1076,26 +1196,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
1076
1196
  created_columns=self.created_columns + [var_name],
1077
1197
  )
1078
1198
 
1079
- @ensure_ready
1080
- def add_column(self, column_name: str, values: list) -> Results:
1081
- """Adds columns to Results
1082
-
1083
- >>> r = Results.example()
1084
- >>> r.add_column('a', [1,2,3, 4]).select('a')
1085
- Dataset([{'answer.a': [1, 2, 3, 4]}])
1086
- """
1087
-
1088
- assert len(values) == len(
1089
- self.data
1090
- ), "The number of values must match the number of results."
1091
- new_results = self.data.copy()
1092
- for i, result in enumerate(new_results):
1093
- result["answer"][column_name] = values[i]
1094
- return Results(
1095
- survey=self.survey,
1096
- data=new_results,
1097
- created_columns=self.created_columns + [column_name],
1098
- )
1199
+ # Method removed due to duplication (F811)
1099
1200
 
1100
1201
  @ensure_ready
1101
1202
  def rename(self, old_name: str, new_name: str) -> Results:
@@ -1157,10 +1258,14 @@ class Results(UserList, ResultsOperationsMixin, Base):
1157
1258
  random.seed(seed)
1158
1259
 
1159
1260
  if n is None and frac is None:
1160
- raise Exception("You must specify either n or frac.")
1261
+ from .exceptions import ResultsError
1262
+
1263
+ raise ResultsError("You must specify either n or frac.")
1161
1264
 
1162
1265
  if n is not None and frac is not None:
1163
- raise Exception("You cannot specify both n and frac.")
1266
+ from .exceptions import ResultsError
1267
+
1268
+ raise ResultsError("You cannot specify both n and frac.")
1164
1269
 
1165
1270
  if frac is not None and n is None:
1166
1271
  n = int(frac * len(self.data))
@@ -1173,61 +1278,62 @@ class Results(UserList, ResultsOperationsMixin, Base):
1173
1278
  return Results(survey=self.survey, data=new_data, created_columns=None)
1174
1279
 
1175
1280
  @ensure_ready
1176
- def select(self, *columns: Union[str, list[str]]) -> 'Dataset':
1177
- """
1178
- Extract specific columns from the Results into a Dataset.
1179
-
1281
+ def select(self, *columns: Union[str, list[str]]) -> "Dataset":
1282
+ """Extract specific columns from the Results into a Dataset.
1283
+
1180
1284
  This method allows you to select specific columns from the Results object
1181
1285
  and transforms the data into a Dataset for further analysis and visualization.
1182
1286
  A Dataset is a more general-purpose data structure optimized for analysis
1183
1287
  operations rather than the hierarchical structure of Result objects.
1184
-
1185
- Parameters:
1288
+
1289
+ Args:
1186
1290
  *columns: Column names to select. Each column can be:
1187
- - A simple attribute name (e.g., "how_feeling")
1188
- - A fully qualified name with type (e.g., "answer.how_feeling")
1189
- - A wildcard pattern (e.g., "answer.*" to select all answer fields)
1190
- If no columns are provided, selects all data.
1191
-
1291
+ - A simple attribute name (e.g., "how_feeling")
1292
+ - A fully qualified name with type (e.g., "answer.how_feeling")
1293
+ - A wildcard pattern (e.g., "answer.*" to select all answer fields)
1294
+ If no columns are provided, selects all data.
1295
+
1192
1296
  Returns:
1193
1297
  A Dataset object containing the selected data.
1194
-
1298
+
1195
1299
  Notes:
1196
1300
  - Column names are automatically disambiguated if needed
1197
1301
  - When column names are ambiguous, specify the full path with data type
1198
1302
  - You can use wildcard patterns with "*" to select multiple related fields
1199
1303
  - Selecting with no arguments returns all data
1200
1304
  - Results are restructured in a columnar format in the Dataset
1201
-
1305
+
1202
1306
  Examples:
1203
1307
  >>> results = Results.example()
1204
-
1205
- # Select a single column by name
1308
+
1309
+ >>> # Select a single column by name
1206
1310
  >>> results.select('how_feeling')
1207
1311
  Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])
1208
-
1209
- # Select multiple columns
1312
+
1313
+ >>> # Select multiple columns
1210
1314
  >>> ds = results.select('how_feeling', 'how_feeling_yesterday')
1211
1315
  >>> sorted([list(d.keys())[0] for d in ds])
1212
1316
  ['answer.how_feeling', 'answer.how_feeling_yesterday']
1213
-
1214
- # Using fully qualified names with data type
1317
+
1318
+ >>> # Using fully qualified names with data type
1215
1319
  >>> results.select('answer.how_feeling')
1216
1320
  Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])
1217
-
1218
- # Using partial matching for column names
1321
+
1322
+ >>> # Using partial matching for column names
1219
1323
  >>> results.select('answer.how_feeling_y')
1220
1324
  Dataset([{'answer.how_feeling_yesterday': ['Great', 'Good', 'OK', 'Terrible']}])
1221
-
1222
- # Select all columns (same as calling select with no arguments)
1223
- >>> results.select('*.*')
1325
+
1326
+ >>> # Select all columns (same as calling select with no arguments)
1327
+ >>> results.select('*.*')
1224
1328
  Dataset([...])
1225
1329
  """
1226
1330
 
1227
- from edsl.results.results_selector import Selector
1331
+ from .results_selector import Selector
1228
1332
 
1229
1333
  if len(self) == 0:
1230
- raise Exception("No data to select from---the Results object is empty.")
1334
+ from .exceptions import ResultsError
1335
+
1336
+ raise ResultsError("No data to select from---the Results object is empty.")
1231
1337
 
1232
1338
  selector = Selector(
1233
1339
  known_data_types=self.known_data_types,
@@ -1275,7 +1381,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
1275
1381
  def to_numeric_if_possible(v):
1276
1382
  try:
1277
1383
  return float(v)
1278
- except:
1384
+ except (ValueError, TypeError):
1279
1385
  return v
1280
1386
 
1281
1387
  def sort_key(item):
@@ -1291,21 +1397,24 @@ class Results(UserList, ResultsOperationsMixin, Base):
1291
1397
 
1292
1398
  @ensure_ready
1293
1399
  def filter(self, expression: str) -> Results:
1294
- """
1295
- Filter results based on a boolean expression.
1296
-
1400
+ """Filter results based on a boolean expression.
1401
+
1297
1402
  This method evaluates a boolean expression against each Result object in the
1298
1403
  collection and returns a new Results object containing only those that match.
1299
1404
  The expression can reference any column in the data and supports standard
1300
1405
  Python operators and syntax.
1301
-
1302
- Parameters:
1406
+
1407
+ Args:
1303
1408
  expression: A string containing a Python expression that evaluates to a boolean.
1304
1409
  The expression is applied to each Result object individually.
1305
-
1410
+
1306
1411
  Returns:
1307
1412
  A new Results object containing only the Result objects that satisfy the expression.
1308
-
1413
+
1414
+ Raises:
1415
+ ResultsFilterError: If the expression is invalid or uses improper syntax
1416
+ (like using '=' instead of '==').
1417
+
1309
1418
  Notes:
1310
1419
  - Column names can be specified with or without their data type prefix
1311
1420
  (e.g., both "how_feeling" and "answer.how_feeling" work if unambiguous)
@@ -1314,23 +1423,23 @@ class Results(UserList, ResultsOperationsMixin, Base):
1314
1423
  - You can use comparison operators like '==', '!=', '>', '<', '>=', '<='
1315
1424
  - You can use membership tests with 'in'
1316
1425
  - You can use string methods like '.startswith()', '.contains()', etc.
1317
-
1426
+
1318
1427
  Examples:
1319
1428
  >>> r = Results.example()
1320
-
1321
- # Simple equality filter
1429
+
1430
+ >>> # Simple equality filter
1322
1431
  >>> r.filter("how_feeling == 'Great'").select('how_feeling')
1323
1432
  Dataset([{'answer.how_feeling': ['Great']}])
1324
-
1325
- # Using OR condition
1433
+
1434
+ >>> # Using OR condition
1326
1435
  >>> r.filter("how_feeling == 'Great' or how_feeling == 'Terrible'").select('how_feeling')
1327
1436
  Dataset([{'answer.how_feeling': ['Great', 'Terrible']}])
1328
-
1329
- # Filter on agent properties
1437
+
1438
+ >>> # Filter on agent properties
1330
1439
  >>> r.filter("agent.status == 'Joyful'").select('agent.status')
1331
1440
  Dataset([{'agent.status': ['Joyful', 'Joyful']}])
1332
-
1333
- # Common error: using = instead of ==
1441
+
1442
+ >>> # Common error: using = instead of ==
1334
1443
  >>> try:
1335
1444
  ... r.filter("how_feeling = 'Great'")
1336
1445
  ... except Exception as e:
@@ -1435,45 +1544,58 @@ class Results(UserList, ResultsOperationsMixin, Base):
1435
1544
  [1, 1, 0, 0]
1436
1545
  """
1437
1546
  return [r.score(f) for r in self.data]
1438
-
1547
+
1439
1548
  def score_with_answer_key(self, answer_key: dict) -> list:
1440
1549
  """Score the results using an answer key.
1441
1550
 
1442
1551
  :param answer_key: A dictionary that maps answer values to scores.
1443
1552
  """
1444
1553
  return [r.score_with_answer_key(answer_key) for r in self.data]
1445
-
1446
1554
 
1447
- def fetch_remote(self, job_info: "RemoteJobInfo") -> None:
1448
- """
1449
- Fetches the remote Results object using the provided RemoteJobInfo and updates this instance with the remote data.
1450
-
1451
- This is useful when you have a Results object that was created locally but want to sync it with
1555
+ def fetch_remote(self, job_info: Any) -> None:
1556
+ """Fetch remote Results object and update this instance with the data.
1557
+
1558
+ This is useful when you have a Results object that was created locally but want to sync it with
1452
1559
  the latest data from the remote server.
1453
-
1560
+
1454
1561
  Args:
1455
1562
  job_info: RemoteJobInfo object containing the job_uuid and other remote job details
1456
-
1563
+
1564
+ Returns:
1565
+ bool: True if the fetch was successful, False if the job is not yet completed.
1566
+
1567
+ Raises:
1568
+ ResultsError: If there's an error during the fetch process.
1569
+
1570
+ Examples:
1571
+ >>> # This is a simplified example since we can't actually test this without a remote server
1572
+ >>> from unittest.mock import Mock, patch
1573
+ >>> # Create a mock job_info and Results
1574
+ >>> job_info = Mock()
1575
+ >>> job_info.job_uuid = "test_uuid"
1576
+ >>> results = Results()
1577
+ >>> # In a real scenario:
1578
+ >>> # results.fetch_remote(job_info)
1579
+ >>> # results.completed # Would be True if successful
1457
1580
  """
1458
- #print("Calling fetch_remote")
1459
1581
  try:
1460
1582
  from ..coop import Coop
1461
1583
  from ..jobs import JobsRemoteInferenceHandler
1462
-
1584
+
1463
1585
  # Get the remote job data
1464
1586
  remote_job_data = JobsRemoteInferenceHandler.check_status(job_info.job_uuid)
1465
-
1587
+
1466
1588
  if remote_job_data.get("status") not in ["completed", "failed"]:
1467
1589
  return False
1468
- #
1590
+ #
1469
1591
  results_uuid = remote_job_data.get("results_uuid")
1470
1592
  if not results_uuid:
1471
1593
  raise ResultsError("No results_uuid found in remote job data")
1472
-
1594
+
1473
1595
  # Fetch the remote Results object
1474
1596
  coop = Coop()
1475
1597
  remote_results = coop.get(results_uuid, expected_object_type="results")
1476
-
1598
+
1477
1599
  # Update this instance with remote data
1478
1600
  self.data = remote_results.data
1479
1601
  self.survey = remote_results.survey
@@ -1481,10 +1603,10 @@ class Results(UserList, ResultsOperationsMixin, Base):
1481
1603
  self.cache = remote_results.cache
1482
1604
  self.task_history = remote_results.task_history
1483
1605
  self.completed = True
1484
-
1606
+
1485
1607
  # Set job_uuid and results_uuid from remote data
1486
1608
  self.job_uuid = job_info.job_uuid
1487
- if hasattr(remote_results, 'results_uuid'):
1609
+ if hasattr(remote_results, "results_uuid"):
1488
1610
  self.results_uuid = remote_results.results_uuid
1489
1611
 
1490
1612
  return True
@@ -1492,39 +1614,60 @@ class Results(UserList, ResultsOperationsMixin, Base):
1492
1614
  except Exception as e:
1493
1615
  raise ResultsError(f"Failed to fetch remote results: {str(e)}")
1494
1616
 
1495
- def fetch(self, polling_interval: [float, int] = 1.0) -> Results:
1496
- """
1497
- Polls the server for job completion and updates this Results instance with the completed data.
1498
-
1617
+ def fetch(self, polling_interval: Union[float, int] = 1.0) -> Results:
1618
+ """Poll the server for job completion and update this Results instance.
1619
+
1620
+ This method continuously polls the remote server until the job is completed or
1621
+ fails, then updates this Results object with the final data.
1622
+
1499
1623
  Args:
1500
1624
  polling_interval: Number of seconds to wait between polling attempts (default: 1.0)
1501
-
1625
+
1502
1626
  Returns:
1503
1627
  self: The updated Results instance
1628
+
1629
+ Raises:
1630
+ ResultsError: If no job info is available or if there's an error during fetch.
1631
+
1632
+ Examples:
1633
+ >>> # This is a simplified example since we can't actually test polling
1634
+ >>> from unittest.mock import Mock, patch
1635
+ >>> # Create a mock results object
1636
+ >>> results = Results()
1637
+ >>> # In a real scenario with a running job:
1638
+ >>> # results.job_info = remote_job_info
1639
+ >>> # results.fetch() # Would poll until complete
1640
+ >>> # results.completed # Would be True if successful
1504
1641
  """
1505
1642
  if not hasattr(self, "job_info"):
1506
- raise ResultsError("No job info available - this Results object wasn't created from a remote job")
1507
-
1643
+ raise ResultsError(
1644
+ "No job info available - this Results object wasn't created from a remote job"
1645
+ )
1646
+
1508
1647
  from ..jobs import JobsRemoteInferenceHandler
1509
-
1648
+
1510
1649
  try:
1511
1650
  # Get the remote job data
1512
- remote_job_data = JobsRemoteInferenceHandler.check_status(self.job_info.job_uuid)
1513
-
1651
+ remote_job_data = JobsRemoteInferenceHandler.check_status(
1652
+ self.job_info.job_uuid
1653
+ )
1654
+
1514
1655
  while remote_job_data.get("status") not in ["completed", "failed"]:
1515
1656
  print("Waiting for remote job to complete...")
1516
1657
  import time
1658
+
1517
1659
  time.sleep(polling_interval)
1518
- remote_job_data = JobsRemoteInferenceHandler.check_status(self.job_info.job_uuid)
1519
-
1660
+ remote_job_data = JobsRemoteInferenceHandler.check_status(
1661
+ self.job_info.job_uuid
1662
+ )
1663
+
1520
1664
  # Once complete, fetch the full results
1521
1665
  self.fetch_remote(self.job_info)
1522
1666
  return self
1523
-
1667
+
1524
1668
  except Exception as e:
1525
1669
  raise ResultsError(f"Failed to fetch remote results: {str(e)}")
1526
1670
 
1527
-
1528
1671
  def spot_issues(self, models: Optional[ModelList] = None) -> Results:
1529
1672
  """Run a survey to spot issues and suggest improvements for prompts that had no model response, returning a new Results object.
1530
1673
  Future version: Allow user to optionally pass a list of questions to review, regardless of whether they had a null model response.
@@ -1532,60 +1675,75 @@ class Results(UserList, ResultsOperationsMixin, Base):
1532
1675
  from ..questions import QuestionFreeText, QuestionDict
1533
1676
  from ..surveys import Survey
1534
1677
  from ..scenarios import Scenario, ScenarioList
1535
- from ..language_models import Model, ModelList
1678
+ from ..language_models import ModelList
1536
1679
  import pandas as pd
1537
1680
 
1538
- df = self.select("agent.*", "scenario.*", "answer.*", "raw_model_response.*", "prompt.*").to_pandas()
1681
+ df = self.select(
1682
+ "agent.*", "scenario.*", "answer.*", "raw_model_response.*", "prompt.*"
1683
+ ).to_pandas()
1539
1684
  scenario_list = []
1540
1685
 
1541
1686
  for _, row in df.iterrows():
1542
1687
  for col in df.columns:
1543
1688
  if col.endswith("_raw_model_response") and pd.isna(row[col]):
1544
- q = col.split("_raw_model_response")[0].replace("raw_model_response.", "")
1545
-
1546
- s = Scenario({
1547
- "original_question": q,
1548
- "original_agent_index": row["agent.agent_index"],
1549
- "original_scenario_index": row["scenario.scenario_index"],
1550
- "original_prompts": f"User prompt: {row[f'prompt.{q}_user_prompt']}\nSystem prompt: {row[f'prompt.{q}_system_prompt']}"
1551
- })
1552
-
1689
+ q = col.split("_raw_model_response")[0].replace(
1690
+ "raw_model_response.", ""
1691
+ )
1692
+
1693
+ s = Scenario(
1694
+ {
1695
+ "original_question": q,
1696
+ "original_agent_index": row["agent.agent_index"],
1697
+ "original_scenario_index": row["scenario.scenario_index"],
1698
+ "original_prompts": f"User prompt: {row[f'prompt.{q}_user_prompt']}\nSystem prompt: {row[f'prompt.{q}_system_prompt']}",
1699
+ }
1700
+ )
1701
+
1553
1702
  scenario_list.append(s)
1554
1703
 
1555
1704
  sl = ScenarioList(set(scenario_list))
1556
1705
 
1557
1706
  q1 = QuestionFreeText(
1558
- question_name = "issues",
1559
- question_text = """
1707
+ question_name="issues",
1708
+ question_text="""
1560
1709
  The following prompts generated a bad or null response: '{{ original_prompts }}'
1561
1710
  What do you think was the likely issue(s)?
1562
- """
1711
+ """,
1563
1712
  )
1564
1713
 
1565
1714
  q2 = QuestionDict(
1566
- question_name = "revised",
1567
- question_text = """
1715
+ question_name="revised",
1716
+ question_text="""
1568
1717
  The following prompts generated a bad or null response: '{{ original_prompts }}'
1569
1718
  You identified the issue(s) as '{{ issues.answer }}'.
1570
1719
  Please revise the prompts to address the issue(s).
1571
1720
  """,
1572
- answer_keys = ["revised_user_prompt", "revised_system_prompt"]
1721
+ answer_keys=["revised_user_prompt", "revised_system_prompt"],
1573
1722
  )
1574
1723
 
1575
- survey = Survey(questions = [q1, q2])
1724
+ survey = Survey(questions=[q1, q2])
1576
1725
 
1577
1726
  if models is not None:
1578
1727
  if not isinstance(models, ModelList):
1579
1728
  raise ResultsError("models must be a ModelList")
1580
1729
  results = survey.by(sl).by(models).run()
1581
1730
  else:
1582
- results = survey.by(sl).run() # use the default model
1731
+ results = survey.by(sl).run() # use the default model
1583
1732
 
1584
1733
  return results
1585
1734
 
1586
1735
 
1587
1736
  def main(): # pragma: no cover
1588
- """Call the OpenAI API credits."""
1737
+ """Run example operations on a Results object.
1738
+
1739
+ This function demonstrates basic filtering and mutation operations on
1740
+ a Results object, printing the output.
1741
+
1742
+ Examples:
1743
+ >>> # This can be run directly as a script
1744
+ >>> # python -m edsl.results.results
1745
+ >>> # It will create example results and show filtering and mutation
1746
+ """
1589
1747
  from ..results import Results
1590
1748
 
1591
1749
  results = Results.example(debug=True)
@@ -1595,4 +1753,5 @@ def main(): # pragma: no cover
1595
1753
 
1596
1754
  if __name__ == "__main__":
1597
1755
  import doctest
1756
+
1598
1757
  doctest.testmod(optionflags=doctest.ELLIPSIS)