edsl 0.1.50__py3-none-any.whl → 0.1.52__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. edsl/__init__.py +45 -34
  2. edsl/__version__.py +1 -1
  3. edsl/base/base_exception.py +2 -2
  4. edsl/buckets/bucket_collection.py +1 -1
  5. edsl/buckets/exceptions.py +32 -0
  6. edsl/buckets/token_bucket_api.py +26 -10
  7. edsl/caching/cache.py +5 -2
  8. edsl/caching/remote_cache_sync.py +5 -5
  9. edsl/caching/sql_dict.py +12 -11
  10. edsl/config/__init__.py +1 -1
  11. edsl/config/config_class.py +4 -2
  12. edsl/conversation/Conversation.py +9 -5
  13. edsl/conversation/car_buying.py +1 -3
  14. edsl/conversation/mug_negotiation.py +2 -6
  15. edsl/coop/__init__.py +11 -8
  16. edsl/coop/coop.py +15 -13
  17. edsl/coop/coop_functions.py +1 -1
  18. edsl/coop/ep_key_handling.py +1 -1
  19. edsl/coop/price_fetcher.py +2 -2
  20. edsl/coop/utils.py +2 -2
  21. edsl/dataset/dataset.py +144 -63
  22. edsl/dataset/dataset_operations_mixin.py +14 -6
  23. edsl/dataset/dataset_tree.py +3 -3
  24. edsl/dataset/display/table_renderers.py +6 -3
  25. edsl/dataset/file_exports.py +4 -4
  26. edsl/dataset/r/ggplot.py +3 -3
  27. edsl/inference_services/available_model_fetcher.py +2 -2
  28. edsl/inference_services/data_structures.py +5 -5
  29. edsl/inference_services/inference_service_abc.py +1 -1
  30. edsl/inference_services/inference_services_collection.py +1 -1
  31. edsl/inference_services/service_availability.py +3 -3
  32. edsl/inference_services/services/azure_ai.py +3 -3
  33. edsl/inference_services/services/google_service.py +1 -1
  34. edsl/inference_services/services/test_service.py +1 -1
  35. edsl/instructions/change_instruction.py +5 -4
  36. edsl/instructions/instruction.py +1 -0
  37. edsl/instructions/instruction_collection.py +5 -4
  38. edsl/instructions/instruction_handler.py +10 -8
  39. edsl/interviews/answering_function.py +20 -21
  40. edsl/interviews/exception_tracking.py +3 -2
  41. edsl/interviews/interview.py +1 -1
  42. edsl/interviews/interview_status_dictionary.py +1 -1
  43. edsl/interviews/interview_task_manager.py +7 -4
  44. edsl/interviews/request_token_estimator.py +3 -2
  45. edsl/interviews/statistics.py +2 -2
  46. edsl/invigilators/invigilators.py +34 -6
  47. edsl/jobs/__init__.py +39 -2
  48. edsl/jobs/async_interview_runner.py +1 -1
  49. edsl/jobs/check_survey_scenario_compatibility.py +5 -5
  50. edsl/jobs/data_structures.py +2 -2
  51. edsl/jobs/html_table_job_logger.py +494 -257
  52. edsl/jobs/jobs.py +2 -2
  53. edsl/jobs/jobs_checks.py +5 -5
  54. edsl/jobs/jobs_component_constructor.py +2 -2
  55. edsl/jobs/jobs_pricing_estimation.py +1 -1
  56. edsl/jobs/jobs_runner_asyncio.py +2 -2
  57. edsl/jobs/jobs_status_enums.py +1 -0
  58. edsl/jobs/remote_inference.py +47 -13
  59. edsl/jobs/results_exceptions_handler.py +2 -2
  60. edsl/language_models/language_model.py +151 -145
  61. edsl/notebooks/__init__.py +24 -1
  62. edsl/notebooks/exceptions.py +82 -0
  63. edsl/notebooks/notebook.py +7 -3
  64. edsl/notebooks/notebook_to_latex.py +1 -1
  65. edsl/prompts/__init__.py +23 -2
  66. edsl/prompts/prompt.py +1 -1
  67. edsl/questions/__init__.py +4 -4
  68. edsl/questions/answer_validator_mixin.py +0 -5
  69. edsl/questions/compose_questions.py +2 -2
  70. edsl/questions/descriptors.py +1 -1
  71. edsl/questions/question_base.py +32 -3
  72. edsl/questions/question_base_prompts_mixin.py +4 -4
  73. edsl/questions/question_budget.py +503 -102
  74. edsl/questions/question_check_box.py +658 -156
  75. edsl/questions/question_dict.py +176 -2
  76. edsl/questions/question_extract.py +401 -61
  77. edsl/questions/question_free_text.py +77 -9
  78. edsl/questions/question_functional.py +118 -9
  79. edsl/questions/{derived/question_likert_five.py → question_likert_five.py} +2 -2
  80. edsl/questions/{derived/question_linear_scale.py → question_linear_scale.py} +3 -4
  81. edsl/questions/question_list.py +246 -26
  82. edsl/questions/question_matrix.py +586 -73
  83. edsl/questions/question_multiple_choice.py +213 -47
  84. edsl/questions/question_numerical.py +360 -29
  85. edsl/questions/question_rank.py +401 -124
  86. edsl/questions/question_registry.py +3 -3
  87. edsl/questions/{derived/question_top_k.py → question_top_k.py} +3 -3
  88. edsl/questions/{derived/question_yes_no.py → question_yes_no.py} +3 -4
  89. edsl/questions/register_questions_meta.py +2 -1
  90. edsl/questions/response_validator_abc.py +6 -2
  91. edsl/questions/response_validator_factory.py +10 -12
  92. edsl/results/report.py +1 -1
  93. edsl/results/result.py +7 -4
  94. edsl/results/results.py +500 -271
  95. edsl/results/results_selector.py +2 -2
  96. edsl/scenarios/construct_download_link.py +3 -3
  97. edsl/scenarios/scenario.py +1 -2
  98. edsl/scenarios/scenario_list.py +41 -23
  99. edsl/surveys/survey_css.py +3 -3
  100. edsl/surveys/survey_simulator.py +2 -1
  101. edsl/tasks/__init__.py +22 -2
  102. edsl/tasks/exceptions.py +72 -0
  103. edsl/tasks/task_history.py +48 -11
  104. edsl/templates/error_reporting/base.html +37 -4
  105. edsl/templates/error_reporting/exceptions_table.html +105 -33
  106. edsl/templates/error_reporting/interview_details.html +130 -126
  107. edsl/templates/error_reporting/overview.html +21 -25
  108. edsl/templates/error_reporting/report.css +215 -46
  109. edsl/templates/error_reporting/report.js +122 -20
  110. edsl/tokens/__init__.py +27 -1
  111. edsl/tokens/exceptions.py +37 -0
  112. edsl/tokens/interview_token_usage.py +3 -2
  113. edsl/tokens/token_usage.py +4 -3
  114. {edsl-0.1.50.dist-info → edsl-0.1.52.dist-info}/METADATA +1 -1
  115. {edsl-0.1.50.dist-info → edsl-0.1.52.dist-info}/RECORD +118 -116
  116. edsl/questions/derived/__init__.py +0 -0
  117. {edsl-0.1.50.dist-info → edsl-0.1.52.dist-info}/LICENSE +0 -0
  118. {edsl-0.1.50.dist-info → edsl-0.1.52.dist-info}/WHEEL +0 -0
  119. {edsl-0.1.50.dist-info → edsl-0.1.52.dist-info}/entry_points.txt +0 -0
edsl/results/results.py CHANGED
@@ -1,5 +1,4 @@
1
- """
2
- The Results module provides tools for working with collections of Result objects.
1
+ """The Results module provides tools for working with collections of Result objects.
3
2
 
4
3
  The Results class is the primary container for analyzing and manipulating data obtained
5
4
  from running surveys with language models. It implements a powerful data analysis interface
@@ -45,10 +44,10 @@ from typing import Optional, Callable, Any, Union, List, TYPE_CHECKING
45
44
  from bisect import bisect_left
46
45
 
47
46
  from ..base import Base
47
+ from ..caching import Cache, CacheEntry
48
48
 
49
49
  if TYPE_CHECKING:
50
50
  from ..surveys import Survey
51
- from ..data import Cache
52
51
  from ..agents import AgentList
53
52
  from ..scenarios import ScenarioList
54
53
  from ..results import Result
@@ -70,23 +69,43 @@ from .exceptions import (
70
69
  ResultsDeserializationError,
71
70
  )
72
71
 
72
+
73
73
  def ensure_fetched(method):
74
- """A decorator that checks if remote data is loaded, and if not, attempts to fetch it."""
74
+ """A decorator that checks if remote data is loaded, and if not, attempts to fetch it.
75
+
76
+ Args:
77
+ method: The method to decorate.
78
+
79
+ Returns:
80
+ The wrapped method that will ensure data is fetched before execution.
81
+ """
82
+
75
83
  def wrapper(self, *args, **kwargs):
76
84
  if not self._fetched:
77
85
  # If not fetched, try fetching now.
78
86
  # (If you know you have job info stored in self.job_info)
79
87
  self.fetch_remote(self.job_info)
80
88
  return method(self, *args, **kwargs)
89
+
81
90
  return wrapper
82
91
 
92
+
83
93
  def ensure_ready(method):
84
- """
85
- Decorator for Results methods.
86
-
94
+ """Decorator for Results methods to handle not-ready state.
95
+
87
96
  If the Results object is not ready, for most methods we return a NotReadyObject.
88
97
  However, for __repr__ (and other methods that need to return a string), we return
89
98
  the string representation of NotReadyObject.
99
+
100
+ Args:
101
+ method: The method to decorate.
102
+
103
+ Returns:
104
+ The wrapped method that will handle not-ready Results objects appropriately.
105
+
106
+ Raises:
107
+ Exception: Any exception from fetch_remote will be caught and printed.
108
+
90
109
  """
91
110
  from functools import wraps
92
111
 
@@ -101,7 +120,7 @@ def ensure_ready(method):
101
120
  except Exception as e:
102
121
  print(f"Error during fetch_remote in {method.__name__}: {e}")
103
122
  if not self.completed:
104
- not_ready = NotReadyObject(name = method.__name__, job_info = self.job_info)
123
+ not_ready = NotReadyObject(name=method.__name__, job_info=self.job_info)
105
124
  # For __repr__, ensure we return a string
106
125
  if method.__name__ == "__repr__" or method.__name__ == "__str__":
107
126
  return not_ready.__repr__()
@@ -110,59 +129,115 @@ def ensure_ready(method):
110
129
 
111
130
  return wrapper
112
131
 
132
+
113
133
  class NotReadyObject:
114
- """A placeholder object that prints a message when any attribute is accessed."""
115
- def __init__(self, name: str, job_info: 'Any'):
134
+ """A placeholder object that indicates results are not ready yet.
135
+
136
+ This class returns itself for all attribute accesses and method calls,
137
+ displaying a message about the job's running status when represented as a string.
138
+
139
+ Attributes:
140
+ name: The name of the method that was originally called.
141
+ job_info: Information about the running job.
142
+
143
+ """
144
+
145
+ def __init__(self, name: str, job_info: "Any"):
146
+ """Initialize a NotReadyObject.
147
+
148
+ Args:
149
+ name: The name of the method that was attempted to be called.
150
+ job_info: Information about the running job.
151
+ """
116
152
  self.name = name
117
153
  self.job_info = job_info
118
- #print(f"Not ready to call {name}")
154
+ # print(f"Not ready to call {name}")
119
155
 
120
156
  def __repr__(self):
157
+ """Generate a string representation showing the job is still running.
158
+
159
+ Returns:
160
+ str: A message indicating the job is still running, along with job details.
161
+ """
121
162
  message = """Results not ready - job still running on server."""
122
163
  for key, value in self.job_info.creation_data.items():
123
164
  message += f"\n{key}: {value}"
124
165
  return message
125
166
 
126
167
  def __getattr__(self, _):
168
+ """Return self for any attribute access.
169
+
170
+ Args:
171
+ _: The attribute name (ignored).
172
+
173
+ Returns:
174
+ NotReadyObject: Returns self for chaining.
175
+ """
127
176
  return self
128
-
177
+
129
178
  def __call__(self, *args, **kwargs):
179
+ """Return self when called as a function.
180
+
181
+ Args:
182
+ *args: Positional arguments (ignored).
183
+ **kwargs: Keyword arguments (ignored).
184
+
185
+ Returns:
186
+ NotReadyObject: Returns self for chaining.
187
+ """
130
188
  return self
131
189
 
132
190
 
133
191
  class Results(UserList, ResultsOperationsMixin, Base):
134
- """
135
- A collection of Result objects with powerful data analysis capabilities.
136
-
192
+ """A collection of Result objects with powerful data analysis capabilities.
193
+
137
194
  The Results class is the primary container for working with data from EDSL surveys.
138
195
  It provides a rich set of methods for data analysis, transformation, and visualization
139
- inspired by data manipulation libraries like dplyr and pandas. The Results class
140
- implements a functional, fluent interface for data manipulation where each method
196
+ inspired by data manipulation libraries like dplyr and pandas. The Results class
197
+ implements a functional, fluent interface for data manipulation where each method
141
198
  returns a new Results object, allowing method chaining.
142
-
199
+
200
+ Attributes:
201
+ survey: The Survey object containing the questions used to generate results.
202
+ data: A list of Result objects containing the responses.
203
+ created_columns: A list of column names created through transformations.
204
+ cache: A Cache object for storing model responses.
205
+ completed: Whether the Results object is ready for use.
206
+ task_history: A TaskHistory object containing information about the tasks.
207
+ known_data_types: List of valid data type strings for accessing data.
208
+
143
209
  Key features:
144
-
145
- - List-like interface for accessing individual Result objects
146
- - Selection of specific data columns with `select()`
147
- - Filtering results with boolean expressions using `filter()`
148
- - Creating new derived columns with `mutate()`
149
- - Recoding values with `recode()` and `answer_truncate()`
150
- - Sorting results with `order_by()`
151
- - Converting to other formats (dataset, table, pandas DataFrame)
152
- - Serialization for storage and retrieval
153
- - Support for remote execution and result retrieval
154
-
210
+ - List-like interface for accessing individual Result objects
211
+ - Selection of specific data columns with `select()`
212
+ - Filtering results with boolean expressions using `filter()`
213
+ - Creating new derived columns with `mutate()`
214
+ - Recoding values with `recode()` and `answer_truncate()`
215
+ - Sorting results with `order_by()`
216
+ - Converting to other formats (dataset, table, pandas DataFrame)
217
+ - Serialization for storage and retrieval
218
+ - Support for remote execution and result retrieval
219
+
155
220
  Results objects have a hierarchical structure with the following components:
156
-
157
- 1. Each Results object contains multiple Result objects
158
- 2. Each Result object contains data organized by type (agent, scenario, model, answer, etc.)
159
- 3. Each data type contains multiple attributes (e.g., "how_feeling" in the answer type)
160
-
221
+ 1. Each Results object contains multiple Result objects
222
+ 2. Each Result object contains data organized by type (agent, scenario, model, answer, etc.)
223
+ 3. Each data type contains multiple attributes (e.g., "how_feeling" in the answer type)
224
+
161
225
  You can access data in a Results object using dot notation (`answer.how_feeling`) or
162
226
  using just the attribute name if it's not ambiguous (`how_feeling`).
163
-
227
+
164
228
  The Results class also tracks "created columns" - new derived values that aren't
165
229
  part of the original data but were created through transformations.
230
+
231
+ Examples:
232
+ >>> # Create a simple Results object from example data
233
+ >>> r = Results.example()
234
+ >>> len(r) > 0 # Contains Result objects
235
+ True
236
+ >>> # Filter and transform data
237
+ >>> filtered = r.filter("how_feeling == 'Great'")
238
+ >>> # Access hierarchical data
239
+ >>> 'agent' in r.known_data_types
240
+ True
166
241
  """
167
242
 
168
243
  __documentation__ = "https://docs.expectedparrot.com/en/latest/results.html"
@@ -185,9 +260,28 @@ class Results(UserList, ResultsOperationsMixin, Base):
185
260
  ]
186
261
 
187
262
  @classmethod
188
- def from_job_info(cls, job_info: dict) -> Results:
189
- """
190
- Instantiate a `Results` object from a job info dictionary.
263
+ def from_job_info(cls, job_info: dict) -> "Results":
264
+ """Instantiate a Results object from a job info dictionary.
265
+
266
+ This method creates a Results object in a not-ready state that will
267
+ fetch its data from a remote source when methods are called on it.
268
+
269
+ Args:
270
+ job_info: Dictionary containing information about a remote job.
271
+
272
+ Returns:
273
+ Results: A new Results instance with completed=False that will
274
+ fetch remote data when needed.
275
+
276
+ Examples:
277
+ >>> # Create a job info dictionary
278
+ >>> job_info = {'job_uuid': '12345', 'creation_data': {'model': 'gpt-4'}}
279
+ >>> # Create a Results object from the job info
280
+ >>> results = Results.from_job_info(job_info)
281
+ >>> results.completed
282
+ False
283
+ >>> hasattr(results, 'job_info')
284
+ True
191
285
  """
192
286
  results = cls()
193
287
  results.completed = False
@@ -204,14 +298,37 @@ class Results(UserList, ResultsOperationsMixin, Base):
204
298
  total_results: Optional[int] = None,
205
299
  task_history: Optional[TaskHistory] = None,
206
300
  ):
207
- """Instantiate a `Results` object with a survey and a list of `Result` objects.
208
-
209
- :param survey: A Survey object.
210
- :param data: A list of Result objects.
211
- :param created_columns: A list of strings that are created columns.
212
- :param job_uuid: A string representing the job UUID.
213
- :param total_results: An integer representing the total number of results.
214
- :cache: A Cache object.
301
+ """Instantiate a Results object with a survey and a list of Result objects.
302
+
303
+ This initializes a completed Results object with the provided data.
304
+ For creating a not-ready Results object from a job info dictionary,
305
+ use the from_job_info class method instead.
306
+
307
+ Args:
308
+ survey: A Survey object containing the questions used to generate results.
309
+ data: A list of Result objects containing the responses.
310
+ created_columns: A list of column names created through transformations.
311
+ cache: A Cache object for storing model responses.
312
+ job_uuid: A string representing the job UUID.
313
+ total_results: An integer representing the total number of results.
314
+ task_history: A TaskHistory object containing information about the tasks.
315
+
316
+ Examples:
317
+ >>> from ..results import Result
318
+ >>> # Create an empty Results object
319
+ >>> r = Results()
320
+ >>> r.completed
321
+ True
322
+ >>> len(r.created_columns)
323
+ 0
324
+
325
+ >>> # Create a Results object with data
326
+ >>> from unittest.mock import Mock
327
+ >>> mock_survey = Mock()
328
+ >>> mock_result = Mock(spec=Result)
329
+ >>> r = Results(survey=mock_survey, data=[mock_result])
330
+ >>> len(r)
331
+ 1
215
332
  """
216
333
  self.completed = True
217
334
  self._fetching = False
@@ -230,19 +347,26 @@ class Results(UserList, ResultsOperationsMixin, Base):
230
347
  if hasattr(self, "_add_output_functions"):
231
348
  self._add_output_functions()
232
349
 
233
-
234
350
  def _fetch_list(self, data_type: str, key: str) -> list:
235
- """
236
- Return a list of values from the data for a given data type and key.
351
+ """Return a list of values from the data for a given data type and key.
237
352
 
238
353
  Uses the filtered data, not the original data.
239
354
 
240
- Example:
355
+ Args:
356
+ data_type: The type of data to fetch (e.g., 'answer', 'agent', 'scenario').
357
+ key: The key to fetch from each data type dictionary.
241
358
 
242
- >>> from edsl.results import Results
243
- >>> r = Results.example()
244
- >>> r._fetch_list('answer', 'how_feeling')
245
- ['OK', 'Great', 'Terrible', 'OK']
359
+ Returns:
360
+ list: A list of values, one from each result in the data.
361
+
362
+ Examples:
363
+ >>> from edsl.results import Results
364
+ >>> r = Results.example()
365
+ >>> values = r._fetch_list('answer', 'how_feeling')
366
+ >>> len(values) == len(r)
367
+ True
368
+ >>> all(isinstance(v, (str, type(None))) for v in values)
369
+ True
246
370
  """
247
371
  returned_list = []
248
372
  for row in self.data:
@@ -250,6 +374,25 @@ class Results(UserList, ResultsOperationsMixin, Base):
250
374
 
251
375
  return returned_list
252
376
 
377
+ def get_answers(self, question_name: str) -> list:
378
+ """Get the answers for a given question name.
379
+
380
+ Args:
381
+ question_name: The name of the question to fetch answers for.
382
+
383
+ Returns:
384
+ list: A list of answers, one from each result in the data.
385
+
386
+ Examples:
387
+ >>> from edsl.results import Results
388
+ >>> r = Results.example()
389
+ >>> answers = r.get_answers('how_feeling')
390
+ >>> isinstance(answers, list)
391
+ True
392
+ >>> len(answers) == len(r)
393
+ True
394
+ """
395
+ return self._fetch_list("answer", question_name)
253
396
 
254
397
  def _summary(self) -> dict:
255
398
  import reprlib
@@ -301,8 +444,23 @@ class Results(UserList, ResultsOperationsMixin, Base):
301
444
  self.insert(item)
302
445
 
303
446
  def compute_job_cost(self, include_cached_responses_in_cost: bool = False) -> float:
304
- """
305
- Computes the cost of a completed job in USD.
447
+ """Compute the cost of a completed job in USD.
448
+
449
+ This method calculates the total cost of all model responses in the results.
450
+ By default, it only counts the cost of responses that were not cached.
451
+
452
+ Args:
453
+ include_cached_responses_in_cost: Whether to include the cost of cached
454
+ responses in the total. Defaults to False.
455
+
456
+ Returns:
457
+ float: The total cost in USD.
458
+
459
+ Examples:
460
+ >>> from edsl.results import Results
461
+ >>> r = Results.example()
462
+ >>> r.compute_job_cost()
463
+ 0
306
464
  """
307
465
  total_cost = 0
308
466
  for result in self:
@@ -321,88 +479,55 @@ class Results(UserList, ResultsOperationsMixin, Base):
321
479
 
322
480
  return total_cost
323
481
 
324
- # def leaves(self):
325
- # leaves = []
326
- # for result in self:
327
- # leaves.extend(result.leaves())
328
- # return leaves
329
-
330
- # def tree(self, node_list: Optional[List[str]] = None):
331
- # return self.to_scenario_list().tree(node_list)
332
-
333
- # def interactive_tree(
334
- # self,
335
- # fold_attributes: Optional[List[str]] = None,
336
- # drop: Optional[List[str]] = None,
337
- # open_file=True,
338
- # ) -> dict:
339
- # """Return the results as a tree."""
340
- # from edsl.results.tree_explore import FoldableHTMLTableGenerator
341
-
342
- # if drop is None:
343
- # drop = []
344
-
345
- # valid_attributes = [
346
- # "model",
347
- # "scenario",
348
- # "agent",
349
- # "answer",
350
- # "question",
351
- # "iteration",
352
- # ]
353
- # if fold_attributes is None:
354
- # fold_attributes = []
355
-
356
- # for attribute in fold_attributes:
357
- # if attribute not in valid_attributes:
358
- # raise ValueError(
359
- # f"Invalid fold attribute: {attribute}; must be in {valid_attributes}"
360
- # )
361
- # data = self.leaves()
362
- # generator = FoldableHTMLTableGenerator(data)
363
- # tree = generator.tree(fold_attributes=fold_attributes, drop=drop)
364
- # html_content = generator.generate_html(tree, fold_attributes)
365
- # import tempfile
366
- # from edsl.utilities.utilities import is_notebook
367
-
368
- # from IPython.display import display, HTML
369
-
370
- # if is_notebook():
371
- # import html
372
- # from IPython.display import display, HTML
373
-
374
- # height = 1000
375
- # width = 1000
376
- # escaped_output = html.escape(html_content)
377
- # # escaped_output = rendered_html
378
- # iframe = f""""
379
- # <iframe srcdoc="{ escaped_output }" style="width: {width}px; height: {height}px;"></iframe>
380
- # """
381
- # display(HTML(iframe))
382
- # return None
383
-
384
- # with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as f:
385
- # f.write(html_content.encode())
386
- # print(f"HTML file has been generated: {f.name}")
387
-
388
- # if open_file:
389
- # import webbrowser
390
- # import time
391
-
392
- # time.sleep(1) # Wait for 1 second
393
- # # webbrowser.open(f.name)
394
- # import os
395
-
396
- # filename = f.name
397
- # webbrowser.open(f"file://{os.path.abspath(filename)}")
398
-
399
- # else:
400
- # return html_content
401
-
402
482
  def code(self):
403
- raise NotImplementedError
483
+ """Method for generating code representations.
484
+
485
+ Raises:
486
+ ResultsError: This method is not implemented for Results objects.
487
+
488
+ Examples:
489
+ >>> from edsl.results import Results
490
+ >>> r = Results.example()
491
+ >>> try:
492
+ ... r.code()
493
+ ... except ResultsError as e:
494
+ ... str(e).startswith("The code() method is not implemented")
495
+ True
496
+ """
497
+ raise ResultsError("The code() method is not implemented for Results objects")
404
498
 
405
499
  def __getitem__(self, i):
500
+ """Get an item from the Results object by index, slice, or key.
501
+
502
+ Args:
503
+ i: An integer index, a slice, or a string key.
504
+
505
+ Returns:
506
+ The requested item, slice of results, or dictionary value.
507
+
508
+ Raises:
509
+ ResultsError: If the argument type is invalid for indexing.
510
+
511
+ Examples:
512
+ >>> from edsl.results import Results
513
+ >>> r = Results.example()
514
+ >>> # Get by integer index
515
+ >>> result = r[0]
516
+ >>> # Get by slice
517
+ >>> subset = r[0:2]
518
+ >>> len(subset) == 2
519
+ True
520
+ >>> # Get by string key
521
+ >>> data = r["data"]
522
+ >>> isinstance(data, list)
523
+ True
524
+ >>> # Invalid index type
525
+ >>> try:
526
+ ... r[1.5]
527
+ ... except ResultsError:
528
+ ... True
529
+ True
530
+ """
406
531
  if isinstance(i, int):
407
532
  return self.data[i]
408
533
 
@@ -412,18 +537,40 @@ class Results(UserList, ResultsOperationsMixin, Base):
412
537
  if isinstance(i, str):
413
538
  return self.to_dict()[i]
414
539
 
415
- raise TypeError("Invalid argument type")
540
+ raise ResultsError("Invalid argument type for indexing Results object")
416
541
 
417
542
  def __add__(self, other: Results) -> Results:
418
543
  """Add two Results objects together.
419
- They must have the same survey and created columns.
420
- :param other: A Results object.
421
544
 
422
- Example:
545
+ Combines two Results objects into a new one. Both objects must have the same
546
+ survey and created columns.
423
547
 
424
- >>> r = Results.example()
425
- >>> r2 = Results.example()
426
- >>> r3 = r + r2
548
+ Args:
549
+ other: A Results object to add to this one.
550
+
551
+ Returns:
552
+ A new Results object containing data from both objects.
553
+
554
+ Raises:
555
+ ResultsError: If the surveys or created columns of the two objects don't match.
556
+
557
+ Examples:
558
+ >>> from edsl.results import Results
559
+ >>> r1 = Results.example()
560
+ >>> r2 = Results.example()
561
+ >>> # Combine two Results objects
562
+ >>> r3 = r1 + r2
563
+ >>> len(r3) == len(r1) + len(r2)
564
+ True
565
+
566
+ >>> # Attempting to add incompatible Results
567
+ >>> from unittest.mock import Mock
568
+ >>> r4 = Results(survey=Mock()) # Different survey
569
+ >>> try:
570
+ ... r1 + r4
571
+ ... except ResultsError:
572
+ ... True
573
+ True
427
574
  """
428
575
  if self.survey != other.survey:
429
576
  raise ResultsError(
@@ -439,21 +586,17 @@ class Results(UserList, ResultsOperationsMixin, Base):
439
586
  data=self.data + other.data,
440
587
  created_columns=self.created_columns,
441
588
  )
442
-
589
+
443
590
  def _repr_html_(self):
444
591
  if not self.completed:
445
592
  if hasattr(self, "job_info"):
446
593
  self.fetch_remote(self.job_info)
447
-
594
+
448
595
  if not self.completed:
449
596
  return "Results not ready to call"
450
-
597
+
451
598
  return super()._repr_html_()
452
599
 
453
- # @ensure_ready
454
- # def __str__(self):
455
- # super().__str__()
456
-
457
600
  @ensure_ready
458
601
  def __repr__(self) -> str:
459
602
  return f"Results(data = {self.data}, survey = {repr(self.survey)}, created_columns = {self.created_columns})"
@@ -495,8 +638,8 @@ class Results(UserList, ResultsOperationsMixin, Base):
495
638
  print_parameters=print_parameters,
496
639
  )
497
640
  )
498
-
499
- def to_dataset(self) -> 'Dataset':
641
+
642
+ def to_dataset(self) -> "Dataset":
500
643
  return self.select()
501
644
 
502
645
  def to_dict(
@@ -540,7 +683,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
540
683
  d.update({"task_history": self.task_history.to_dict()})
541
684
 
542
685
  if add_edsl_version:
543
- from edsl import __version__
686
+ from .. import __version__
544
687
 
545
688
  d["edsl_version"] = __version__
546
689
  d["edsl_class_name"] = "Results"
@@ -564,12 +707,41 @@ class Results(UserList, ResultsOperationsMixin, Base):
564
707
  "b_not_a": [other_results[i] for i in indices_other],
565
708
  }
566
709
 
710
+ def initialize_cache_from_results(self):
711
+ cache = Cache(data={})
712
+
713
+ for result in self.data:
714
+ for key in result.data["prompt"]:
715
+ if key.endswith("_system_prompt"):
716
+ question_name = key.removesuffix("_system_prompt")
717
+ system_prompt = result.data["prompt"][key].text
718
+ user_key = f"{question_name}_user_prompt"
719
+ if user_key in result.data["prompt"]:
720
+ user_prompt = result.data["prompt"][user_key].text
721
+ else:
722
+ user_prompt = ""
723
+
724
+ # Get corresponding model response
725
+ response_key = f"{question_name}_raw_model_response"
726
+ output = result.data["raw_model_response"].get(response_key, "")
727
+
728
+ entry = CacheEntry(
729
+ model=result.model.model,
730
+ parameters=result.model.parameters,
731
+ system_prompt=system_prompt,
732
+ user_prompt=user_prompt,
733
+ output=json.dumps(output),
734
+ iteration=0,
735
+ )
736
+ cache.data[entry.key] = entry
737
+
738
+ self.cache = cache
739
+
567
740
  @property
568
741
  def has_unfixed_exceptions(self) -> bool:
569
742
  return self.task_history.has_unfixed_exceptions
570
743
 
571
744
  def __hash__(self) -> int:
572
-
573
745
  return dict_hash(
574
746
  self.to_dict(sort=True, add_edsl_version=False, include_cache_info=False)
575
747
  )
@@ -695,7 +867,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
695
867
  ['agent.agent_index', ...]
696
868
  """
697
869
  column_names = [f"{v}.{k}" for k, v in self._key_to_data_type.items()]
698
- from edsl.utilities.PrettyList import PrettyList
870
+ from ..utilities.PrettyList import PrettyList
699
871
 
700
872
  return PrettyList(sorted(column_names))
701
873
 
@@ -709,7 +881,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
709
881
  >>> r.answer_keys
710
882
  {'how_feeling': 'How are you this {{ period }}?', 'how_feeling_yesterday': 'How were you feeling yesterday {{ period }}?'}
711
883
  """
712
- from edsl.utilities.utilities import shorten_string
884
+ from ..utilities.utilities import shorten_string
713
885
 
714
886
  if not self.survey:
715
887
  raise ResultsError("Survey is not defined so no answer keys are available.")
@@ -734,7 +906,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
734
906
  >>> r.agents
735
907
  AgentList([Agent(traits = {'status': 'Joyful'}), Agent(traits = {'status': 'Joyful'}), Agent(traits = {'status': 'Sad'}), Agent(traits = {'status': 'Sad'})])
736
908
  """
737
- from edsl.agents import AgentList
909
+ from ..agents import AgentList
738
910
 
739
911
  return AgentList([r.agent for r in self.data])
740
912
 
@@ -845,7 +1017,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
845
1017
  return self.data[0]
846
1018
 
847
1019
  def answer_truncate(
848
- self, column: str, top_n: int = 5, new_var_name: str = None
1020
+ self, column: str, top_n: int = 5, new_var_name: Optional[str] = None
849
1021
  ) -> Results:
850
1022
  """Create a new variable that truncates the answers to the top_n.
851
1023
 
@@ -976,24 +1148,23 @@ class Results(UserList, ResultsOperationsMixin, Base):
976
1148
  def mutate(
977
1149
  self, new_var_string: str, functions_dict: Optional[dict] = None
978
1150
  ) -> Results:
979
- """
980
- Create a new column based on a computational expression.
981
-
1151
+ """Create a new column based on a computational expression.
1152
+
982
1153
  The mutate method allows you to create new derived variables based on existing data.
983
1154
  You provide an assignment expression where the left side is the new column name
984
1155
  and the right side is a Python expression that computes the value. The expression
985
1156
  can reference any existing columns in the Results object.
986
-
987
- Parameters:
988
- new_var_string: A string containing an assignment expression in the form
989
- "new_column_name = expression". The expression can reference
990
- any existing column and use standard Python syntax.
991
- functions_dict: Optional dictionary of custom functions that can be used in
992
- the expression. Keys are function names, values are function objects.
993
-
1157
+
1158
+ Args:
1159
+ new_var_string: A string containing an assignment expression in the form
1160
+ "new_column_name = expression". The expression can reference
1161
+ any existing column and use standard Python syntax.
1162
+ functions_dict: Optional dictionary of custom functions that can be used in
1163
+ the expression. Keys are function names, values are function objects.
1164
+
994
1165
  Returns:
995
1166
  A new Results object with the additional column.
996
-
1167
+
997
1168
  Notes:
998
1169
  - The expression must contain an equals sign (=) separating the new column name
999
1170
  from the computation expression
@@ -1002,22 +1173,22 @@ class Results(UserList, ResultsOperationsMixin, Base):
1002
1173
  - The expression can access any data in the Result object using the column names
1003
1174
  - New columns are added to the "answer" data type
1004
1175
  - Created columns are tracked in the `created_columns` property
1005
-
1176
+
1006
1177
  Examples:
1007
1178
  >>> r = Results.example()
1008
-
1009
- # Create a simple derived column
1179
+
1180
+ >>> # Create a simple derived column
1010
1181
  >>> r.mutate('how_feeling_x = how_feeling + "x"').select('how_feeling_x')
1011
1182
  Dataset([{'answer.how_feeling_x': ['OKx', 'Greatx', 'Terriblex', 'OKx']}])
1012
-
1013
- # Create a binary indicator column
1183
+
1184
+ >>> # Create a binary indicator column
1014
1185
  >>> r.mutate('is_great = 1 if how_feeling == "Great" else 0').select('is_great')
1015
1186
  Dataset([{'answer.is_great': [0, 1, 0, 0]}])
1016
-
1017
- # Create a column with custom functions
1187
+
1188
+ >>> # Create a column with custom functions
1018
1189
  >>> def sentiment(text):
1019
1190
  ... return len(text) > 5
1020
- >>> r.mutate('is_long = sentiment(how_feeling)',
1191
+ >>> r.mutate('is_long = sentiment(how_feeling)',
1021
1192
  ... functions_dict={'sentiment': sentiment}).select('is_long')
1022
1193
  Dataset([{'answer.is_long': [False, False, True, False]}])
1023
1194
  """
@@ -1028,7 +1199,7 @@ class Results(UserList, ResultsOperationsMixin, Base):
1028
1199
  )
1029
1200
  raw_var_name, expression = new_var_string.split("=", 1)
1030
1201
  var_name = raw_var_name.strip()
1031
- from edsl.utilities.utilities import is_valid_variable_name
1202
+ from ..utilities.utilities import is_valid_variable_name
1032
1203
 
1033
1204
  if not is_valid_variable_name(var_name):
1034
1205
  raise ResultsInvalidNameError(f"{var_name} is not a valid variable name.")
@@ -1116,10 +1287,14 @@ class Results(UserList, ResultsOperationsMixin, Base):
1116
1287
  random.seed(seed)
1117
1288
 
1118
1289
  if n is None and frac is None:
1119
- raise Exception("You must specify either n or frac.")
1290
+ from .exceptions import ResultsError
1291
+
1292
+ raise ResultsError("You must specify either n or frac.")
1120
1293
 
1121
1294
  if n is not None and frac is not None:
1122
- raise Exception("You cannot specify both n and frac.")
1295
+ from .exceptions import ResultsError
1296
+
1297
+ raise ResultsError("You cannot specify both n and frac.")
1123
1298
 
1124
1299
  if frac is not None and n is None:
1125
1300
  n = int(frac * len(self.data))
@@ -1132,61 +1307,62 @@ class Results(UserList, ResultsOperationsMixin, Base):
1132
1307
  return Results(survey=self.survey, data=new_data, created_columns=None)
1133
1308
 
1134
1309
  @ensure_ready
1135
- def select(self, *columns: Union[str, list[str]]) -> 'Dataset':
1136
- """
1137
- Extract specific columns from the Results into a Dataset.
1138
-
1310
+ def select(self, *columns: Union[str, list[str]]) -> "Dataset":
1311
+ """Extract specific columns from the Results into a Dataset.
1312
+
1139
1313
  This method allows you to select specific columns from the Results object
1140
1314
  and transforms the data into a Dataset for further analysis and visualization.
1141
1315
  A Dataset is a more general-purpose data structure optimized for analysis
1142
1316
  operations rather than the hierarchical structure of Result objects.
1143
-
1144
- Parameters:
1317
+
1318
+ Args:
1145
1319
  *columns: Column names to select. Each column can be:
1146
- - A simple attribute name (e.g., "how_feeling")
1147
- - A fully qualified name with type (e.g., "answer.how_feeling")
1148
- - A wildcard pattern (e.g., "answer.*" to select all answer fields)
1149
- If no columns are provided, selects all data.
1150
-
1320
+ - A simple attribute name (e.g., "how_feeling")
1321
+ - A fully qualified name with type (e.g., "answer.how_feeling")
1322
+ - A wildcard pattern (e.g., "answer.*" to select all answer fields)
1323
+ If no columns are provided, selects all data.
1324
+
1151
1325
  Returns:
1152
1326
  A Dataset object containing the selected data.
1153
-
1327
+
1154
1328
  Notes:
1155
1329
  - Column names are automatically disambiguated if needed
1156
1330
  - When column names are ambiguous, specify the full path with data type
1157
1331
  - You can use wildcard patterns with "*" to select multiple related fields
1158
1332
  - Selecting with no arguments returns all data
1159
1333
  - Results are restructured in a columnar format in the Dataset
1160
-
1334
+
1161
1335
  Examples:
1162
1336
  >>> results = Results.example()
1163
-
1164
- # Select a single column by name
1337
+
1338
+ >>> # Select a single column by name
1165
1339
  >>> results.select('how_feeling')
1166
1340
  Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])
1167
-
1168
- # Select multiple columns
1341
+
1342
+ >>> # Select multiple columns
1169
1343
  >>> ds = results.select('how_feeling', 'how_feeling_yesterday')
1170
1344
  >>> sorted([list(d.keys())[0] for d in ds])
1171
1345
  ['answer.how_feeling', 'answer.how_feeling_yesterday']
1172
-
1173
- # Using fully qualified names with data type
1346
+
1347
+ >>> # Using fully qualified names with data type
1174
1348
  >>> results.select('answer.how_feeling')
1175
1349
  Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])
1176
-
1177
- # Using partial matching for column names
1350
+
1351
+ >>> # Using partial matching for column names
1178
1352
  >>> results.select('answer.how_feeling_y')
1179
1353
  Dataset([{'answer.how_feeling_yesterday': ['Great', 'Good', 'OK', 'Terrible']}])
1180
-
1181
- # Select all columns (same as calling select with no arguments)
1182
- >>> results.select('*.*')
1354
+
1355
+ >>> # Select all columns (same as calling select with no arguments)
1356
+ >>> results.select('*.*')
1183
1357
  Dataset([...])
1184
1358
  """
1185
1359
 
1186
- from edsl.results.results_selector import Selector
1360
+ from .results_selector import Selector
1187
1361
 
1188
1362
  if len(self) == 0:
1189
- raise Exception("No data to select from---the Results object is empty.")
1363
+ from .exceptions import ResultsError
1364
+
1365
+ raise ResultsError("No data to select from---the Results object is empty.")
1190
1366
 
1191
1367
  selector = Selector(
1192
1368
  known_data_types=self.known_data_types,
@@ -1250,21 +1426,24 @@ class Results(UserList, ResultsOperationsMixin, Base):
1250
1426
 
1251
1427
  @ensure_ready
1252
1428
  def filter(self, expression: str) -> Results:
1253
- """
1254
- Filter results based on a boolean expression.
1255
-
1429
+ """Filter results based on a boolean expression.
1430
+
1256
1431
  This method evaluates a boolean expression against each Result object in the
1257
1432
  collection and returns a new Results object containing only those that match.
1258
1433
  The expression can reference any column in the data and supports standard
1259
1434
  Python operators and syntax.
1260
-
1261
- Parameters:
1435
+
1436
+ Args:
1262
1437
  expression: A string containing a Python expression that evaluates to a boolean.
1263
1438
  The expression is applied to each Result object individually.
1264
-
1439
+
1265
1440
  Returns:
1266
1441
  A new Results object containing only the Result objects that satisfy the expression.
1267
-
1442
+
1443
+ Raises:
1444
+ ResultsFilterError: If the expression is invalid or uses improper syntax
1445
+ (like using '=' instead of '==').
1446
+
1268
1447
  Notes:
1269
1448
  - Column names can be specified with or without their data type prefix
1270
1449
  (e.g., both "how_feeling" and "answer.how_feeling" work if unambiguous)
@@ -1273,23 +1452,23 @@ class Results(UserList, ResultsOperationsMixin, Base):
1273
1452
  - You can use comparison operators like '==', '!=', '>', '<', '>=', '<='
1274
1453
  - You can use membership tests with 'in'
1275
1454
  - You can use string methods like '.startswith()', '.contains()', etc.
1276
-
1455
+
1277
1456
  Examples:
1278
1457
  >>> r = Results.example()
1279
-
1280
- # Simple equality filter
1458
+
1459
+ >>> # Simple equality filter
1281
1460
  >>> r.filter("how_feeling == 'Great'").select('how_feeling')
1282
1461
  Dataset([{'answer.how_feeling': ['Great']}])
1283
-
1284
- # Using OR condition
1462
+
1463
+ >>> # Using OR condition
1285
1464
  >>> r.filter("how_feeling == 'Great' or how_feeling == 'Terrible'").select('how_feeling')
1286
1465
  Dataset([{'answer.how_feeling': ['Great', 'Terrible']}])
1287
-
1288
- # Filter on agent properties
1466
+
1467
+ >>> # Filter on agent properties
1289
1468
  >>> r.filter("agent.status == 'Joyful'").select('agent.status')
1290
1469
  Dataset([{'agent.status': ['Joyful', 'Joyful']}])
1291
-
1292
- # Common error: using = instead of ==
1470
+
1471
+ >>> # Common error: using = instead of ==
1293
1472
  >>> try:
1294
1473
  ... r.filter("how_feeling = 'Great'")
1295
1474
  ... except Exception as e:
@@ -1394,45 +1573,58 @@ class Results(UserList, ResultsOperationsMixin, Base):
1394
1573
  [1, 1, 0, 0]
1395
1574
  """
1396
1575
  return [r.score(f) for r in self.data]
1397
-
1576
+
def score_with_answer_key(self, answer_key: dict) -> list:
    """Score every stored result against an answer key.

    :param answer_key: A dictionary that maps answer values to scores.
    :return: A list with one entry per element of ``self.data``, in order,
        each produced by that element's own ``score_with_answer_key``.
    """
    scores = []
    for result in self.data:
        scores.append(result.score_with_answer_key(answer_key))
    return scores
1404
-
1405
1583
 
1406
1584
  def fetch_remote(self, job_info: Any) -> None:
1407
- """
1408
- Fetches the remote Results object using the provided RemoteJobInfo and updates this instance with the remote data.
1409
-
1410
- This is useful when you have a Results object that was created locally but want to sync it with
1585
+ """Fetch remote Results object and update this instance with the data.
1586
+
1587
+ This is useful when you have a Results object that was created locally but want to sync it with
1411
1588
  the latest data from the remote server.
1412
-
1589
+
1413
1590
  Args:
1414
1591
  job_info: RemoteJobInfo object containing the job_uuid and other remote job details
1415
-
1592
+
1593
+ Returns:
1594
+ bool: True if the fetch was successful, False if the job is not yet completed.
1595
+
1596
+ Raises:
1597
+ ResultsError: If there's an error during the fetch process.
1598
+
1599
+ Examples:
1600
+ >>> # This is a simplified example since we can't actually test this without a remote server
1601
+ >>> from unittest.mock import Mock, patch
1602
+ >>> # Create a mock job_info and Results
1603
+ >>> job_info = Mock()
1604
+ >>> job_info.job_uuid = "test_uuid"
1605
+ >>> results = Results()
1606
+ >>> # In a real scenario:
1607
+ >>> # results.fetch_remote(job_info)
1608
+ >>> # results.completed # Would be True if successful
1416
1609
  """
1417
- #print("Calling fetch_remote")
1418
1610
  try:
1419
1611
  from ..coop import Coop
1420
1612
  from ..jobs import JobsRemoteInferenceHandler
1421
-
1613
+
1422
1614
  # Get the remote job data
1423
1615
  remote_job_data = JobsRemoteInferenceHandler.check_status(job_info.job_uuid)
1424
-
1616
+
1425
1617
  if remote_job_data.get("status") not in ["completed", "failed"]:
1426
1618
  return False
1427
- #
1619
+ #
1428
1620
  results_uuid = remote_job_data.get("results_uuid")
1429
1621
  if not results_uuid:
1430
1622
  raise ResultsError("No results_uuid found in remote job data")
1431
-
1623
+
1432
1624
  # Fetch the remote Results object
1433
1625
  coop = Coop()
1434
1626
  remote_results = coop.get(results_uuid, expected_object_type="results")
1435
-
1627
+
1436
1628
  # Update this instance with remote data
1437
1629
  self.data = remote_results.data
1438
1630
  self.survey = remote_results.survey
@@ -1440,10 +1632,10 @@ class Results(UserList, ResultsOperationsMixin, Base):
1440
1632
  self.cache = remote_results.cache
1441
1633
  self.task_history = remote_results.task_history
1442
1634
  self.completed = True
1443
-
1635
+
1444
1636
  # Set job_uuid and results_uuid from remote data
1445
1637
  self.job_uuid = job_info.job_uuid
1446
- if hasattr(remote_results, 'results_uuid'):
1638
+ if hasattr(remote_results, "results_uuid"):
1447
1639
  self.results_uuid = remote_results.results_uuid
1448
1640
 
1449
1641
  return True
@@ -1451,39 +1643,60 @@ class Results(UserList, ResultsOperationsMixin, Base):
1451
1643
  except Exception as e:
1452
1644
  raise ResultsError(f"Failed to fetch remote results: {str(e)}")
1453
1645
 
def fetch(self, polling_interval: Union[float, int] = 1.0) -> Results:
    """Poll the server for job completion and update this Results instance.

    The remote job status is checked repeatedly, sleeping
    ``polling_interval`` seconds between checks, until it is reported as
    "completed" or "failed". ``fetch_remote`` is then called to load the
    final data into this object.

    Args:
        polling_interval: Number of seconds to wait between polling attempts (default: 1.0)

    Returns:
        self: The updated Results instance

    Raises:
        ResultsError: If no job info is available or if there's an error during fetch.

    Examples:
        >>> # This is a simplified example since we can't actually test polling
        >>> results = Results()
        >>> # In a real scenario with a running job:
        >>> # results.job_info = remote_job_info
        >>> # results.fetch()  # Would poll until complete
        >>> # results.completed  # Would be True if successful
    """
    if not hasattr(self, "job_info"):
        raise ResultsError(
            "No job info available - this Results object wasn't created from a remote job"
        )

    # Imported once up front rather than on every iteration of the polling loop.
    import time

    from ..jobs import JobsRemoteInferenceHandler

    terminal_statuses = ("completed", "failed")

    try:
        job_uuid = self.job_info.job_uuid

        # Get the remote job data
        remote_job_data = JobsRemoteInferenceHandler.check_status(job_uuid)

        while remote_job_data.get("status") not in terminal_statuses:
            print("Waiting for remote job to complete...")
            time.sleep(polling_interval)
            remote_job_data = JobsRemoteInferenceHandler.check_status(job_uuid)

        # Once complete, fetch the full results
        self.fetch_remote(self.job_info)
        return self

    except ResultsError:
        # Already the right type (e.g. raised by fetch_remote); re-wrapping
        # would only duplicate the "Failed to fetch remote results:" prefix.
        raise
    except Exception as e:
        # Chain explicitly so the original failure is kept as __cause__.
        raise ResultsError(f"Failed to fetch remote results: {str(e)}") from e
1485
1699
 
1486
-
1487
1700
  def spot_issues(self, models: Optional[ModelList] = None) -> Results:
1488
1701
  """Run a survey to spot issues and suggest improvements for prompts that had no model response, returning a new Results object.
1489
1702
  Future version: Allow user to optionally pass a list of questions to review, regardless of whether they had a null model response.
@@ -1494,57 +1707,72 @@ class Results(UserList, ResultsOperationsMixin, Base):
1494
1707
  from ..language_models import ModelList
1495
1708
  import pandas as pd
1496
1709
 
1497
- df = self.select("agent.*", "scenario.*", "answer.*", "raw_model_response.*", "prompt.*").to_pandas()
1710
+ df = self.select(
1711
+ "agent.*", "scenario.*", "answer.*", "raw_model_response.*", "prompt.*"
1712
+ ).to_pandas()
1498
1713
  scenario_list = []
1499
1714
 
1500
1715
  for _, row in df.iterrows():
1501
1716
  for col in df.columns:
1502
1717
  if col.endswith("_raw_model_response") and pd.isna(row[col]):
1503
- q = col.split("_raw_model_response")[0].replace("raw_model_response.", "")
1504
-
1505
- s = Scenario({
1506
- "original_question": q,
1507
- "original_agent_index": row["agent.agent_index"],
1508
- "original_scenario_index": row["scenario.scenario_index"],
1509
- "original_prompts": f"User prompt: {row[f'prompt.{q}_user_prompt']}\nSystem prompt: {row[f'prompt.{q}_system_prompt']}"
1510
- })
1511
-
1718
+ q = col.split("_raw_model_response")[0].replace(
1719
+ "raw_model_response.", ""
1720
+ )
1721
+
1722
+ s = Scenario(
1723
+ {
1724
+ "original_question": q,
1725
+ "original_agent_index": row["agent.agent_index"],
1726
+ "original_scenario_index": row["scenario.scenario_index"],
1727
+ "original_prompts": f"User prompt: {row[f'prompt.{q}_user_prompt']}\nSystem prompt: {row[f'prompt.{q}_system_prompt']}",
1728
+ }
1729
+ )
1730
+
1512
1731
  scenario_list.append(s)
1513
1732
 
1514
1733
  sl = ScenarioList(set(scenario_list))
1515
1734
 
1516
1735
  q1 = QuestionFreeText(
1517
- question_name = "issues",
1518
- question_text = """
1736
+ question_name="issues",
1737
+ question_text="""
1519
1738
  The following prompts generated a bad or null response: '{{ original_prompts }}'
1520
1739
  What do you think was the likely issue(s)?
1521
- """
1740
+ """,
1522
1741
  )
1523
1742
 
1524
1743
  q2 = QuestionDict(
1525
- question_name = "revised",
1526
- question_text = """
1744
+ question_name="revised",
1745
+ question_text="""
1527
1746
  The following prompts generated a bad or null response: '{{ original_prompts }}'
1528
1747
  You identified the issue(s) as '{{ issues.answer }}'.
1529
1748
  Please revise the prompts to address the issue(s).
1530
1749
  """,
1531
- answer_keys = ["revised_user_prompt", "revised_system_prompt"]
1750
+ answer_keys=["revised_user_prompt", "revised_system_prompt"],
1532
1751
  )
1533
1752
 
1534
- survey = Survey(questions = [q1, q2])
1753
+ survey = Survey(questions=[q1, q2])
1535
1754
 
1536
1755
  if models is not None:
1537
1756
  if not isinstance(models, ModelList):
1538
1757
  raise ResultsError("models must be a ModelList")
1539
1758
  results = survey.by(sl).by(models).run()
1540
1759
  else:
1541
- results = survey.by(sl).run() # use the default model
1760
+ results = survey.by(sl).run() # use the default model
1542
1761
 
1543
1762
  return results
1544
1763
 
1545
1764
 
1546
1765
  def main(): # pragma: no cover
1547
- """Call the OpenAI API credits."""
1766
+ """Run example operations on a Results object.
1767
+
1768
+ This function demonstrates basic filtering and mutation operations on
1769
+ a Results object, printing the output.
1770
+
1771
+ Examples:
1772
+ >>> # This can be run directly as a script
1773
+ >>> # python -m edsl.results.results
1774
+ >>> # It will create example results and show filtering and mutation
1775
+ """
1548
1776
  from ..results import Results
1549
1777
 
1550
1778
  results = Results.example(debug=True)
@@ -1554,4 +1782,5 @@ def main(): # pragma: no cover
1554
1782
 
if __name__ == "__main__":
    # Run this module's doctests when executed as a script; the ELLIPSIS flag
    # lets "..." in expected output stand for arbitrary text.
    from doctest import ELLIPSIS, testmod

    testmod(optionflags=ELLIPSIS)