edsl 0.1.54 → 0.1.56 (py3-none-any.whl)

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (105)
  1. edsl/__init__.py +8 -1
  2. edsl/__init__original.py +134 -0
  3. edsl/__version__.py +1 -1
  4. edsl/agents/agent.py +29 -0
  5. edsl/agents/agent_list.py +36 -1
  6. edsl/base/base_class.py +281 -151
  7. edsl/base/data_transfer_models.py +15 -4
  8. edsl/buckets/__init__.py +8 -3
  9. edsl/buckets/bucket_collection.py +9 -3
  10. edsl/buckets/model_buckets.py +4 -2
  11. edsl/buckets/token_bucket.py +2 -2
  12. edsl/buckets/token_bucket_client.py +5 -3
  13. edsl/caching/cache.py +131 -62
  14. edsl/caching/cache_entry.py +70 -58
  15. edsl/caching/sql_dict.py +17 -0
  16. edsl/cli.py +99 -0
  17. edsl/config/config_class.py +16 -0
  18. edsl/conversation/__init__.py +31 -0
  19. edsl/coop/coop.py +276 -242
  20. edsl/coop/coop_jobs_objects.py +59 -0
  21. edsl/coop/coop_objects.py +29 -0
  22. edsl/coop/coop_regular_objects.py +26 -0
  23. edsl/coop/utils.py +24 -19
  24. edsl/dataset/dataset.py +338 -101
  25. edsl/dataset/dataset_operations_mixin.py +216 -180
  26. edsl/db_list/sqlite_list.py +349 -0
  27. edsl/inference_services/__init__.py +40 -5
  28. edsl/inference_services/exceptions.py +11 -0
  29. edsl/inference_services/services/anthropic_service.py +5 -2
  30. edsl/inference_services/services/aws_bedrock.py +6 -2
  31. edsl/inference_services/services/azure_ai.py +6 -2
  32. edsl/inference_services/services/google_service.py +7 -3
  33. edsl/inference_services/services/mistral_ai_service.py +6 -2
  34. edsl/inference_services/services/open_ai_service.py +6 -2
  35. edsl/inference_services/services/perplexity_service.py +6 -2
  36. edsl/inference_services/services/test_service.py +94 -5
  37. edsl/interviews/answering_function.py +167 -59
  38. edsl/interviews/interview.py +124 -72
  39. edsl/interviews/interview_task_manager.py +10 -0
  40. edsl/interviews/request_token_estimator.py +8 -0
  41. edsl/invigilators/invigilators.py +35 -13
  42. edsl/jobs/async_interview_runner.py +146 -104
  43. edsl/jobs/data_structures.py +6 -4
  44. edsl/jobs/decorators.py +61 -0
  45. edsl/jobs/fetch_invigilator.py +61 -18
  46. edsl/jobs/html_table_job_logger.py +14 -2
  47. edsl/jobs/jobs.py +180 -104
  48. edsl/jobs/jobs_component_constructor.py +2 -2
  49. edsl/jobs/jobs_interview_constructor.py +2 -0
  50. edsl/jobs/jobs_pricing_estimation.py +154 -113
  51. edsl/jobs/jobs_remote_inference_logger.py +4 -0
  52. edsl/jobs/jobs_runner_status.py +30 -25
  53. edsl/jobs/progress_bar_manager.py +79 -0
  54. edsl/jobs/remote_inference.py +35 -1
  55. edsl/key_management/key_lookup_builder.py +6 -1
  56. edsl/language_models/language_model.py +110 -12
  57. edsl/language_models/model.py +10 -3
  58. edsl/language_models/price_manager.py +176 -71
  59. edsl/language_models/registry.py +5 -0
  60. edsl/notebooks/notebook.py +77 -10
  61. edsl/questions/VALIDATION_README.md +134 -0
  62. edsl/questions/__init__.py +24 -1
  63. edsl/questions/exceptions.py +21 -0
  64. edsl/questions/question_dict.py +201 -16
  65. edsl/questions/question_multiple_choice_with_other.py +624 -0
  66. edsl/questions/question_registry.py +2 -1
  67. edsl/questions/templates/multiple_choice_with_other/__init__.py +0 -0
  68. edsl/questions/templates/multiple_choice_with_other/answering_instructions.jinja +15 -0
  69. edsl/questions/templates/multiple_choice_with_other/question_presentation.jinja +17 -0
  70. edsl/questions/validation_analysis.py +185 -0
  71. edsl/questions/validation_cli.py +131 -0
  72. edsl/questions/validation_html_report.py +404 -0
  73. edsl/questions/validation_logger.py +136 -0
  74. edsl/results/result.py +115 -46
  75. edsl/results/results.py +702 -171
  76. edsl/scenarios/construct_download_link.py +16 -3
  77. edsl/scenarios/directory_scanner.py +226 -226
  78. edsl/scenarios/file_methods.py +5 -0
  79. edsl/scenarios/file_store.py +150 -9
  80. edsl/scenarios/handlers/__init__.py +5 -1
  81. edsl/scenarios/handlers/mp4_file_store.py +104 -0
  82. edsl/scenarios/handlers/webm_file_store.py +104 -0
  83. edsl/scenarios/scenario.py +120 -101
  84. edsl/scenarios/scenario_list.py +800 -727
  85. edsl/scenarios/scenario_list_gc_test.py +146 -0
  86. edsl/scenarios/scenario_list_memory_test.py +214 -0
  87. edsl/scenarios/scenario_list_source_refactor.md +35 -0
  88. edsl/scenarios/scenario_selector.py +5 -4
  89. edsl/scenarios/scenario_source.py +1990 -0
  90. edsl/scenarios/tests/test_scenario_list_sources.py +52 -0
  91. edsl/surveys/survey.py +22 -0
  92. edsl/tasks/__init__.py +4 -2
  93. edsl/tasks/task_history.py +198 -36
  94. edsl/tests/scenarios/test_ScenarioSource.py +51 -0
  95. edsl/tests/scenarios/test_scenario_list_sources.py +51 -0
  96. edsl/utilities/__init__.py +2 -1
  97. edsl/utilities/decorators.py +121 -0
  98. edsl/utilities/memory_debugger.py +1010 -0
  99. {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/METADATA +51 -76
  100. {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/RECORD +103 -79
  101. edsl/jobs/jobs_runner_asyncio.py +0 -281
  102. edsl/language_models/unused/fake_openai_service.py +0 -60
  103. {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/LICENSE +0 -0
  104. {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/WHEEL +0 -0
  105. {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/entry_points.txt +0 -0
edsl/inference_services/services/test_service.py

@@ -4,11 +4,14 @@ import random
 
 from ..inference_service_abc import InferenceServiceABC
 
-from ...language_models import LanguageModel
 from ...enums import InferenceServiceType
 
+# Use TYPE_CHECKING to avoid circular imports at runtime
 if TYPE_CHECKING:
-    from ....scenarios.file_store import FileStore as File
+    from ...language_models import LanguageModel
+
+if TYPE_CHECKING:
+    from ...scenarios.file_store import FileStore as File
 
 
 class TestService(InferenceServiceABC):
@@ -36,9 +39,12 @@ class TestService(InferenceServiceABC):
         return ["test"]
 
     @classmethod
-    def create_model(cls, model_name, model_class_name=None) -> LanguageModel:
+    def create_model(cls, model_name, model_class_name=None) -> "LanguageModel":
         # Removed unused variable
 
+        # Import LanguageModel only when actually creating a model
+        from ...language_models import LanguageModel
+
         class TestServiceLanguageModel(LanguageModel):
             _model_ = "test"
             _parameters_ = {"temperature": 0.5}
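These two hunks apply the standard two-part recipe for breaking an import cycle: an import used only in annotations moves under typing.TYPE_CHECKING, with the annotation quoted as a forward reference, and the import that is genuinely needed at runtime is deferred into the function body. A minimal self-contained sketch of the pattern, using a stdlib module so it runs as-is (the names are illustrative, not edsl's):

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Seen only by type checkers, never executed at runtime,
    # so it can never participate in a circular import.
    from decimal import Decimal

def half(x: "Decimal") -> "Decimal":  # quoted forward references
    # Deferred runtime import: resolved on first call, by which
    # time all modules have finished initializing.
    from decimal import Decimal
    return x / Decimal(2)

if __name__ == "__main__":
    from decimal import Decimal
    print(half(Decimal("3")))  # 1.5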
@@ -74,9 +80,9 @@ class TestService(InferenceServiceABC):
                     p = 1
 
                 if random.random() < p:
-                    from ..exceptions import InferenceServiceError
+                    from ..exceptions import InferenceServiceIntendedError
 
-                    raise InferenceServiceError("This is a test error")
+                    raise InferenceServiceIntendedError("This is a test error")
 
                 if hasattr(self, "func"):
                     return {
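The rename distinguishes failures that the test service raises on purpose from genuine service errors. The diff for edsl/inference_services/exceptions.py (+11 lines, file 28 above) is not shown here, but a plausible shape for the new class, assuming it subclasses the existing error, would be:

# Hypothetical sketch - the actual definitions live in
# edsl/inference_services/exceptions.py, which this diff does not show.
class InferenceServiceError(Exception):
    """Raised when an inference service fails."""

class InferenceServiceIntendedError(InferenceServiceError):
    """Raised deliberately (e.g. by TestService) to exercise
    error-handling paths without a real provider failure."""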
@@ -99,4 +105,87 @@ class TestService(InferenceServiceABC):
                     "usage": {"prompt_tokens": 1, "completion_tokens": 1},
                 }
 
+            def set_canned_response(self, survey: "Survey") -> None:
+                from edsl import Model
+                from edsl.questions import (
+                    QuestionMultipleChoice,
+                    QuestionCheckBox,
+                    QuestionLinearScale,
+                    QuestionList,
+                    QuestionDict,
+                    QuestionNumerical,
+                    QuestionFreeText,
+                )
+
+                canned_response = {}
+
+                for q in survey.questions:
+                    name = q.question_name
+
+                    if isinstance(q, QuestionMultipleChoice):
+                        # Return first option
+                        canned_response[name] = q.question_options[0]
+
+                    elif isinstance(q, QuestionCheckBox):
+                        # Return first two options as a list
+                        canned_response[name] = q.question_options[:2]
+
+                    elif isinstance(q, QuestionLinearScale):
+                        # Return middle of the scale
+                        values = q.question_options
+                        if isinstance(values, list) and all(
+                            isinstance(i, int) for i in values
+                        ):
+                            mid = values[len(values) // 2]
+                            canned_response[name] = mid
+                        else:
+                            canned_response[name] = 5  # default fallback
+
+                    elif isinstance(q, QuestionNumerical):
+                        # Return a fixed float value
+                        canned_response[name] = 42.0
+
+                    elif isinstance(q, QuestionList):
+                        # Return a list of simple strings
+                        canned_response[name] = [f"{name} item 1", f"{name} item 2"]
+
+                    elif isinstance(q, QuestionDict):
+                        # Handle response types for each key
+                        keys = getattr(q, "answer_keys", ["field1", "field2"])
+                        value_types = getattr(q, "value_types", [])
+                        canned_response[name] = {}
+
+                        for i, key in enumerate(keys):
+                            # Check the type for each key and generate the appropriate response
+                            response_type = (
+                                value_types[i] if i < len(value_types) else "string"
+                            )  # Default to "string" if not provided
+
+                            if "str" in response_type:
+                                canned_response[name][key] = f"{key} value"
+                            elif "int" in response_type:
+                                canned_response[name][
+                                    key
+                                ] = 42  # Example integer response
+                            elif "float" in response_type:
+                                canned_response[name][
+                                    key
+                                ] = 42.0  # Example float response
+                            elif "bool" in response_type:
+                                canned_response[name][
+                                    key
+                                ] = True  # Example boolean response
+                            else:
+                                canned_response[name][key] = f"{key} unknown type"
+
+                    elif isinstance(q, QuestionFreeText):
+                        # Return a string
+                        canned_response[name] = f"This is a canned answer for {name}."
+
+                    else:
+                        # Fallback: simple string
+                        canned_response[name] = f"Canned fallback for {name}"
+
+                self.canned_response = canned_response
+
         return TestServiceLanguageModel
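A hedged sketch of how the new helper might be used. Model("test") and survey.by(model).run() follow edsl's documented test-model workflow; the set_canned_response call and the printed output are inferred from this diff, not from released documentation:

from edsl import Model, Survey
from edsl.questions import QuestionMultipleChoice, QuestionNumerical

survey = Survey(
    questions=[
        QuestionMultipleChoice(
            question_name="color",
            question_text="Pick a color.",
            question_options=["red", "green", "blue"],
        ),
        QuestionNumerical(question_name="age", question_text="How old are you?"),
    ]
)

model = Model("test")
model.set_canned_response(survey)  # added in 0.1.56
print(model.canned_response)       # inferred: {'color': 'red', 'age': 42.0}

results = survey.by(model).run()   # the test model answers from canned_response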
edsl/interviews/answering_function.py

@@ -1,5 +1,6 @@
 import asyncio
 import copy
+import weakref
 from typing import TYPE_CHECKING, Any, Callable, Union
 
 if TYPE_CHECKING:
@@ -27,21 +28,73 @@ class RetryConfig:
 
 class SkipHandler:
     def __init__(self, interview: "Interview"):
-        self.interview = interview
-        self.question_index = self.interview.to_index
+        # Store a weak reference to the interview
+        self._interview_ref = weakref.ref(interview)
 
-        self.skip_function: Callable = (
-            self.interview.survey.rule_collection.skip_question_before_running
-        )
+        # Cache only the skip function which doesn't maintain a reference to the interview
+        try:
+            self.skip_function: Callable = (
+                interview.survey.rule_collection.skip_question_before_running
+            )
+        except (AttributeError, KeyError):
+            # Fallback for test environments
+            self.skip_function = lambda *args: False
+
+    @property
+    def interview(self):
+        """Access the interview via weak reference if it still exists."""
+        interview = self._interview_ref()
+        if interview is None:
+            raise RuntimeError("Interview has been garbage collected")
+        return interview
+
+    @property
+    def _to_index(self):
+        return self.interview.to_index
+
+    @property
+    def _survey(self):
+        return self.interview.survey
+
+    @property
+    def _answers(self):
+        return self.interview.answers
+
+    @property
+    def _scenario(self):
+        return self.interview.scenario
+
+    @property
+    def _agent_traits(self):
+        try:
+            return self.interview.agent["traits"]
+        except (AttributeError, KeyError):
+            return {}
+
+    @property
+    def _skip_flags(self):
+        return self.interview.skip_flags
 
     def should_skip(self, current_question: "QuestionBase") -> bool:
         """Determine if the current question should be skipped."""
-        current_question_index = self.question_index[current_question.question_name]
-        combined_answers = (
-            self.interview.answers
-            | self.interview.scenario
-            | self.interview.agent["traits"]
-        )
+        current_question_index = self._to_index[current_question.question_name]
+
+        # Handle ScenarioList case - convert to dict first
+        scenario_dict = {}
+        if hasattr(self._scenario, "items"):
+            # Handle standard dict scenario
+            scenario_dict = self._scenario
+        else:
+            # Handle ScenarioList or other scenario object
+            # Access as a dict if possible, otherwise try to convert
+            scenario_dict = (
+                dict(self._scenario) if hasattr(self._scenario, "__iter__") else {}
+            )
+
+        combined_answers = dict(self._answers)
+        combined_answers.update(scenario_dict)
+        combined_answers.update(self._agent_traits)
+
         return self.skip_function(current_question_index, combined_answers)
 
     def _current_info_env(self) -> dict[str, Any]:
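The theme of the rest of this file's diff: handlers now hold the interview through weakref.ref, so a handler can no longer pin a large Interview graph in memory (compare the new memory_debugger.py and the scenario-list GC/memory tests in the file list above). A self-contained illustration of the dereference-or-raise pattern behind the interview property; note that CPython frees the object as soon as the last strong reference drops, while other interpreters may collect later:

import weakref

class Interview:
    answers = {"q0": "yes"}

class Handler:
    def __init__(self, interview: Interview) -> None:
        # A weak reference does not keep the interview alive.
        self._interview_ref = weakref.ref(interview)

    @property
    def interview(self) -> Interview:
        interview = self._interview_ref()  # None once collected
        if interview is None:
            raise RuntimeError("Interview has been garbage collected")
        return interview

iv = Interview()
handler = Handler(iv)
print(handler.interview.answers)  # works while a strong reference (iv) exists
del iv                            # drop the only strong reference
try:
    handler.interview
except RuntimeError as e:
    print(e)                      # Interview has been garbage collected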
@@ -52,7 +105,7 @@ class SkipHandler:
         """
         # Process answers dictionary
         processed_answers = {}
-        for key, value in self.interview.answers.items():
+        for key, value in self._answers.items():
             if key.endswith("_generated_tokens"):
                 base_name = key.replace("_generated_tokens", "")
                 processed_answers[f"{base_name}.generated_tokens"] = value
@@ -64,33 +117,22 @@ class SkipHandler:
                 processed_answers[f"{key}.answer"] = value
 
         # Process scenario dictionary
-        processed_scenario = {
-            f"scenario.{k}": v for k, v in self.interview.scenario.items()
-        }
+        processed_scenario = {f"scenario.{k}": v for k, v in self._scenario.items()}
 
         # Process agent traits
-        processed_agent = {
-            f"agent.{k}": v for k, v in self.interview.agent["traits"].items()
-        }
+        processed_agent = {f"agent.{k}": v for k, v in self._agent_traits.items()}
 
         return processed_answers | processed_scenario | processed_agent
 
     def cancel_skipped_questions(self, current_question: "QuestionBase") -> None:
         """Cancel the tasks for questions that should be skipped."""
-        current_question_index: int = self.interview.to_index[
-            current_question.question_name
-        ]
+        current_question_index: int = self._to_index[current_question.question_name]
         answers = self._current_info_env()
-        # answers = (
-        #     self.interview.answers
-        #     | self.interview.scenario
-        #     | self.interview.agent["traits"]
-        # )
 
         # Get the index of the next question, which could also be the end of the survey
         next_question: Union[
             int, EndOfSurvey
-        ] = self.interview.survey.rule_collection.next_question(
+        ] = self._survey.rule_collection.next_question(
             q_now=current_question_index,
             answers=answers,
         )
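The scenario./agent./.answer prefixes exist because skip and branching rules reference variables in that namespaced form. A distilled, runnable rendering of the transformation on sample data (plain dicts stand in for the interview's objects; branches that fall between the two hunks are elided):

# Sample inputs, standing in for interview.answers / scenario / agent traits.
answers = {"q0": "yes", "q0_generated_tokens": 12}
scenario = {"topic": "cats"}
traits = {"persona": "skeptic"}

processed_answers = {}
for key, value in answers.items():
    if key.endswith("_generated_tokens"):
        base_name = key.replace("_generated_tokens", "")
        processed_answers[f"{base_name}.generated_tokens"] = value
    else:
        processed_answers[f"{key}.answer"] = value  # elided branches handle other suffixes

env = (
    processed_answers
    | {f"scenario.{k}": v for k, v in scenario.items()}
    | {f"agent.{k}": v for k, v in traits.items()}
)
print(env)
# {'q0.answer': 'yes', 'q0.generated_tokens': 12,
#  'scenario.topic': 'cats', 'agent.persona': 'skeptic'}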
@@ -101,14 +143,15 @@ class SkipHandler:
                 # print(f"Cancelling task {i}")
                 # self.interview.tasks[i].cancel()
                 # self.interview.tasks[i].set_result("skipped")
-                self.interview.skip_flags[
-                    self.interview.survey.questions[i].question_name
-                ] = True
+                interview = self._interview_ref()
+                if interview is not None:
+                    interview.skip_flags[self._survey.questions[i].question_name] = True
+                else:
+                    # If interview is gone, there's nothing to skip anymore
+                    return
 
         if (next_question_index := next_question.next_q) == EndOfSurvey:
-            cancel_between(
-                current_question_index + 1, len(self.interview.survey.questions)
-            )
+            cancel_between(current_question_index + 1, len(self._survey.questions))
             return
 
         if next_question_index > (current_question_index + 1):
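For clarity, the skip-window arithmetic can be distilled as below. This is a hypothetical helper, not code from the diff, and the last branch is inferred from the truncated tail of the hunk (cancel_between over the jumped-over range is the natural reading):

END_OF_SURVEY = object()  # stand-in for edsl's EndOfSurvey sentinel

def indices_to_flag(q_now: int, next_q, n_questions: int) -> range:
    """Question indices that get skip-flagged when rules jump
    from q_now to next_q (hypothetical distillation of the diff)."""
    if next_q is END_OF_SURVEY:
        return range(q_now + 1, n_questions)  # everything after the current question
    if next_q > q_now + 1:
        return range(q_now + 1, next_q)       # the questions jumped over
    return range(0)                           # consecutive flow: nothing to flag

assert list(indices_to_flag(1, 4, 6)) == [2, 3]
assert list(indices_to_flag(1, END_OF_SURVEY, 6)) == [2, 3, 4, 5]
assert list(indices_to_flag(1, 2, 6)) == []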
@@ -119,48 +162,90 @@ class AnswerQuestionFunctionConstructor:
     """Constructs a function that answers a question and records the answer."""
 
     def __init__(self, interview: "Interview", key_lookup: "KeyLookup"):
-        self.interview = interview
+        # Store a weak reference to the interview
+        self._interview_ref = weakref.ref(interview)
         self.key_lookup = key_lookup
 
-        self.had_language_model_no_response_error: bool = False
-        self.question_index = self.interview.to_index
-
-        self.skip_function: Callable = (
-            self.interview.survey.rule_collection.skip_question_before_running
+        # Store configuration settings that won't change during lifecycle
+        self._raise_validation_errors = getattr(
+            interview, "raise_validation_errors", False
         )
+        self._stop_on_exception = getattr(interview, "stop_on_exception", False)
 
+        self.had_language_model_no_response_error: bool = False
+
+        # Initialize fetch invigilator with the interview - this should use weakref internally
         self.invigilator_fetcher = FetchInvigilator(
-            self.interview, key_lookup=self.key_lookup
+            interview, key_lookup=self.key_lookup
         )
-        self.skip_handler = SkipHandler(self.interview)
+
+        # In our test environment, we might not be able to create the SkipHandler
+        # because example Interview might not have all required attributes
+        # So we'll initialize it conditionally
+        if hasattr(interview, "skip_flags"):
+            self.skip_handler = SkipHandler(interview)
+        else:
+            self.skip_handler = None
+
+    @property
+    def interview(self):
+        """Access the interview via weak reference if it still exists."""
+        interview = self._interview_ref()
+        if interview is None:
+            raise RuntimeError("Interview has been garbage collected")
+        return interview
+
+    @property
+    def _answers(self):
+        return self.interview.answers
+
+    @property
+    def _exceptions(self):
+        return self.interview.exceptions
+
+    @property
+    def _to_index(self):
+        return self.interview.to_index
+
+    @property
+    def _skip_flags(self):
+        if hasattr(self.interview, "skip_flags"):
+            return self.interview.skip_flags
+        return {}
 
     def _handle_exception(
         self, e: Exception, invigilator: "InvigilatorBase", task=None
     ):
         """Handle an exception that occurred while answering a question."""
+        interview = self._interview_ref()
+        if interview is None:
+            # If interview is gone, we can't really handle the exception properly
+            # Just raise it to the caller
+            raise e
+
+        # Copy to freeze the answers here for logging
+        answers = copy.copy(self._answers)
 
-        answers = copy.copy(
-            self.interview.answers
-        )  # copy to freeze the answers here for logging
         exception_entry = InterviewExceptionEntry(
             exception=e,
             invigilator=invigilator,
             answers=answers,
         )
+
         if task:
             task.task_status = TaskStatus.FAILED
 
-        self.interview.exceptions.add(
-            invigilator.question.question_name, exception_entry
-        )
+        # Add to exceptions - need to use the interview reference here
+        interview.exceptions.add(invigilator.question.question_name, exception_entry)
 
-        if self.interview.raise_validation_errors and isinstance(
+        # Check if we should raise validation errors
+        if self._raise_validation_errors and isinstance(
             e, QuestionAnswerValidationError
         ):
             raise e
 
-        stop_on_exception = getattr(self.interview, "stop_on_exception", False)
-        if stop_on_exception:
+        # Check if we should stop on exception
+        if self._stop_on_exception:
             raise e
 
     def __call__(self):
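Two small points worth noting here: getattr with a default tolerates stripped-down test doubles that never defined the flag, and snapshotting the configuration in __init__ means _handle_exception can read it without touching the possibly-collected interview. A tiny illustration:

class FakeInterview:
    """A stripped-down stand-in lacking most Interview attributes."""

fi = FakeInterview()

# No AttributeError, even though the attributes were never defined:
raise_validation_errors = getattr(fi, "raise_validation_errors", False)
stop_on_exception = getattr(fi, "stop_on_exception", False)
print(raise_validation_errors, stop_on_exception)  # False False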
@@ -190,27 +275,46 @@ class AnswerQuestionFunctionConstructor:
             reraise=True,
         )
         async def attempt_answer():
+            # Get a reference to the interview (may be None if it's been garbage collected)
+            interview = self._interview_ref()
+
+            # Get the invigilator for this question
             invigilator = self.invigilator_fetcher(question)
 
-            if self.interview.skip_flags.get(question.question_name, False):
+            # Check if interview still exists
+            if interview is None:
+                # If interview is gone, we can't really process this question
+                # Return a failure result
+                return invigilator.get_failed_task_result(
+                    failure_reason="Interview has been garbage collected."
+                )
+
+            # Check if question should be skipped - use cached skip_flags if available
+            skip_flags = getattr(self, "_skip_flags", None) or interview.skip_flags
+            if skip_flags.get(question.question_name, False):
                 return invigilator.get_failed_task_result(
                     failure_reason="Question skipped."
                 )
-            if self.skip_handler.should_skip(question):
+
+            if self.skip_handler and self.skip_handler.should_skip(question):
                 return invigilator.get_failed_task_result(
                     failure_reason="Question skipped."
                 )
 
+            had_language_model_no_response_error = False
             try:
                 response: EDSLResultObjectInput = (
                     await invigilator.async_answer_question()
                 )
                 if response.validated:
-                    self.interview.answers.add_answer(
-                        response=response, question=question
-                    )
-
-                    self.skip_handler.cancel_skipped_questions(question)
+                    # Re-check if interview exists before updating it
+                    interview = self._interview_ref()
+                    if interview is not None:
+                        interview.answers.add_answer(
+                            response=response, question=question
+                        )
+                        if self.skip_handler:
+                            self.skip_handler.cancel_skipped_questions(question)
                 else:
                     if (
                         hasattr(response, "exception_occurred")
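attempt_answer re-fetches the weak reference after await invigilator.async_answer_question() because the interview that existed before the await may have been collected while the coroutine was suspended. A compact, runnable demonstration of the hazard (stdlib only; the class name is illustrative):

import asyncio
import weakref

class Interview:
    pass

async def answer(ref: "weakref.ref[Interview]") -> None:
    assert ref() is not None      # alive before the await
    await asyncio.sleep(0)        # suspension point: other code runs here
    # After the await, re-check before touching the interview.
    print("alive after await?", ref() is not None)

async def main() -> None:
    interview = Interview()
    ref = weakref.ref(interview)
    task = asyncio.create_task(answer(ref))
    await asyncio.sleep(0)        # let answer() reach its await
    del interview                 # last strong reference dropped mid-coroutine
    await task                    # prints: alive after await? False

asyncio.run(main())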
@@ -239,11 +343,15 @@ class AnswerQuestionFunctionConstructor:
                 raise LanguageModelNoResponseError(
                     f"Language model did not return a response for question '{question.question_name}.'"
                 )
+
+            # Re-check if interview exists before accessing exceptions
+            interview = self._interview_ref()
             if (
-                question.question_name in self.interview.exceptions
+                interview is not None
+                and question.question_name in interview.exceptions
                 and had_language_model_no_response_error
             ):
-                self.interview.exceptions.record_fixed_question(question.question_name)
+                interview.exceptions.record_fixed_question(question.question_name)
 
             return response