edsl 0.1.38__py3-none-any.whl → 0.1.38.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. edsl/Base.py +34 -63
  2. edsl/BaseDiff.py +7 -7
  3. edsl/__init__.py +1 -2
  4. edsl/__version__.py +1 -1
  5. edsl/agents/Agent.py +11 -23
  6. edsl/agents/AgentList.py +23 -86
  7. edsl/agents/Invigilator.py +7 -18
  8. edsl/agents/InvigilatorBase.py +19 -0
  9. edsl/agents/PromptConstructor.py +4 -5
  10. edsl/auto/SurveyCreatorPipeline.py +1 -1
  11. edsl/auto/utilities.py +1 -1
  12. edsl/base/Base.py +13 -3
  13. edsl/config.py +0 -8
  14. edsl/conjure/AgentConstructionMixin.py +160 -0
  15. edsl/conjure/Conjure.py +62 -0
  16. edsl/conjure/InputData.py +659 -0
  17. edsl/conjure/InputDataCSV.py +48 -0
  18. edsl/conjure/InputDataMixinQuestionStats.py +182 -0
  19. edsl/conjure/InputDataPyRead.py +91 -0
  20. edsl/conjure/InputDataSPSS.py +8 -0
  21. edsl/conjure/InputDataStata.py +8 -0
  22. edsl/conjure/QuestionOptionMixin.py +76 -0
  23. edsl/conjure/QuestionTypeMixin.py +23 -0
  24. edsl/conjure/RawQuestion.py +65 -0
  25. edsl/conjure/SurveyResponses.py +7 -0
  26. edsl/conjure/__init__.py +9 -0
  27. edsl/conjure/examples/placeholder.txt +0 -0
  28. edsl/{utilities → conjure}/naming_utilities.py +1 -1
  29. edsl/conjure/utilities.py +201 -0
  30. edsl/coop/coop.py +7 -77
  31. edsl/data/Cache.py +17 -45
  32. edsl/data/CacheEntry.py +3 -8
  33. edsl/data/RemoteCacheSync.py +19 -0
  34. edsl/enums.py +0 -2
  35. edsl/exceptions/agents.py +0 -4
  36. edsl/inference_services/GoogleService.py +15 -7
  37. edsl/inference_services/registry.py +0 -2
  38. edsl/jobs/Jobs.py +559 -110
  39. edsl/jobs/buckets/TokenBucket.py +0 -3
  40. edsl/jobs/interviews/Interview.py +7 -7
  41. edsl/jobs/runners/JobsRunnerAsyncio.py +28 -156
  42. edsl/jobs/runners/JobsRunnerStatus.py +196 -194
  43. edsl/jobs/tasks/TaskHistory.py +19 -27
  44. edsl/language_models/LanguageModel.py +90 -52
  45. edsl/language_models/ModelList.py +14 -67
  46. edsl/language_models/registry.py +4 -57
  47. edsl/notebooks/Notebook.py +8 -7
  48. edsl/prompts/Prompt.py +3 -8
  49. edsl/questions/QuestionBase.py +30 -38
  50. edsl/questions/QuestionBaseGenMixin.py +1 -1
  51. edsl/questions/QuestionBasePromptsMixin.py +17 -0
  52. edsl/questions/QuestionExtract.py +4 -3
  53. edsl/questions/QuestionFunctional.py +3 -10
  54. edsl/questions/derived/QuestionTopK.py +0 -2
  55. edsl/questions/question_registry.py +6 -36
  56. edsl/results/Dataset.py +15 -146
  57. edsl/results/DatasetExportMixin.py +217 -231
  58. edsl/results/DatasetTree.py +4 -134
  59. edsl/results/Result.py +16 -31
  60. edsl/results/Results.py +65 -159
  61. edsl/scenarios/FileStore.py +13 -187
  62. edsl/scenarios/Scenario.py +18 -73
  63. edsl/scenarios/ScenarioList.py +76 -251
  64. edsl/surveys/MemoryPlan.py +1 -1
  65. edsl/surveys/Rule.py +5 -1
  66. edsl/surveys/RuleCollection.py +1 -1
  67. edsl/surveys/Survey.py +19 -25
  68. edsl/surveys/SurveyFlowVisualizationMixin.py +9 -67
  69. edsl/surveys/instructions/ChangeInstruction.py +7 -9
  70. edsl/surveys/instructions/Instruction.py +7 -21
  71. edsl/templates/error_reporting/interview_details.html +3 -3
  72. edsl/templates/error_reporting/interviews.html +9 -18
  73. edsl/utilities/utilities.py +0 -15
  74. {edsl-0.1.38.dist-info → edsl-0.1.38.dev1.dist-info}/METADATA +1 -2
  75. {edsl-0.1.38.dist-info → edsl-0.1.38.dev1.dist-info}/RECORD +77 -71
  76. edsl/exceptions/cache.py +0 -5
  77. edsl/inference_services/PerplexityService.py +0 -163
  78. edsl/jobs/JobsChecks.py +0 -147
  79. edsl/jobs/JobsPrompts.py +0 -268
  80. edsl/jobs/JobsRemoteInferenceHandler.py +0 -239
  81. edsl/results/CSSParameterizer.py +0 -108
  82. edsl/results/TableDisplay.py +0 -198
  83. edsl/results/table_display.css +0 -78
  84. edsl/scenarios/ScenarioJoin.py +0 -127
  85. {edsl-0.1.38.dist-info → edsl-0.1.38.dev1.dist-info}/LICENSE +0 -0
  86. {edsl-0.1.38.dist-info → edsl-0.1.38.dev1.dist-info}/WHEEL +0 -0
@@ -220,9 +220,6 @@ class TokenBucket:
220
220
 
221
221
  elapsed_time = now - start_time
222
222
 
223
- if elapsed_time == 0:
224
- return self.num_released / 0.001
225
-
226
223
  return (self.num_released / elapsed_time) * 60
227
224
 
228
225
  # # Filter log entries within the time window
@@ -159,7 +159,7 @@ class Interview:
159
159
  return self.task_creators.interview_status
160
160
 
161
161
  # region: Serialization
162
- def to_dict(self, include_exceptions=True, add_edsl_version=True) -> dict[str, Any]:
162
+ def _to_dict(self, include_exceptions=True) -> dict[str, Any]:
163
163
  """Return a dictionary representation of the Interview instance.
164
164
  This is just for hashing purposes.
165
165
 
@@ -168,10 +168,10 @@ class Interview:
168
168
  1217840301076717434
169
169
  """
170
170
  d = {
171
- "agent": self.agent.to_dict(add_edsl_version=add_edsl_version),
172
- "survey": self.survey.to_dict(add_edsl_version=add_edsl_version),
173
- "scenario": self.scenario.to_dict(add_edsl_version=add_edsl_version),
174
- "model": self.model.to_dict(add_edsl_version=add_edsl_version),
171
+ "agent": self.agent._to_dict(),
172
+ "survey": self.survey._to_dict(),
173
+ "scenario": self.scenario._to_dict(),
174
+ "model": self.model._to_dict(),
175
175
  "iteration": self.iteration,
176
176
  "exceptions": {},
177
177
  }
@@ -202,11 +202,11 @@ class Interview:
202
202
  def __hash__(self) -> int:
203
203
  from edsl.utilities.utilities import dict_hash
204
204
 
205
- return dict_hash(self.to_dict(include_exceptions=False, add_edsl_version=False))
205
+ return dict_hash(self._to_dict(include_exceptions=False))
206
206
 
207
207
  def __eq__(self, other: "Interview") -> bool:
208
208
  """
209
- >>> from edsl.jobs.interviews.Interview import Interview; i = Interview.example(); d = i.to_dict(); i2 = Interview.from_dict(d); i == i2
209
+ >>> from edsl.jobs.interviews.Interview import Interview; i = Interview.example(); d = i._to_dict(); i2 = Interview.from_dict(d); i == i2
210
210
  True
211
211
  """
212
212
  return hash(self) == hash(other)
@@ -2,14 +2,13 @@ from __future__ import annotations
2
2
  import time
3
3
  import asyncio
4
4
  import threading
5
- import warnings
6
- from typing import Coroutine, List, AsyncGenerator, Optional, Union, Generator, Type
7
- from uuid import UUID
5
+ from typing import Coroutine, List, AsyncGenerator, Optional, Union, Generator
6
+ from contextlib import contextmanager
8
7
  from collections import UserList
9
8
 
10
9
  from edsl.results.Results import Results
11
10
  from edsl.jobs.interviews.Interview import Interview
12
- from edsl.jobs.runners.JobsRunnerStatus import JobsRunnerStatus, JobsRunnerStatusBase
11
+ from edsl.jobs.runners.JobsRunnerStatus import JobsRunnerStatus
13
12
 
14
13
  from edsl.jobs.tasks.TaskHistory import TaskHistory
15
14
  from edsl.jobs.buckets.BucketCollection import BucketCollection
@@ -37,61 +36,11 @@ class JobsRunnerAsyncio:
37
36
  The Jobs object is a collection of interviews that are to be run.
38
37
  """
39
38
 
40
- MAX_CONCURRENT_DEFAULT = 500
41
-
42
39
  def __init__(self, jobs: "Jobs"):
43
40
  self.jobs = jobs
44
41
  self.interviews: List["Interview"] = jobs.interviews()
45
42
  self.bucket_collection: "BucketCollection" = jobs.bucket_collection
46
43
  self.total_interviews: List["Interview"] = []
47
- self._initialized = threading.Event()
48
-
49
- from edsl.config import CONFIG
50
-
51
- self.MAX_CONCURRENT = int(CONFIG.get("EDSL_MAX_CONCURRENT_TASKS"))
52
- # print(f"MAX_CONCURRENT: {self.MAX_CONCURRENT}")
53
-
54
- # async def run_async_generator(
55
- # self,
56
- # cache: Cache,
57
- # n: int = 1,
58
- # stop_on_exception: bool = False,
59
- # sidecar_model: Optional[LanguageModel] = None,
60
- # total_interviews: Optional[List["Interview"]] = None,
61
- # raise_validation_errors: bool = False,
62
- # ) -> AsyncGenerator["Result", None]:
63
- # """Creates the tasks, runs them asynchronously, and returns the results as a Results object.
64
-
65
- # Completed tasks are yielded as they are completed.
66
-
67
- # :param n: how many times to run each interview
68
- # :param stop_on_exception: Whether to stop the interview if an exception is raised
69
- # :param sidecar_model: a language model to use in addition to the interview's model
70
- # :param total_interviews: A list of interviews to run can be provided instead.
71
- # :param raise_validation_errors: Whether to raise validation errors
72
- # """
73
- # tasks = []
74
- # if total_interviews: # was already passed in total interviews
75
- # self.total_interviews = total_interviews
76
- # else:
77
- # self.total_interviews = list(
78
- # self._populate_total_interviews(n=n)
79
- # ) # Populate self.total_interviews before creating tasks
80
- # self._initialized.set() # Signal that we're ready
81
-
82
- # for interview in self.total_interviews:
83
- # interviewing_task = self._build_interview_task(
84
- # interview=interview,
85
- # stop_on_exception=stop_on_exception,
86
- # sidecar_model=sidecar_model,
87
- # raise_validation_errors=raise_validation_errors,
88
- # )
89
- # tasks.append(asyncio.create_task(interviewing_task))
90
-
91
- # for task in asyncio.as_completed(tasks):
92
- # result = await task
93
- # self.jobs_runner_status.add_completed_interview(result)
94
- # yield result
95
44
 
96
45
  async def run_async_generator(
97
46
  self,
@@ -102,10 +51,9 @@ class JobsRunnerAsyncio:
102
51
  total_interviews: Optional[List["Interview"]] = None,
103
52
  raise_validation_errors: bool = False,
104
53
  ) -> AsyncGenerator["Result", None]:
105
- """Creates and processes tasks asynchronously, yielding results as they complete.
54
+ """Creates the tasks, runs them asynchronously, and returns the results as a Results object.
106
55
 
107
- Tasks are created and processed in a streaming fashion rather than building the full list upfront.
108
- Results are yielded as soon as they are available.
56
+ Completed tasks are yielded as they are completed.
109
57
 
110
58
  :param n: how many times to run each interview
111
59
  :param stop_on_exception: Whether to stop the interview if an exception is raised
@@ -113,70 +61,27 @@ class JobsRunnerAsyncio:
113
61
  :param total_interviews: A list of interviews to run can be provided instead.
114
62
  :param raise_validation_errors: Whether to raise validation errors
115
63
  """
116
- # Initialize interviews iterator
117
- if total_interviews:
118
- interviews_iter = iter(total_interviews)
64
+ tasks = []
65
+ if total_interviews: # was already passed in total interviews
119
66
  self.total_interviews = total_interviews
120
67
  else:
121
- interviews_iter = self._populate_total_interviews(n=n)
122
- self.total_interviews = list(interviews_iter)
123
- interviews_iter = iter(self.total_interviews) # Create fresh iterator
124
-
125
- self._initialized.set() # Signal that we're ready
68
+ self.total_interviews = list(
69
+ self._populate_total_interviews(n=n)
70
+ ) # Populate self.total_interviews before creating tasks
126
71
 
127
- # Keep track of active tasks
128
- active_tasks = set()
72
+ for interview in self.total_interviews:
73
+ interviewing_task = self._build_interview_task(
74
+ interview=interview,
75
+ stop_on_exception=stop_on_exception,
76
+ sidecar_model=sidecar_model,
77
+ raise_validation_errors=raise_validation_errors,
78
+ )
79
+ tasks.append(asyncio.create_task(interviewing_task))
129
80
 
130
- try:
131
- while True:
132
- # Add new tasks if we're below max_concurrent and there are more interviews
133
- while len(active_tasks) < self.MAX_CONCURRENT:
134
- try:
135
- interview = next(interviews_iter)
136
- task = asyncio.create_task(
137
- self._build_interview_task(
138
- interview=interview,
139
- stop_on_exception=stop_on_exception,
140
- sidecar_model=sidecar_model,
141
- raise_validation_errors=raise_validation_errors,
142
- )
143
- )
144
- active_tasks.add(task)
145
- # Add callback to remove task from set when done
146
- task.add_done_callback(active_tasks.discard)
147
- except StopIteration:
148
- break
149
-
150
- if not active_tasks:
151
- break
152
-
153
- # Wait for next completed task
154
- done, _ = await asyncio.wait(
155
- active_tasks, return_when=asyncio.FIRST_COMPLETED
156
- )
157
-
158
- # Process completed tasks
159
- for task in done:
160
- try:
161
- result = await task
162
- self.jobs_runner_status.add_completed_interview(result)
163
- yield result
164
- except Exception as e:
165
- if stop_on_exception:
166
- # Cancel remaining tasks
167
- for t in active_tasks:
168
- if not t.done():
169
- t.cancel()
170
- raise
171
- else:
172
- # Log error and continue
173
- # logger.error(f"Task failed with error: {e}")
174
- continue
175
- finally:
176
- # Ensure we cancel any remaining tasks if we exit early
177
- for task in active_tasks:
178
- if not task.done():
179
- task.cancel()
81
+ for task in asyncio.as_completed(tasks):
82
+ result = await task
83
+ self.jobs_runner_status.add_completed_interview(result)
84
+ yield result
180
85
 
181
86
  def _populate_total_interviews(
182
87
  self, n: int = 1
@@ -337,25 +242,11 @@ class JobsRunnerAsyncio:
337
242
  if len(results.task_history.indices) > 5:
338
243
  msg += f"Exceptions were raised in the following interviews: {results.task_history.indices}.\n"
339
244
 
340
- import sys
341
-
342
- print(msg, file=sys.stderr)
343
- from edsl.config import CONFIG
344
-
345
- if CONFIG.get("EDSL_OPEN_EXCEPTION_REPORT_URL") == "True":
346
- open_in_browser = True
347
- elif CONFIG.get("EDSL_OPEN_EXCEPTION_REPORT_URL") == "False":
348
- open_in_browser = False
349
- else:
350
- raise Exception(
351
- "EDSL_OPEN_EXCEPTION_REPORT_URL", "must be either True or False"
352
- )
353
-
354
- # print("open_in_browser", open_in_browser)
355
-
245
+ print(msg)
246
+ # this is where exceptions are opening up
356
247
  filepath = results.task_history.html(
357
248
  cta="Open report to see details.",
358
- open_in_browser=open_in_browser,
249
+ open_in_browser=True,
359
250
  return_link=True,
360
251
  )
361
252
 
@@ -384,8 +275,6 @@ class JobsRunnerAsyncio:
384
275
  stop_on_exception: bool = False,
385
276
  progress_bar: bool = False,
386
277
  sidecar_model: Optional[LanguageModel] = None,
387
- jobs_runner_status: Optional[Type[JobsRunnerStatusBase]] = None,
388
- job_uuid: Optional[UUID] = None,
389
278
  print_exceptions: bool = True,
390
279
  raise_validation_errors: bool = False,
391
280
  ) -> "Coroutine":
@@ -397,19 +286,7 @@ class JobsRunnerAsyncio:
397
286
  self.cache = cache
398
287
  self.sidecar_model = sidecar_model
399
288
 
400
- from edsl.coop import Coop
401
-
402
- coop = Coop()
403
- endpoint_url = coop.get_progress_bar_url()
404
-
405
- if jobs_runner_status is not None:
406
- self.jobs_runner_status = jobs_runner_status(
407
- self, n=n, endpoint_url=endpoint_url, job_uuid=job_uuid
408
- )
409
- else:
410
- self.jobs_runner_status = JobsRunnerStatus(
411
- self, n=n, endpoint_url=endpoint_url, job_uuid=job_uuid
412
- )
289
+ self.jobs_runner_status = JobsRunnerStatus(self, n=n)
413
290
 
414
291
  stop_event = threading.Event()
415
292
 
@@ -429,16 +306,11 @@ class JobsRunnerAsyncio:
429
306
  """Runs the progress bar in a separate thread."""
430
307
  self.jobs_runner_status.update_progress(stop_event)
431
308
 
432
- if progress_bar and self.jobs_runner_status.has_ep_api_key():
433
- self.jobs_runner_status.setup()
309
+ if progress_bar:
434
310
  progress_thread = threading.Thread(
435
311
  target=run_progress_bar, args=(stop_event,)
436
312
  )
437
313
  progress_thread.start()
438
- elif progress_bar:
439
- warnings.warn(
440
- "You need an Expected Parrot API key to view job progress bars."
441
- )
442
314
 
443
315
  exception_to_raise = None
444
316
  try:
@@ -453,7 +325,7 @@ class JobsRunnerAsyncio:
453
325
  stop_event.set()
454
326
  finally:
455
327
  stop_event.set()
456
- if progress_bar and self.jobs_runner_status.has_ep_api_key():
328
+ if progress_bar:
457
329
  # self.jobs_runner_status.stop_event.set()
458
330
  if progress_thread:
459
331
  progress_thread.join()