edsl 0.1.37.dev5__py3-none-any.whl → 0.1.38__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. edsl/Base.py +63 -34
  2. edsl/BaseDiff.py +7 -7
  3. edsl/__init__.py +2 -1
  4. edsl/__version__.py +1 -1
  5. edsl/agents/Agent.py +23 -11
  6. edsl/agents/AgentList.py +86 -23
  7. edsl/agents/Invigilator.py +18 -7
  8. edsl/agents/InvigilatorBase.py +0 -19
  9. edsl/agents/PromptConstructor.py +5 -4
  10. edsl/auto/SurveyCreatorPipeline.py +1 -1
  11. edsl/auto/utilities.py +1 -1
  12. edsl/base/Base.py +3 -13
  13. edsl/config.py +8 -0
  14. edsl/coop/coop.py +89 -19
  15. edsl/data/Cache.py +45 -17
  16. edsl/data/CacheEntry.py +8 -3
  17. edsl/data/RemoteCacheSync.py +0 -19
  18. edsl/enums.py +2 -0
  19. edsl/exceptions/agents.py +4 -0
  20. edsl/exceptions/cache.py +5 -0
  21. edsl/inference_services/GoogleService.py +7 -15
  22. edsl/inference_services/PerplexityService.py +163 -0
  23. edsl/inference_services/registry.py +2 -0
  24. edsl/jobs/Jobs.py +110 -559
  25. edsl/jobs/JobsChecks.py +147 -0
  26. edsl/jobs/JobsPrompts.py +268 -0
  27. edsl/jobs/JobsRemoteInferenceHandler.py +239 -0
  28. edsl/jobs/buckets/TokenBucket.py +3 -0
  29. edsl/jobs/interviews/Interview.py +7 -7
  30. edsl/jobs/runners/JobsRunnerAsyncio.py +156 -28
  31. edsl/jobs/runners/JobsRunnerStatus.py +194 -196
  32. edsl/jobs/tasks/TaskHistory.py +27 -19
  33. edsl/language_models/LanguageModel.py +52 -90
  34. edsl/language_models/ModelList.py +67 -14
  35. edsl/language_models/registry.py +57 -4
  36. edsl/notebooks/Notebook.py +7 -8
  37. edsl/prompts/Prompt.py +8 -3
  38. edsl/questions/QuestionBase.py +38 -30
  39. edsl/questions/QuestionBaseGenMixin.py +1 -1
  40. edsl/questions/QuestionBasePromptsMixin.py +0 -17
  41. edsl/questions/QuestionExtract.py +3 -4
  42. edsl/questions/QuestionFunctional.py +10 -3
  43. edsl/questions/derived/QuestionTopK.py +2 -0
  44. edsl/questions/question_registry.py +36 -6
  45. edsl/results/CSSParameterizer.py +108 -0
  46. edsl/results/Dataset.py +146 -15
  47. edsl/results/DatasetExportMixin.py +231 -217
  48. edsl/results/DatasetTree.py +134 -4
  49. edsl/results/Result.py +31 -16
  50. edsl/results/Results.py +159 -65
  51. edsl/results/TableDisplay.py +198 -0
  52. edsl/results/table_display.css +78 -0
  53. edsl/scenarios/FileStore.py +187 -13
  54. edsl/scenarios/Scenario.py +73 -18
  55. edsl/scenarios/ScenarioJoin.py +127 -0
  56. edsl/scenarios/ScenarioList.py +251 -76
  57. edsl/surveys/MemoryPlan.py +1 -1
  58. edsl/surveys/Rule.py +1 -5
  59. edsl/surveys/RuleCollection.py +1 -1
  60. edsl/surveys/Survey.py +25 -19
  61. edsl/surveys/SurveyFlowVisualizationMixin.py +67 -9
  62. edsl/surveys/instructions/ChangeInstruction.py +9 -7
  63. edsl/surveys/instructions/Instruction.py +21 -7
  64. edsl/templates/error_reporting/interview_details.html +3 -3
  65. edsl/templates/error_reporting/interviews.html +18 -9
  66. edsl/{conjure → utilities}/naming_utilities.py +1 -1
  67. edsl/utilities/utilities.py +15 -0
  68. {edsl-0.1.37.dev5.dist-info → edsl-0.1.38.dist-info}/METADATA +2 -1
  69. {edsl-0.1.37.dev5.dist-info → edsl-0.1.38.dist-info}/RECORD +71 -77
  70. edsl/conjure/AgentConstructionMixin.py +0 -160
  71. edsl/conjure/Conjure.py +0 -62
  72. edsl/conjure/InputData.py +0 -659
  73. edsl/conjure/InputDataCSV.py +0 -48
  74. edsl/conjure/InputDataMixinQuestionStats.py +0 -182
  75. edsl/conjure/InputDataPyRead.py +0 -91
  76. edsl/conjure/InputDataSPSS.py +0 -8
  77. edsl/conjure/InputDataStata.py +0 -8
  78. edsl/conjure/QuestionOptionMixin.py +0 -76
  79. edsl/conjure/QuestionTypeMixin.py +0 -23
  80. edsl/conjure/RawQuestion.py +0 -65
  81. edsl/conjure/SurveyResponses.py +0 -7
  82. edsl/conjure/__init__.py +0 -9
  83. edsl/conjure/examples/placeholder.txt +0 -0
  84. edsl/conjure/utilities.py +0 -201
  85. {edsl-0.1.37.dev5.dist-info → edsl-0.1.38.dist-info}/LICENSE +0 -0
  86. {edsl-0.1.37.dev5.dist-info → edsl-0.1.38.dist-info}/WHEEL +0 -0
@@ -159,7 +159,7 @@ class Interview:
159
159
  return self.task_creators.interview_status
160
160
 
161
161
  # region: Serialization
162
- def _to_dict(self, include_exceptions=True) -> dict[str, Any]:
162
+ def to_dict(self, include_exceptions=True, add_edsl_version=True) -> dict[str, Any]:
163
163
  """Return a dictionary representation of the Interview instance.
164
164
  This is just for hashing purposes.
165
165
 
@@ -168,10 +168,10 @@ class Interview:
168
168
  1217840301076717434
169
169
  """
170
170
  d = {
171
- "agent": self.agent._to_dict(),
172
- "survey": self.survey._to_dict(),
173
- "scenario": self.scenario._to_dict(),
174
- "model": self.model._to_dict(),
171
+ "agent": self.agent.to_dict(add_edsl_version=add_edsl_version),
172
+ "survey": self.survey.to_dict(add_edsl_version=add_edsl_version),
173
+ "scenario": self.scenario.to_dict(add_edsl_version=add_edsl_version),
174
+ "model": self.model.to_dict(add_edsl_version=add_edsl_version),
175
175
  "iteration": self.iteration,
176
176
  "exceptions": {},
177
177
  }
@@ -202,11 +202,11 @@ class Interview:
202
202
  def __hash__(self) -> int:
203
203
  from edsl.utilities.utilities import dict_hash
204
204
 
205
- return dict_hash(self._to_dict(include_exceptions=False))
205
+ return dict_hash(self.to_dict(include_exceptions=False, add_edsl_version=False))
206
206
 
207
207
  def __eq__(self, other: "Interview") -> bool:
208
208
  """
209
- >>> from edsl.jobs.interviews.Interview import Interview; i = Interview.example(); d = i._to_dict(); i2 = Interview.from_dict(d); i == i2
209
+ >>> from edsl.jobs.interviews.Interview import Interview; i = Interview.example(); d = i.to_dict(); i2 = Interview.from_dict(d); i == i2
210
210
  True
211
211
  """
212
212
  return hash(self) == hash(other)
@@ -2,13 +2,14 @@ from __future__ import annotations
2
2
  import time
3
3
  import asyncio
4
4
  import threading
5
- from typing import Coroutine, List, AsyncGenerator, Optional, Union, Generator
6
- from contextlib import contextmanager
5
+ import warnings
6
+ from typing import Coroutine, List, AsyncGenerator, Optional, Union, Generator, Type
7
+ from uuid import UUID
7
8
  from collections import UserList
8
9
 
9
10
  from edsl.results.Results import Results
10
11
  from edsl.jobs.interviews.Interview import Interview
11
- from edsl.jobs.runners.JobsRunnerStatus import JobsRunnerStatus
12
+ from edsl.jobs.runners.JobsRunnerStatus import JobsRunnerStatus, JobsRunnerStatusBase
12
13
 
13
14
  from edsl.jobs.tasks.TaskHistory import TaskHistory
14
15
  from edsl.jobs.buckets.BucketCollection import BucketCollection
@@ -36,11 +37,61 @@ class JobsRunnerAsyncio:
36
37
  The Jobs object is a collection of interviews that are to be run.
37
38
  """
38
39
 
40
+ MAX_CONCURRENT_DEFAULT = 500
41
+
39
42
  def __init__(self, jobs: "Jobs"):
40
43
  self.jobs = jobs
41
44
  self.interviews: List["Interview"] = jobs.interviews()
42
45
  self.bucket_collection: "BucketCollection" = jobs.bucket_collection
43
46
  self.total_interviews: List["Interview"] = []
47
+ self._initialized = threading.Event()
48
+
49
+ from edsl.config import CONFIG
50
+
51
+ self.MAX_CONCURRENT = int(CONFIG.get("EDSL_MAX_CONCURRENT_TASKS"))
52
+ # print(f"MAX_CONCURRENT: {self.MAX_CONCURRENT}")
53
+
54
+ # async def run_async_generator(
55
+ # self,
56
+ # cache: Cache,
57
+ # n: int = 1,
58
+ # stop_on_exception: bool = False,
59
+ # sidecar_model: Optional[LanguageModel] = None,
60
+ # total_interviews: Optional[List["Interview"]] = None,
61
+ # raise_validation_errors: bool = False,
62
+ # ) -> AsyncGenerator["Result", None]:
63
+ # """Creates the tasks, runs them asynchronously, and returns the results as a Results object.
64
+
65
+ # Completed tasks are yielded as they are completed.
66
+
67
+ # :param n: how many times to run each interview
68
+ # :param stop_on_exception: Whether to stop the interview if an exception is raised
69
+ # :param sidecar_model: a language model to use in addition to the interview's model
70
+ # :param total_interviews: A list of interviews to run can be provided instead.
71
+ # :param raise_validation_errors: Whether to raise validation errors
72
+ # """
73
+ # tasks = []
74
+ # if total_interviews: # was already passed in total interviews
75
+ # self.total_interviews = total_interviews
76
+ # else:
77
+ # self.total_interviews = list(
78
+ # self._populate_total_interviews(n=n)
79
+ # ) # Populate self.total_interviews before creating tasks
80
+ # self._initialized.set() # Signal that we're ready
81
+
82
+ # for interview in self.total_interviews:
83
+ # interviewing_task = self._build_interview_task(
84
+ # interview=interview,
85
+ # stop_on_exception=stop_on_exception,
86
+ # sidecar_model=sidecar_model,
87
+ # raise_validation_errors=raise_validation_errors,
88
+ # )
89
+ # tasks.append(asyncio.create_task(interviewing_task))
90
+
91
+ # for task in asyncio.as_completed(tasks):
92
+ # result = await task
93
+ # self.jobs_runner_status.add_completed_interview(result)
94
+ # yield result
44
95
 
45
96
  async def run_async_generator(
46
97
  self,
@@ -51,9 +102,10 @@ class JobsRunnerAsyncio:
51
102
  total_interviews: Optional[List["Interview"]] = None,
52
103
  raise_validation_errors: bool = False,
53
104
  ) -> AsyncGenerator["Result", None]:
54
- """Creates the tasks, runs them asynchronously, and returns the results as a Results object.
105
+ """Creates and processes tasks asynchronously, yielding results as they complete.
55
106
 
56
- Completed tasks are yielded as they are completed.
107
+ Tasks are created and processed in a streaming fashion rather than building the full list upfront.
108
+ Results are yielded as soon as they are available.
57
109
 
58
110
  :param n: how many times to run each interview
59
111
  :param stop_on_exception: Whether to stop the interview if an exception is raised
@@ -61,27 +113,70 @@ class JobsRunnerAsyncio:
61
113
  :param total_interviews: A list of interviews to run can be provided instead.
62
114
  :param raise_validation_errors: Whether to raise validation errors
63
115
  """
64
- tasks = []
65
- if total_interviews: # was already passed in total interviews
116
+ # Initialize interviews iterator
117
+ if total_interviews:
118
+ interviews_iter = iter(total_interviews)
66
119
  self.total_interviews = total_interviews
67
120
  else:
68
- self.total_interviews = list(
69
- self._populate_total_interviews(n=n)
70
- ) # Populate self.total_interviews before creating tasks
121
+ interviews_iter = self._populate_total_interviews(n=n)
122
+ self.total_interviews = list(interviews_iter)
123
+ interviews_iter = iter(self.total_interviews) # Create fresh iterator
71
124
 
72
- for interview in self.total_interviews:
73
- interviewing_task = self._build_interview_task(
74
- interview=interview,
75
- stop_on_exception=stop_on_exception,
76
- sidecar_model=sidecar_model,
77
- raise_validation_errors=raise_validation_errors,
78
- )
79
- tasks.append(asyncio.create_task(interviewing_task))
125
+ self._initialized.set() # Signal that we're ready
80
126
 
81
- for task in asyncio.as_completed(tasks):
82
- result = await task
83
- self.jobs_runner_status.add_completed_interview(result)
84
- yield result
127
+ # Keep track of active tasks
128
+ active_tasks = set()
129
+
130
+ try:
131
+ while True:
132
+ # Add new tasks if we're below max_concurrent and there are more interviews
133
+ while len(active_tasks) < self.MAX_CONCURRENT:
134
+ try:
135
+ interview = next(interviews_iter)
136
+ task = asyncio.create_task(
137
+ self._build_interview_task(
138
+ interview=interview,
139
+ stop_on_exception=stop_on_exception,
140
+ sidecar_model=sidecar_model,
141
+ raise_validation_errors=raise_validation_errors,
142
+ )
143
+ )
144
+ active_tasks.add(task)
145
+ # Add callback to remove task from set when done
146
+ task.add_done_callback(active_tasks.discard)
147
+ except StopIteration:
148
+ break
149
+
150
+ if not active_tasks:
151
+ break
152
+
153
+ # Wait for next completed task
154
+ done, _ = await asyncio.wait(
155
+ active_tasks, return_when=asyncio.FIRST_COMPLETED
156
+ )
157
+
158
+ # Process completed tasks
159
+ for task in done:
160
+ try:
161
+ result = await task
162
+ self.jobs_runner_status.add_completed_interview(result)
163
+ yield result
164
+ except Exception as e:
165
+ if stop_on_exception:
166
+ # Cancel remaining tasks
167
+ for t in active_tasks:
168
+ if not t.done():
169
+ t.cancel()
170
+ raise
171
+ else:
172
+ # Log error and continue
173
+ # logger.error(f"Task failed with error: {e}")
174
+ continue
175
+ finally:
176
+ # Ensure we cancel any remaining tasks if we exit early
177
+ for task in active_tasks:
178
+ if not task.done():
179
+ task.cancel()
85
180
 
86
181
  def _populate_total_interviews(
87
182
  self, n: int = 1
@@ -242,11 +337,25 @@ class JobsRunnerAsyncio:
242
337
  if len(results.task_history.indices) > 5:
243
338
  msg += f"Exceptions were raised in the following interviews: {results.task_history.indices}.\n"
244
339
 
245
- print(msg)
246
- # this is where exceptions are opening up
340
+ import sys
341
+
342
+ print(msg, file=sys.stderr)
343
+ from edsl.config import CONFIG
344
+
345
+ if CONFIG.get("EDSL_OPEN_EXCEPTION_REPORT_URL") == "True":
346
+ open_in_browser = True
347
+ elif CONFIG.get("EDSL_OPEN_EXCEPTION_REPORT_URL") == "False":
348
+ open_in_browser = False
349
+ else:
350
+ raise Exception(
351
+ "EDSL_OPEN_EXCEPTION_REPORT_URL", "must be either True or False"
352
+ )
353
+
354
+ # print("open_in_browser", open_in_browser)
355
+
247
356
  filepath = results.task_history.html(
248
357
  cta="Open report to see details.",
249
- open_in_browser=True,
358
+ open_in_browser=open_in_browser,
250
359
  return_link=True,
251
360
  )
252
361
 
@@ -275,6 +384,8 @@ class JobsRunnerAsyncio:
275
384
  stop_on_exception: bool = False,
276
385
  progress_bar: bool = False,
277
386
  sidecar_model: Optional[LanguageModel] = None,
387
+ jobs_runner_status: Optional[Type[JobsRunnerStatusBase]] = None,
388
+ job_uuid: Optional[UUID] = None,
278
389
  print_exceptions: bool = True,
279
390
  raise_validation_errors: bool = False,
280
391
  ) -> "Coroutine":
@@ -286,7 +397,19 @@ class JobsRunnerAsyncio:
286
397
  self.cache = cache
287
398
  self.sidecar_model = sidecar_model
288
399
 
289
- self.jobs_runner_status = JobsRunnerStatus(self, n=n)
400
+ from edsl.coop import Coop
401
+
402
+ coop = Coop()
403
+ endpoint_url = coop.get_progress_bar_url()
404
+
405
+ if jobs_runner_status is not None:
406
+ self.jobs_runner_status = jobs_runner_status(
407
+ self, n=n, endpoint_url=endpoint_url, job_uuid=job_uuid
408
+ )
409
+ else:
410
+ self.jobs_runner_status = JobsRunnerStatus(
411
+ self, n=n, endpoint_url=endpoint_url, job_uuid=job_uuid
412
+ )
290
413
 
291
414
  stop_event = threading.Event()
292
415
 
@@ -306,11 +429,16 @@ class JobsRunnerAsyncio:
306
429
  """Runs the progress bar in a separate thread."""
307
430
  self.jobs_runner_status.update_progress(stop_event)
308
431
 
309
- if progress_bar:
432
+ if progress_bar and self.jobs_runner_status.has_ep_api_key():
433
+ self.jobs_runner_status.setup()
310
434
  progress_thread = threading.Thread(
311
435
  target=run_progress_bar, args=(stop_event,)
312
436
  )
313
437
  progress_thread.start()
438
+ elif progress_bar:
439
+ warnings.warn(
440
+ "You need an Expected Parrot API key to view job progress bars."
441
+ )
314
442
 
315
443
  exception_to_raise = None
316
444
  try:
@@ -325,7 +453,7 @@ class JobsRunnerAsyncio:
325
453
  stop_event.set()
326
454
  finally:
327
455
  stop_event.set()
328
- if progress_bar:
456
+ if progress_bar and self.jobs_runner_status.has_ep_api_key():
329
457
  # self.jobs_runner_status.stop_event.set()
330
458
  if progress_thread:
331
459
  progress_thread.join()