edsl 0.1.37.dev5__py3-none-any.whl → 0.1.38__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +63 -34
- edsl/BaseDiff.py +7 -7
- edsl/__init__.py +2 -1
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +23 -11
- edsl/agents/AgentList.py +86 -23
- edsl/agents/Invigilator.py +18 -7
- edsl/agents/InvigilatorBase.py +0 -19
- edsl/agents/PromptConstructor.py +5 -4
- edsl/auto/SurveyCreatorPipeline.py +1 -1
- edsl/auto/utilities.py +1 -1
- edsl/base/Base.py +3 -13
- edsl/config.py +8 -0
- edsl/coop/coop.py +89 -19
- edsl/data/Cache.py +45 -17
- edsl/data/CacheEntry.py +8 -3
- edsl/data/RemoteCacheSync.py +0 -19
- edsl/enums.py +2 -0
- edsl/exceptions/agents.py +4 -0
- edsl/exceptions/cache.py +5 -0
- edsl/inference_services/GoogleService.py +7 -15
- edsl/inference_services/PerplexityService.py +163 -0
- edsl/inference_services/registry.py +2 -0
- edsl/jobs/Jobs.py +110 -559
- edsl/jobs/JobsChecks.py +147 -0
- edsl/jobs/JobsPrompts.py +268 -0
- edsl/jobs/JobsRemoteInferenceHandler.py +239 -0
- edsl/jobs/buckets/TokenBucket.py +3 -0
- edsl/jobs/interviews/Interview.py +7 -7
- edsl/jobs/runners/JobsRunnerAsyncio.py +156 -28
- edsl/jobs/runners/JobsRunnerStatus.py +194 -196
- edsl/jobs/tasks/TaskHistory.py +27 -19
- edsl/language_models/LanguageModel.py +52 -90
- edsl/language_models/ModelList.py +67 -14
- edsl/language_models/registry.py +57 -4
- edsl/notebooks/Notebook.py +7 -8
- edsl/prompts/Prompt.py +8 -3
- edsl/questions/QuestionBase.py +38 -30
- edsl/questions/QuestionBaseGenMixin.py +1 -1
- edsl/questions/QuestionBasePromptsMixin.py +0 -17
- edsl/questions/QuestionExtract.py +3 -4
- edsl/questions/QuestionFunctional.py +10 -3
- edsl/questions/derived/QuestionTopK.py +2 -0
- edsl/questions/question_registry.py +36 -6
- edsl/results/CSSParameterizer.py +108 -0
- edsl/results/Dataset.py +146 -15
- edsl/results/DatasetExportMixin.py +231 -217
- edsl/results/DatasetTree.py +134 -4
- edsl/results/Result.py +31 -16
- edsl/results/Results.py +159 -65
- edsl/results/TableDisplay.py +198 -0
- edsl/results/table_display.css +78 -0
- edsl/scenarios/FileStore.py +187 -13
- edsl/scenarios/Scenario.py +73 -18
- edsl/scenarios/ScenarioJoin.py +127 -0
- edsl/scenarios/ScenarioList.py +251 -76
- edsl/surveys/MemoryPlan.py +1 -1
- edsl/surveys/Rule.py +1 -5
- edsl/surveys/RuleCollection.py +1 -1
- edsl/surveys/Survey.py +25 -19
- edsl/surveys/SurveyFlowVisualizationMixin.py +67 -9
- edsl/surveys/instructions/ChangeInstruction.py +9 -7
- edsl/surveys/instructions/Instruction.py +21 -7
- edsl/templates/error_reporting/interview_details.html +3 -3
- edsl/templates/error_reporting/interviews.html +18 -9
- edsl/{conjure → utilities}/naming_utilities.py +1 -1
- edsl/utilities/utilities.py +15 -0
- {edsl-0.1.37.dev5.dist-info → edsl-0.1.38.dist-info}/METADATA +2 -1
- {edsl-0.1.37.dev5.dist-info → edsl-0.1.38.dist-info}/RECORD +71 -77
- edsl/conjure/AgentConstructionMixin.py +0 -160
- edsl/conjure/Conjure.py +0 -62
- edsl/conjure/InputData.py +0 -659
- edsl/conjure/InputDataCSV.py +0 -48
- edsl/conjure/InputDataMixinQuestionStats.py +0 -182
- edsl/conjure/InputDataPyRead.py +0 -91
- edsl/conjure/InputDataSPSS.py +0 -8
- edsl/conjure/InputDataStata.py +0 -8
- edsl/conjure/QuestionOptionMixin.py +0 -76
- edsl/conjure/QuestionTypeMixin.py +0 -23
- edsl/conjure/RawQuestion.py +0 -65
- edsl/conjure/SurveyResponses.py +0 -7
- edsl/conjure/__init__.py +0 -9
- edsl/conjure/examples/placeholder.txt +0 -0
- edsl/conjure/utilities.py +0 -201
- {edsl-0.1.37.dev5.dist-info → edsl-0.1.38.dist-info}/LICENSE +0 -0
- {edsl-0.1.37.dev5.dist-info → edsl-0.1.38.dist-info}/WHEEL +0 -0
**edsl/jobs/interviews/Interview.py**

```diff
@@ -159,7 +159,7 @@ class Interview:
         return self.task_creators.interview_status
 
     # region: Serialization
-    def
+    def to_dict(self, include_exceptions=True, add_edsl_version=True) -> dict[str, Any]:
         """Return a dictionary representation of the Interview instance.
 
         This is just for hashing purposes.
@@ -168,10 +168,10 @@
         1217840301076717434
         """
         d = {
-            "agent": self.agent.
-            "survey": self.survey.
-            "scenario": self.scenario.
-            "model": self.model.
+            "agent": self.agent.to_dict(add_edsl_version=add_edsl_version),
+            "survey": self.survey.to_dict(add_edsl_version=add_edsl_version),
+            "scenario": self.scenario.to_dict(add_edsl_version=add_edsl_version),
+            "model": self.model.to_dict(add_edsl_version=add_edsl_version),
             "iteration": self.iteration,
             "exceptions": {},
         }
@@ -202,11 +202,11 @@
     def __hash__(self) -> int:
        from edsl.utilities.utilities import dict_hash
 
-        return dict_hash(self.
+        return dict_hash(self.to_dict(include_exceptions=False, add_edsl_version=False))
 
     def __eq__(self, other: "Interview") -> bool:
         """
-        >>> from edsl.jobs.interviews.Interview import Interview; i = Interview.example(); d = i.
+        >>> from edsl.jobs.interviews.Interview import Interview; i = Interview.example(); d = i.to_dict(); i2 = Interview.from_dict(d); i == i2
         True
         """
         return hash(self) == hash(other)
```
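The renamed serialization entry points above are exercised by the doctest; the sketch below restates that round-trip as a plain script. It assumes a working edsl installation, since `Interview.example()` builds a full agent/survey/model bundle.

```python
# Round-trip sketch for the now-public Interview serialization API.
from edsl.jobs.interviews.Interview import Interview

i = Interview.example()

# to_dict() is public and takes explicit flags for exceptions and version info.
d = i.to_dict(include_exceptions=True, add_edsl_version=True)

# Equality is hash-based; __hash__ hashes
# to_dict(include_exceptions=False, add_edsl_version=False).
i2 = Interview.from_dict(d)
assert i == i2
assert hash(i) == hash(i2)
```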
**edsl/jobs/runners/JobsRunnerAsyncio.py**

```diff
@@ -2,13 +2,14 @@ from __future__ import annotations
 import time
 import asyncio
 import threading
-
-from
+import warnings
+from typing import Coroutine, List, AsyncGenerator, Optional, Union, Generator, Type
+from uuid import UUID
 from collections import UserList
 
 from edsl.results.Results import Results
 from edsl.jobs.interviews.Interview import Interview
-from edsl.jobs.runners.JobsRunnerStatus import JobsRunnerStatus
+from edsl.jobs.runners.JobsRunnerStatus import JobsRunnerStatus, JobsRunnerStatusBase
 
 from edsl.jobs.tasks.TaskHistory import TaskHistory
 from edsl.jobs.buckets.BucketCollection import BucketCollection
```
```diff
@@ -36,11 +37,61 @@ class JobsRunnerAsyncio:
     The Jobs object is a collection of interviews that are to be run.
     """
 
+    MAX_CONCURRENT_DEFAULT = 500
+
     def __init__(self, jobs: "Jobs"):
         self.jobs = jobs
         self.interviews: List["Interview"] = jobs.interviews()
         self.bucket_collection: "BucketCollection" = jobs.bucket_collection
         self.total_interviews: List["Interview"] = []
+        self._initialized = threading.Event()
+
+        from edsl.config import CONFIG
+
+        self.MAX_CONCURRENT = int(CONFIG.get("EDSL_MAX_CONCURRENT_TASKS"))
+        # print(f"MAX_CONCURRENT: {self.MAX_CONCURRENT}")
+
+    # async def run_async_generator(
+    #     self,
+    #     cache: Cache,
+    #     n: int = 1,
+    #     stop_on_exception: bool = False,
+    #     sidecar_model: Optional[LanguageModel] = None,
+    #     total_interviews: Optional[List["Interview"]] = None,
+    #     raise_validation_errors: bool = False,
+    # ) -> AsyncGenerator["Result", None]:
+    #     """Creates the tasks, runs them asynchronously, and returns the results as a Results object.
+
+    #     Completed tasks are yielded as they are completed.
+
+    #     :param n: how many times to run each interview
+    #     :param stop_on_exception: Whether to stop the interview if an exception is raised
+    #     :param sidecar_model: a language model to use in addition to the interview's model
+    #     :param total_interviews: A list of interviews to run can be provided instead.
+    #     :param raise_validation_errors: Whether to raise validation errors
+    #     """
+    #     tasks = []
+    #     if total_interviews:  # was already passed in total interviews
+    #         self.total_interviews = total_interviews
+    #     else:
+    #         self.total_interviews = list(
+    #             self._populate_total_interviews(n=n)
+    #         )  # Populate self.total_interviews before creating tasks
+    #     self._initialized.set()  # Signal that we're ready
+
+    #     for interview in self.total_interviews:
+    #         interviewing_task = self._build_interview_task(
+    #             interview=interview,
+    #             stop_on_exception=stop_on_exception,
+    #             sidecar_model=sidecar_model,
+    #             raise_validation_errors=raise_validation_errors,
+    #         )
+    #         tasks.append(asyncio.create_task(interviewing_task))
+
+    #     for task in asyncio.as_completed(tasks):
+    #         result = await task
+    #         self.jobs_runner_status.add_completed_interview(result)
+    #         yield result
 
     async def run_async_generator(
         self,
```
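The cap on in-flight interview tasks now comes from the new `EDSL_MAX_CONCURRENT_TASKS` setting (presumably among the entries added to `edsl/config.py` in this release). A minimal sketch of supplying it, assuming the key is resolved from the environment or a `.env` file like the other `EDSL_*` settings:

```python
# Sketch: configure the concurrency cap before edsl's CONFIG is loaded.
import os

os.environ["EDSL_MAX_CONCURRENT_TASKS"] = "100"  # set before importing edsl

from edsl.config import CONFIG

print(int(CONFIG.get("EDSL_MAX_CONCURRENT_TASKS")))  # 100
```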
```diff
@@ -51,9 +102,10 @@
         total_interviews: Optional[List["Interview"]] = None,
         raise_validation_errors: bool = False,
     ) -> AsyncGenerator["Result", None]:
-        """Creates
+        """Creates and processes tasks asynchronously, yielding results as they complete.
 
-
+        Tasks are created and processed in a streaming fashion rather than building the full list upfront.
+        Results are yielded as soon as they are available.
 
         :param n: how many times to run each interview
         :param stop_on_exception: Whether to stop the interview if an exception is raised
```
```diff
@@ -61,27 +113,70 @@
         :param total_interviews: A list of interviews to run can be provided instead.
         :param raise_validation_errors: Whether to raise validation errors
         """
-
-        if total_interviews:
+        # Initialize interviews iterator
+        if total_interviews:
+            interviews_iter = iter(total_interviews)
             self.total_interviews = total_interviews
         else:
-
-
-
+            interviews_iter = self._populate_total_interviews(n=n)
+            self.total_interviews = list(interviews_iter)
+            interviews_iter = iter(self.total_interviews)  # Create fresh iterator
 
-
-            interviewing_task = self._build_interview_task(
-                interview=interview,
-                stop_on_exception=stop_on_exception,
-                sidecar_model=sidecar_model,
-                raise_validation_errors=raise_validation_errors,
-            )
-            tasks.append(asyncio.create_task(interviewing_task))
+        self._initialized.set()  # Signal that we're ready
 
-
-
-
-
+        # Keep track of active tasks
+        active_tasks = set()
+
+        try:
+            while True:
+                # Add new tasks if we're below max_concurrent and there are more interviews
+                while len(active_tasks) < self.MAX_CONCURRENT:
+                    try:
+                        interview = next(interviews_iter)
+                        task = asyncio.create_task(
+                            self._build_interview_task(
+                                interview=interview,
+                                stop_on_exception=stop_on_exception,
+                                sidecar_model=sidecar_model,
+                                raise_validation_errors=raise_validation_errors,
+                            )
+                        )
+                        active_tasks.add(task)
+                        # Add callback to remove task from set when done
+                        task.add_done_callback(active_tasks.discard)
+                    except StopIteration:
+                        break
+
+                if not active_tasks:
+                    break
+
+                # Wait for next completed task
+                done, _ = await asyncio.wait(
+                    active_tasks, return_when=asyncio.FIRST_COMPLETED
+                )
+
+                # Process completed tasks
+                for task in done:
+                    try:
+                        result = await task
+                        self.jobs_runner_status.add_completed_interview(result)
+                        yield result
+                    except Exception as e:
+                        if stop_on_exception:
+                            # Cancel remaining tasks
+                            for t in active_tasks:
+                                if not t.done():
+                                    t.cancel()
+                            raise
+                        else:
+                            # Log error and continue
+                            # logger.error(f"Task failed with error: {e}")
+                            continue
+        finally:
+            # Ensure we cancel any remaining tasks if we exit early
+            for task in active_tasks:
+                if not task.done():
+                    task.cancel()
 
     def _populate_total_interviews(
         self, n: int = 1
```
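The streaming loop above keeps at most `MAX_CONCURRENT` interview tasks in flight and yields each result as soon as it finishes. The self-contained sketch below illustrates the same bounded-concurrency pattern with plain asyncio; the names (`fake_interview`, `stream_results`) are illustrative, not edsl APIs, and it tracks completion through the `pending` set returned by `asyncio.wait` instead of done-callbacks.

```python
# Bounded-concurrency streaming sketch: at most MAX_CONCURRENT tasks in flight,
# results yielded as soon as any task completes.
import asyncio
import random

MAX_CONCURRENT = 3  # stand-in for the EDSL_MAX_CONCURRENT_TASKS cap


async def fake_interview(i: int) -> str:
    # Placeholder for the real per-interview task.
    await asyncio.sleep(random.random())
    return f"interview-{i} done"


async def stream_results(items):
    items_iter = iter(items)
    active: set[asyncio.Task] = set()
    try:
        while True:
            # Top up the in-flight set until the cap is hit or the input is exhausted.
            while len(active) < MAX_CONCURRENT:
                try:
                    item = next(items_iter)
                except StopIteration:
                    break
                active.add(asyncio.create_task(fake_interview(item)))

            if not active:
                break  # nothing in flight and nothing left to schedule

            # Yield whichever tasks finish first; keep the rest in flight.
            done, active = await asyncio.wait(
                active, return_when=asyncio.FIRST_COMPLETED
            )
            for task in done:
                yield await task
    finally:
        # Cancel anything still running if the consumer stops early.
        for task in active:
            task.cancel()


async def main():
    async for result in stream_results(range(8)):
        print(result)


asyncio.run(main())
```

Reassigning `active` to the `pending` set returned by `asyncio.wait` guarantees every finished task is yielded exactly once, even if the consumer awaits between results.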
```diff
@@ -242,11 +337,25 @@
             if len(results.task_history.indices) > 5:
                 msg += f"Exceptions were raised in the following interviews: {results.task_history.indices}.\n"
 
-
-
+            import sys
+
+            print(msg, file=sys.stderr)
+            from edsl.config import CONFIG
+
+            if CONFIG.get("EDSL_OPEN_EXCEPTION_REPORT_URL") == "True":
+                open_in_browser = True
+            elif CONFIG.get("EDSL_OPEN_EXCEPTION_REPORT_URL") == "False":
+                open_in_browser = False
+            else:
+                raise Exception(
+                    "EDSL_OPEN_EXCEPTION_REPORT_URL", "must be either True or False"
+                )
+
+            # print("open_in_browser", open_in_browser)
+
             filepath = results.task_history.html(
                 cta="Open report to see details.",
-                open_in_browser=
+                open_in_browser=open_in_browser,
                 return_link=True,
             )
 
```
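Whether the exception report opens in a browser is now controlled by the new `EDSL_OPEN_EXCEPTION_REPORT_URL` flag, which must be the literal string `"True"` or `"False"`; any other value raises. A minimal sketch, assuming the flag is supplied through the environment like the other `EDSL_*` settings:

```python
# Sketch: keep the exception report from opening a browser automatically.
import os

os.environ["EDSL_OPEN_EXCEPTION_REPORT_URL"] = "False"  # must be "True" or "False"
```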
```diff
@@ -275,6 +384,8 @@
         stop_on_exception: bool = False,
         progress_bar: bool = False,
         sidecar_model: Optional[LanguageModel] = None,
+        jobs_runner_status: Optional[Type[JobsRunnerStatusBase]] = None,
+        job_uuid: Optional[UUID] = None,
         print_exceptions: bool = True,
         raise_validation_errors: bool = False,
     ) -> "Coroutine":
```
```diff
@@ -286,7 +397,19 @@
         self.cache = cache
         self.sidecar_model = sidecar_model
 
-
+        from edsl.coop import Coop
+
+        coop = Coop()
+        endpoint_url = coop.get_progress_bar_url()
+
+        if jobs_runner_status is not None:
+            self.jobs_runner_status = jobs_runner_status(
+                self, n=n, endpoint_url=endpoint_url, job_uuid=job_uuid
+            )
+        else:
+            self.jobs_runner_status = JobsRunnerStatus(
+                self, n=n, endpoint_url=endpoint_url, job_uuid=job_uuid
+            )
 
         stop_event = threading.Event()
 
```
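The runner can now be handed a custom status tracker: the class passed as `jobs_runner_status` is instantiated with `(runner, n=n, endpoint_url=endpoint_url, job_uuid=job_uuid)`, and the surrounding hunks show the runner calling `has_ep_api_key()`, `setup()`, `update_progress(stop_event)` and `add_completed_interview(result)` on it. The subclass below is a hypothetical sketch built only from those call sites; the `JobsRunnerStatusBase` internals are not part of this diff, so the overrides are illustrative.

```python
# Hypothetical status tracker; method names taken from the call sites shown above.
from edsl.jobs.runners.JobsRunnerStatus import JobsRunnerStatusBase


class PrintingStatus(JobsRunnerStatusBase):
    def has_ep_api_key(self) -> bool:
        # Pretend a key is available so the progress thread is started.
        return True

    def setup(self) -> None:
        print("progress tracking started")

    def add_completed_interview(self, result) -> None:
        print("completed one interview")
```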
```diff
@@ -306,11 +429,16 @@
             """Runs the progress bar in a separate thread."""
             self.jobs_runner_status.update_progress(stop_event)
 
-        if progress_bar:
+        if progress_bar and self.jobs_runner_status.has_ep_api_key():
+            self.jobs_runner_status.setup()
             progress_thread = threading.Thread(
                 target=run_progress_bar, args=(stop_event,)
             )
             progress_thread.start()
+        elif progress_bar:
+            warnings.warn(
+                "You need an Expected Parrot API key to view job progress bars."
+            )
 
         exception_to_raise = None
         try:
```
```diff
@@ -325,7 +453,7 @@
             stop_event.set()
         finally:
             stop_event.set()
-            if progress_bar:
+            if progress_bar and self.jobs_runner_status.has_ep_api_key():
                 # self.jobs_runner_status.stop_event.set()
                 if progress_thread:
                     progress_thread.join()
```