edsl-0.1.39-py3-none-any.whl → edsl-0.1.39.dev2-py3-none-any.whl
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- edsl/Base.py +0 -28
- edsl/__init__.py +1 -1
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +17 -9
- edsl/agents/Invigilator.py +14 -13
- edsl/agents/InvigilatorBase.py +1 -4
- edsl/agents/PromptConstructor.py +22 -42
- edsl/agents/QuestionInstructionPromptBuilder.py +1 -1
- edsl/auto/AutoStudy.py +5 -18
- edsl/auto/StageBase.py +40 -53
- edsl/auto/StageQuestions.py +1 -2
- edsl/auto/utilities.py +6 -0
- edsl/coop/coop.py +5 -21
- edsl/data/Cache.py +18 -29
- edsl/data/CacheHandler.py +2 -0
- edsl/data/RemoteCacheSync.py +46 -154
- edsl/enums.py +0 -7
- edsl/inference_services/AnthropicService.py +16 -38
- edsl/inference_services/AvailableModelFetcher.py +1 -7
- edsl/inference_services/GoogleService.py +1 -5
- edsl/inference_services/InferenceServicesCollection.py +2 -18
- edsl/inference_services/OpenAIService.py +31 -46
- edsl/inference_services/TestService.py +3 -1
- edsl/inference_services/TogetherAIService.py +3 -5
- edsl/inference_services/data_structures.py +2 -74
- edsl/jobs/AnswerQuestionFunctionConstructor.py +113 -148
- edsl/jobs/FetchInvigilator.py +3 -10
- edsl/jobs/InterviewsConstructor.py +4 -6
- edsl/jobs/Jobs.py +233 -299
- edsl/jobs/JobsChecks.py +2 -2
- edsl/jobs/JobsPrompts.py +1 -1
- edsl/jobs/JobsRemoteInferenceHandler.py +136 -160
- edsl/jobs/interviews/Interview.py +42 -80
- edsl/jobs/runners/JobsRunnerAsyncio.py +358 -88
- edsl/jobs/runners/JobsRunnerStatus.py +165 -133
- edsl/jobs/tasks/TaskHistory.py +3 -24
- edsl/language_models/LanguageModel.py +4 -59
- edsl/language_models/ModelList.py +8 -19
- edsl/language_models/__init__.py +1 -1
- edsl/language_models/registry.py +180 -0
- edsl/language_models/repair.py +1 -1
- edsl/questions/QuestionBase.py +26 -35
- edsl/questions/{question_base_gen_mixin.py → QuestionBaseGenMixin.py} +49 -52
- edsl/questions/QuestionBasePromptsMixin.py +1 -1
- edsl/questions/QuestionBudget.py +1 -1
- edsl/questions/QuestionCheckBox.py +2 -2
- edsl/questions/QuestionExtract.py +7 -5
- edsl/questions/QuestionFreeText.py +1 -1
- edsl/questions/QuestionList.py +15 -9
- edsl/questions/QuestionMatrix.py +1 -1
- edsl/questions/QuestionMultipleChoice.py +1 -1
- edsl/questions/QuestionNumerical.py +1 -1
- edsl/questions/QuestionRank.py +1 -1
- edsl/questions/{response_validator_abc.py → ResponseValidatorABC.py} +18 -6
- edsl/questions/{response_validator_factory.py → ResponseValidatorFactory.py} +1 -7
- edsl/questions/SimpleAskMixin.py +1 -1
- edsl/questions/__init__.py +1 -1
- edsl/results/DatasetExportMixin.py +119 -60
- edsl/results/Result.py +3 -109
- edsl/results/Results.py +39 -50
- edsl/scenarios/FileStore.py +0 -32
- edsl/scenarios/ScenarioList.py +7 -35
- edsl/scenarios/handlers/csv.py +0 -11
- edsl/surveys/Survey.py +20 -71
- {edsl-0.1.39.dist-info → edsl-0.1.39.dev2.dist-info}/METADATA +1 -1
- {edsl-0.1.39.dist-info → edsl-0.1.39.dev2.dist-info}/RECORD +78 -84
- {edsl-0.1.39.dist-info → edsl-0.1.39.dev2.dist-info}/WHEEL +1 -1
- edsl/jobs/async_interview_runner.py +0 -138
- edsl/jobs/check_survey_scenario_compatibility.py +0 -85
- edsl/jobs/data_structures.py +0 -120
- edsl/jobs/results_exceptions_handler.py +0 -98
- edsl/language_models/model.py +0 -256
- edsl/questions/data_structures.py +0 -20
- edsl/results/file_exports.py +0 -252
- /edsl/agents/{question_option_processor.py → QuestionOptionProcessor.py} +0 -0
- /edsl/questions/{answer_validator_mixin.py → AnswerValidatorMixin.py} +0 -0
- /edsl/questions/{loop_processor.py → LoopProcessor.py} +0 -0
- /edsl/questions/{register_questions_meta.py → RegisterQuestionsMeta.py} +0 -0
- /edsl/results/{results_fetch_mixin.py → ResultsFetchMixin.py} +0 -0
- /edsl/results/{results_tools_mixin.py → ResultsToolsMixin.py} +0 -0
- /edsl/results/{results_selector.py → Selector.py} +0 -0
- /edsl/scenarios/{directory_scanner.py → DirectoryScanner.py} +0 -0
- /edsl/scenarios/{scenario_join.py → ScenarioJoin.py} +0 -0
- /edsl/scenarios/{scenario_selector.py → ScenarioSelector.py} +0 -0
- {edsl-0.1.39.dist-info → edsl-0.1.39.dev2.dist-info}/LICENSE +0 -0
@@ -4,10 +4,10 @@ from __future__ import annotations
|
|
4
4
|
import asyncio
|
5
5
|
from typing import Any, Type, List, Generator, Optional, Union, TYPE_CHECKING
|
6
6
|
import copy
|
7
|
-
from dataclasses import dataclass
|
8
7
|
|
9
|
-
# from edsl.
|
10
|
-
|
8
|
+
# from edsl.config import CONFIG
|
9
|
+
|
10
|
+
from edsl.jobs.Answers import Answers
|
11
11
|
from edsl.jobs.interviews.InterviewStatusLog import InterviewStatusLog
|
12
12
|
from edsl.jobs.interviews.InterviewStatusDictionary import InterviewStatusDictionary
|
13
13
|
from edsl.jobs.interviews.InterviewExceptionCollection import (
|
@@ -22,7 +22,6 @@ from edsl.jobs.InterviewTaskManager import InterviewTaskManager
|
|
22
22
|
from edsl.jobs.FetchInvigilator import FetchInvigilator
|
23
23
|
from edsl.jobs.RequestTokenEstimator import RequestTokenEstimator
|
24
24
|
|
25
|
-
|
26
25
|
if TYPE_CHECKING:
|
27
26
|
from edsl.agents.Agent import Agent
|
28
27
|
from edsl.surveys.Survey import Survey
|
@@ -30,16 +29,6 @@ if TYPE_CHECKING:
|
|
30
29
|
from edsl.data.Cache import Cache
|
31
30
|
from edsl.language_models.LanguageModel import LanguageModel
|
32
31
|
from edsl.jobs.tokens.InterviewTokenUsage import InterviewTokenUsage
|
33
|
-
from edsl.agents.InvigilatorBase import InvigilatorBase
|
34
|
-
from edsl.language_models.key_management.KeyLookup import KeyLookup
|
35
|
-
|
36
|
-
|
37
|
-
@dataclass
|
38
|
-
class InterviewRunningConfig:
|
39
|
-
cache: Optional["Cache"] = (None,)
|
40
|
-
skip_retry: bool = (False,) # COULD BE SET WITH CONFIG
|
41
|
-
raise_validation_errors: bool = (True,)
|
42
|
-
stop_on_exception: bool = (False,)
|
43
32
|
|
44
33
|
|
45
34
|
class Interview:
|
@@ -56,11 +45,13 @@ class Interview:
|
|
56
45
|
survey: Survey,
|
57
46
|
scenario: Scenario,
|
58
47
|
model: Type["LanguageModel"],
|
48
|
+
debug: Optional[bool] = False, # DEPRECATE
|
59
49
|
iteration: int = 0,
|
60
|
-
indices: dict = None, # explain?
|
61
50
|
cache: Optional["Cache"] = None,
|
51
|
+
sidecar_model: Optional["LanguageModel"] = None, # DEPRECATE
|
62
52
|
skip_retry: bool = False, # COULD BE SET WITH CONFIG
|
63
53
|
raise_validation_errors: bool = True,
|
54
|
+
indices: dict = None, # explain?
|
64
55
|
):
|
65
56
|
"""Initialize the Interview instance.
|
66
57
|
|
@@ -68,9 +59,10 @@ class Interview:
|
|
68
59
|
:param survey: the survey being administered to the agent.
|
69
60
|
:param scenario: the scenario that populates the survey questions.
|
70
61
|
:param model: the language model used to answer the questions.
|
71
|
-
|
62
|
+
:param debug: if True, run without calls to the language model.
|
72
63
|
:param iteration: the iteration number of the interview.
|
73
64
|
:param cache: the cache used to store the answers.
|
65
|
+
:param sidecar_model: a sidecar model used to answer questions.
|
74
66
|
|
75
67
|
>>> i = Interview.example()
|
76
68
|
>>> i.task_manager.task_creators
|
@@ -91,9 +83,12 @@ class Interview:
|
|
91
83
|
self.survey = copy.deepcopy(survey) # why do we need to deepcopy the survey?
|
92
84
|
self.scenario = scenario
|
93
85
|
self.model = model
|
86
|
+
self.debug = debug
|
94
87
|
self.iteration = iteration
|
88
|
+
self.cache = cache
|
95
89
|
|
96
90
|
self.answers = Answers() # will get filled in as interview progresses
|
91
|
+
self.sidecar_model = sidecar_model
|
97
92
|
|
98
93
|
self.task_manager = InterviewTaskManager(
|
99
94
|
survey=self.survey,
|
@@ -102,13 +97,6 @@ class Interview:
|
|
102
97
|
|
103
98
|
self.exceptions = InterviewExceptionCollection()
|
104
99
|
|
105
|
-
self.running_config = InterviewRunningConfig(
|
106
|
-
cache=cache,
|
107
|
-
skip_retry=skip_retry,
|
108
|
-
raise_validation_errors=raise_validation_errors,
|
109
|
-
)
|
110
|
-
|
111
|
-
self.cache = cache
|
112
100
|
self.skip_retry = skip_retry
|
113
101
|
self.raise_validation_errors = raise_validation_errors
|
114
102
|
|
@@ -121,7 +109,6 @@ class Interview:
|
|
121
109
|
self.failed_questions = []
|
122
110
|
|
123
111
|
self.indices = indices
|
124
|
-
self.initial_hash = hash(self)
|
125
112
|
|
126
113
|
@property
|
127
114
|
def has_exceptions(self) -> bool:
|
@@ -147,6 +134,7 @@ class Interview:
|
|
147
134
|
# return self.task_creators.interview_status
|
148
135
|
return self.task_manager.interview_status
|
149
136
|
|
137
|
+
# region: Serialization
|
150
138
|
def to_dict(self, include_exceptions=True, add_edsl_version=True) -> dict[str, Any]:
|
151
139
|
"""Return a dictionary representation of the Interview instance.
|
152
140
|
This is just for hashing purposes.
|
@@ -210,13 +198,13 @@ class Interview:
|
|
210
198
|
"""
|
211
199
|
return hash(self) == hash(other)
|
212
200
|
|
201
|
+
# region: Conducting the interview
|
213
202
|
async def async_conduct_interview(
|
214
203
|
self,
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
# key_lookup: Optional[KeyLookup] = None,
|
204
|
+
model_buckets: Optional[ModelBuckets] = None,
|
205
|
+
stop_on_exception: bool = False,
|
206
|
+
sidecar_model: Optional["LanguageModel"] = None,
|
207
|
+
raise_validation_errors: bool = True,
|
220
208
|
) -> tuple["Answers", List[dict[str, Any]]]:
|
221
209
|
"""
|
222
210
|
Conduct an Interview asynchronously.
|
@@ -225,6 +213,7 @@ class Interview:
|
|
225
213
|
:param model_buckets: a dictionary of token buckets for the model.
|
226
214
|
:param debug: run without calls to LLM.
|
227
215
|
:param stop_on_exception: if True, stops the interview if an exception is raised.
|
216
|
+
:param sidecar_model: a sidecar model used to answer questions.
|
228
217
|
|
229
218
|
Example usage:
|
230
219
|
|
@@ -238,39 +227,21 @@ class Interview:
|
|
238
227
|
>>> i.exceptions
|
239
228
|
{'q0': ...
|
240
229
|
>>> i = Interview.example()
|
241
|
-
>>>
|
242
|
-
>>> run_config = RunConfig(parameters = RunParameters(), environment = RunEnvironment())
|
243
|
-
>>> run_config.parameters.stop_on_exception = True
|
244
|
-
>>> result, _ = asyncio.run(i.async_conduct_interview(run_config))
|
230
|
+
>>> result, _ = asyncio.run(i.async_conduct_interview(stop_on_exception = True))
|
245
231
|
Traceback (most recent call last):
|
246
232
|
...
|
247
233
|
asyncio.exceptions.CancelledError
|
248
234
|
"""
|
249
|
-
|
250
|
-
|
251
|
-
if run_config is None:
|
252
|
-
run_config = RunConfig(
|
253
|
-
parameters=RunParameters(),
|
254
|
-
environment=RunEnvironment(),
|
255
|
-
)
|
256
|
-
self.stop_on_exception = run_config.parameters.stop_on_exception
|
235
|
+
self.sidecar_model = sidecar_model
|
236
|
+
self.stop_on_exception = stop_on_exception
|
257
237
|
|
258
238
|
# if no model bucket is passed, create an 'infinity' bucket with no rate limits
|
259
|
-
bucket_collection = run_config.environment.bucket_collection
|
260
|
-
|
261
|
-
if bucket_collection:
|
262
|
-
model_buckets = bucket_collection.get(self.model)
|
263
|
-
else:
|
264
|
-
model_buckets = None
|
265
|
-
|
266
239
|
if model_buckets is None or hasattr(self.agent, "answer_question_directly"):
|
267
240
|
model_buckets = ModelBuckets.infinity_bucket()
|
268
241
|
|
269
242
|
# was "self.tasks" - is that necessary?
|
270
243
|
self.tasks = self.task_manager.build_question_tasks(
|
271
|
-
answer_func=AnswerQuestionFunctionConstructor(
|
272
|
-
self, key_lookup=run_config.environment.key_lookup
|
273
|
-
)(),
|
244
|
+
answer_func=AnswerQuestionFunctionConstructor(self)(),
|
274
245
|
token_estimator=RequestTokenEstimator(self),
|
275
246
|
model_buckets=model_buckets,
|
276
247
|
)
|
@@ -279,26 +250,23 @@ class Interview:
|
|
279
250
|
## with dependencies on the questions that must be answered before this one can be answered.
|
280
251
|
|
281
252
|
## 'Invigilators' are used to administer the survey.
|
282
|
-
|
283
|
-
interview=self,
|
284
|
-
|
285
|
-
|
286
|
-
)
|
287
|
-
self.invigilators = [fetcher(question) for question in self.survey.questions]
|
288
|
-
await asyncio.gather(
|
289
|
-
*self.tasks, return_exceptions=not run_config.parameters.stop_on_exception
|
290
|
-
)
|
253
|
+
self.invigilators = [
|
254
|
+
FetchInvigilator(interview=self, current_answers=self.answers)(question)
|
255
|
+
for question in self.survey.questions
|
256
|
+
]
|
257
|
+
await asyncio.gather(*self.tasks, return_exceptions=not stop_on_exception)
|
291
258
|
self.answers.replace_missing_answers_with_none(self.survey)
|
292
259
|
valid_results = list(
|
293
260
|
self._extract_valid_results(self.tasks, self.invigilators, self.exceptions)
|
294
261
|
)
|
295
262
|
return self.answers, valid_results
|
296
263
|
|
264
|
+
# endregion
|
265
|
+
|
266
|
+
# region: Extracting results and recording errors
|
297
267
|
@staticmethod
|
298
268
|
def _extract_valid_results(
|
299
|
-
tasks: List["
|
300
|
-
invigilators: List["InvigilatorBase"],
|
301
|
-
exceptions: InterviewExceptionCollection,
|
269
|
+
tasks, invigilators: List["InvigilatorABC"], exceptions
|
302
270
|
) -> Generator["Answers", None, None]:
|
303
271
|
"""Extract the valid results from the list of results.
|
304
272
|
|
@@ -311,7 +279,10 @@ class Interview:
|
|
311
279
|
"""
|
312
280
|
assert len(tasks) == len(invigilators)
|
313
281
|
|
314
|
-
|
282
|
+
for task, invigilator in zip(tasks, invigilators):
|
283
|
+
if not task.done():
|
284
|
+
raise ValueError(f"Task {task.get_name()} is not done.")
|
285
|
+
|
315
286
|
try:
|
316
287
|
result = task.result()
|
317
288
|
except asyncio.CancelledError as e: # task was cancelled
|
@@ -327,21 +298,17 @@ class Interview:
|
|
327
298
|
invigilator=invigilator,
|
328
299
|
)
|
329
300
|
exceptions.add(task.get_name(), exception_entry)
|
330
|
-
return result
|
331
301
|
|
332
|
-
|
333
|
-
if not task.done():
|
334
|
-
raise ValueError(f"Task {task.get_name()} is not done.")
|
302
|
+
yield result
|
335
303
|
|
336
|
-
|
304
|
+
# endregion
|
337
305
|
|
306
|
+
# region: Magic methods
|
338
307
|
def __repr__(self) -> str:
|
339
308
|
"""Return a string representation of the Interview instance."""
|
340
309
|
return f"Interview(agent = {repr(self.agent)}, survey = {repr(self.survey)}, scenario = {repr(self.scenario)}, model = {repr(self.model)})"
|
341
310
|
|
342
|
-
def duplicate(
|
343
|
-
self, iteration: int, cache: "Cache", randomize_survey: Optional[bool] = True
|
344
|
-
) -> Interview:
|
311
|
+
def duplicate(self, iteration: int, cache: "Cache") -> Interview:
|
345
312
|
"""Duplicate the interview, but with a new iteration number and cache.
|
346
313
|
|
347
314
|
>>> i = Interview.example()
|
@@ -350,19 +317,14 @@ class Interview:
|
|
350
317
|
True
|
351
318
|
|
352
319
|
"""
|
353
|
-
if randomize_survey:
|
354
|
-
new_survey = self.survey.draw()
|
355
|
-
else:
|
356
|
-
new_survey = self.survey
|
357
|
-
|
358
320
|
return Interview(
|
359
321
|
agent=self.agent,
|
360
|
-
survey=
|
322
|
+
survey=self.survey,
|
361
323
|
scenario=self.scenario,
|
362
324
|
model=self.model,
|
363
325
|
iteration=iteration,
|
364
|
-
cache=
|
365
|
-
skip_retry=self.
|
326
|
+
cache=cache,
|
327
|
+
skip_retry=self.skip_retry,
|
366
328
|
indices=self.indices,
|
367
329
|
)
|
368
330
|
|