edsl 0.1.32__py3-none-any.whl → 0.1.33__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as published to a supported public registry. It is provided for informational purposes only.
- edsl/Base.py +9 -3
- edsl/TemplateLoader.py +24 -0
- edsl/__init__.py +8 -3
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +40 -8
- edsl/agents/AgentList.py +43 -0
- edsl/agents/Invigilator.py +135 -219
- edsl/agents/InvigilatorBase.py +148 -59
- edsl/agents/{PromptConstructionMixin.py → PromptConstructor.py} +138 -89
- edsl/agents/__init__.py +1 -0
- edsl/auto/AutoStudy.py +117 -0
- edsl/auto/StageBase.py +230 -0
- edsl/auto/StageGenerateSurvey.py +178 -0
- edsl/auto/StageLabelQuestions.py +125 -0
- edsl/auto/StagePersona.py +61 -0
- edsl/auto/StagePersonaDimensionValueRanges.py +88 -0
- edsl/auto/StagePersonaDimensionValues.py +74 -0
- edsl/auto/StagePersonaDimensions.py +69 -0
- edsl/auto/StageQuestions.py +73 -0
- edsl/auto/SurveyCreatorPipeline.py +21 -0
- edsl/auto/utilities.py +224 -0
- edsl/config.py +47 -56
- edsl/coop/PriceFetcher.py +58 -0
- edsl/coop/coop.py +50 -7
- edsl/data/Cache.py +35 -1
- edsl/data_transfer_models.py +73 -38
- edsl/enums.py +4 -0
- edsl/exceptions/language_models.py +25 -1
- edsl/exceptions/questions.py +62 -5
- edsl/exceptions/results.py +4 -0
- edsl/inference_services/AnthropicService.py +13 -11
- edsl/inference_services/AwsBedrock.py +19 -17
- edsl/inference_services/AzureAI.py +37 -20
- edsl/inference_services/GoogleService.py +16 -12
- edsl/inference_services/GroqService.py +2 -0
- edsl/inference_services/InferenceServiceABC.py +58 -3
- edsl/inference_services/MistralAIService.py +120 -0
- edsl/inference_services/OpenAIService.py +48 -54
- edsl/inference_services/TestService.py +80 -0
- edsl/inference_services/TogetherAIService.py +170 -0
- edsl/inference_services/models_available_cache.py +0 -6
- edsl/inference_services/registry.py +6 -0
- edsl/jobs/Answers.py +10 -12
- edsl/jobs/FailedQuestion.py +78 -0
- edsl/jobs/Jobs.py +37 -22
- edsl/jobs/buckets/BucketCollection.py +24 -15
- edsl/jobs/buckets/TokenBucket.py +93 -14
- edsl/jobs/interviews/Interview.py +366 -78
- edsl/jobs/interviews/{interview_exception_tracking.py → InterviewExceptionCollection.py} +14 -68
- edsl/jobs/interviews/InterviewExceptionEntry.py +85 -19
- edsl/jobs/runners/JobsRunnerAsyncio.py +146 -175
- edsl/jobs/runners/JobsRunnerStatus.py +331 -0
- edsl/jobs/tasks/QuestionTaskCreator.py +30 -23
- edsl/jobs/tasks/TaskHistory.py +148 -213
- edsl/language_models/LanguageModel.py +261 -156
- edsl/language_models/ModelList.py +2 -2
- edsl/language_models/RegisterLanguageModelsMeta.py +14 -29
- edsl/language_models/fake_openai_call.py +15 -0
- edsl/language_models/fake_openai_service.py +61 -0
- edsl/language_models/registry.py +23 -6
- edsl/language_models/repair.py +0 -19
- edsl/language_models/utilities.py +61 -0
- edsl/notebooks/Notebook.py +20 -2
- edsl/prompts/Prompt.py +52 -2
- edsl/questions/AnswerValidatorMixin.py +23 -26
- edsl/questions/QuestionBase.py +330 -249
- edsl/questions/QuestionBaseGenMixin.py +133 -0
- edsl/questions/QuestionBasePromptsMixin.py +266 -0
- edsl/questions/QuestionBudget.py +99 -41
- edsl/questions/QuestionCheckBox.py +227 -35
- edsl/questions/QuestionExtract.py +98 -27
- edsl/questions/QuestionFreeText.py +52 -29
- edsl/questions/QuestionFunctional.py +7 -0
- edsl/questions/QuestionList.py +141 -22
- edsl/questions/QuestionMultipleChoice.py +159 -65
- edsl/questions/QuestionNumerical.py +88 -46
- edsl/questions/QuestionRank.py +182 -24
- edsl/questions/Quick.py +41 -0
- edsl/questions/RegisterQuestionsMeta.py +31 -12
- edsl/questions/ResponseValidatorABC.py +170 -0
- edsl/questions/__init__.py +3 -4
- edsl/questions/decorators.py +21 -0
- edsl/questions/derived/QuestionLikertFive.py +10 -5
- edsl/questions/derived/QuestionLinearScale.py +15 -2
- edsl/questions/derived/QuestionTopK.py +10 -1
- edsl/questions/derived/QuestionYesNo.py +24 -3
- edsl/questions/descriptors.py +43 -7
- edsl/questions/prompt_templates/question_budget.jinja +13 -0
- edsl/questions/prompt_templates/question_checkbox.jinja +32 -0
- edsl/questions/prompt_templates/question_extract.jinja +11 -0
- edsl/questions/prompt_templates/question_free_text.jinja +3 -0
- edsl/questions/prompt_templates/question_linear_scale.jinja +11 -0
- edsl/questions/prompt_templates/question_list.jinja +17 -0
- edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -0
- edsl/questions/prompt_templates/question_numerical.jinja +37 -0
- edsl/questions/question_registry.py +6 -2
- edsl/questions/templates/__init__.py +0 -0
- edsl/questions/templates/budget/__init__.py +0 -0
- edsl/questions/templates/budget/answering_instructions.jinja +7 -0
- edsl/questions/templates/budget/question_presentation.jinja +7 -0
- edsl/questions/templates/checkbox/__init__.py +0 -0
- edsl/questions/templates/checkbox/answering_instructions.jinja +10 -0
- edsl/questions/templates/checkbox/question_presentation.jinja +22 -0
- edsl/questions/templates/extract/__init__.py +0 -0
- edsl/questions/templates/extract/answering_instructions.jinja +7 -0
- edsl/questions/templates/extract/question_presentation.jinja +1 -0
- edsl/questions/templates/free_text/__init__.py +0 -0
- edsl/questions/templates/free_text/answering_instructions.jinja +0 -0
- edsl/questions/templates/free_text/question_presentation.jinja +1 -0
- edsl/questions/templates/likert_five/__init__.py +0 -0
- edsl/questions/templates/likert_five/answering_instructions.jinja +10 -0
- edsl/questions/templates/likert_five/question_presentation.jinja +12 -0
- edsl/questions/templates/linear_scale/__init__.py +0 -0
- edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -0
- edsl/questions/templates/linear_scale/question_presentation.jinja +5 -0
- edsl/questions/templates/list/__init__.py +0 -0
- edsl/questions/templates/list/answering_instructions.jinja +4 -0
- edsl/questions/templates/list/question_presentation.jinja +5 -0
- edsl/questions/templates/multiple_choice/__init__.py +0 -0
- edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -0
- edsl/questions/templates/multiple_choice/html.jinja +0 -0
- edsl/questions/templates/multiple_choice/question_presentation.jinja +12 -0
- edsl/questions/templates/numerical/__init__.py +0 -0
- edsl/questions/templates/numerical/answering_instructions.jinja +8 -0
- edsl/questions/templates/numerical/question_presentation.jinja +7 -0
- edsl/questions/templates/rank/__init__.py +0 -0
- edsl/questions/templates/rank/answering_instructions.jinja +11 -0
- edsl/questions/templates/rank/question_presentation.jinja +15 -0
- edsl/questions/templates/top_k/__init__.py +0 -0
- edsl/questions/templates/top_k/answering_instructions.jinja +8 -0
- edsl/questions/templates/top_k/question_presentation.jinja +22 -0
- edsl/questions/templates/yes_no/__init__.py +0 -0
- edsl/questions/templates/yes_no/answering_instructions.jinja +6 -0
- edsl/questions/templates/yes_no/question_presentation.jinja +12 -0
- edsl/results/Dataset.py +20 -0
- edsl/results/DatasetExportMixin.py +46 -48
- edsl/results/DatasetTree.py +145 -0
- edsl/results/Result.py +32 -5
- edsl/results/Results.py +135 -46
- edsl/results/ResultsDBMixin.py +3 -3
- edsl/results/Selector.py +118 -0
- edsl/results/tree_explore.py +115 -0
- edsl/scenarios/FileStore.py +71 -10
- edsl/scenarios/Scenario.py +96 -25
- edsl/scenarios/ScenarioImageMixin.py +2 -2
- edsl/scenarios/ScenarioList.py +361 -39
- edsl/scenarios/ScenarioListExportMixin.py +9 -0
- edsl/scenarios/ScenarioListPdfMixin.py +150 -4
- edsl/study/SnapShot.py +8 -1
- edsl/study/Study.py +32 -0
- edsl/surveys/Rule.py +10 -1
- edsl/surveys/RuleCollection.py +21 -5
- edsl/surveys/Survey.py +637 -311
- edsl/surveys/SurveyExportMixin.py +71 -9
- edsl/surveys/SurveyFlowVisualizationMixin.py +2 -1
- edsl/surveys/SurveyQualtricsImport.py +75 -4
- edsl/surveys/instructions/ChangeInstruction.py +47 -0
- edsl/surveys/instructions/Instruction.py +34 -0
- edsl/surveys/instructions/InstructionCollection.py +77 -0
- edsl/surveys/instructions/__init__.py +0 -0
- edsl/templates/error_reporting/base.html +24 -0
- edsl/templates/error_reporting/exceptions_by_model.html +35 -0
- edsl/templates/error_reporting/exceptions_by_question_name.html +17 -0
- edsl/templates/error_reporting/exceptions_by_type.html +17 -0
- edsl/templates/error_reporting/interview_details.html +116 -0
- edsl/templates/error_reporting/interviews.html +10 -0
- edsl/templates/error_reporting/overview.html +5 -0
- edsl/templates/error_reporting/performance_plot.html +2 -0
- edsl/templates/error_reporting/report.css +74 -0
- edsl/templates/error_reporting/report.html +118 -0
- edsl/templates/error_reporting/report.js +25 -0
- edsl/utilities/utilities.py +9 -1
- {edsl-0.1.32.dist-info → edsl-0.1.33.dist-info}/METADATA +5 -2
- edsl-0.1.33.dist-info/RECORD +295 -0
- edsl/jobs/interviews/InterviewTaskBuildingMixin.py +0 -286
- edsl/jobs/interviews/retry_management.py +0 -37
- edsl/jobs/runners/JobsRunnerStatusMixin.py +0 -333
- edsl/utilities/gcp_bucket/simple_example.py +0 -9
- edsl-0.1.32.dist-info/RECORD +0 -209
- {edsl-0.1.32.dist-info → edsl-0.1.33.dist-info}/LICENSE +0 -0
- {edsl-0.1.32.dist-info → edsl-0.1.33.dist-info}/WHEEL +0 -0
Full contents of the three files removed in 0.1.33:

edsl/jobs/interviews/InterviewTaskBuildingMixin.py (deleted)

```diff
@@ -1,286 +0,0 @@
-"""This module contains the Interview class, which is responsible for conducting an interview asynchronously."""
-
-from __future__ import annotations
-import asyncio
-import time
-import traceback
-from typing import Generator, Union
-
-from edsl import CONFIG
-from edsl.exceptions import InterviewTimeoutError
-
-# from edsl.questions.QuestionBase import QuestionBase
-from edsl.surveys.base import EndOfSurvey
-from edsl.jobs.buckets.ModelBuckets import ModelBuckets
-from edsl.jobs.interviews.InterviewExceptionEntry import InterviewExceptionEntry
-from edsl.jobs.interviews.retry_management import retry_strategy
-from edsl.jobs.tasks.task_status_enum import TaskStatus
-from edsl.jobs.tasks.QuestionTaskCreator import QuestionTaskCreator
-
-# from edsl.agents.InvigilatorBase import InvigilatorBase
-
-from rich.console import Console
-from rich.traceback import Traceback
-
-TIMEOUT = float(CONFIG.get("EDSL_API_TIMEOUT"))
-
-
-def frame_summary_to_dict(frame):
-    """
-    Convert a FrameSummary object to a dictionary.
-
-    :param frame: A traceback FrameSummary object
-    :return: A dictionary containing the frame's details
-    """
-    return {
-        "filename": frame.filename,
-        "lineno": frame.lineno,
-        "name": frame.name,
-        "line": frame.line,
-    }
-
-
-class InterviewTaskBuildingMixin:
-    def _build_invigilators(
-        self, debug: bool
-    ) -> Generator["InvigilatorBase", None, None]:
-        """Create an invigilator for each question.
-
-        :param debug: whether to use debug mode, in which case `InvigilatorDebug` is used.
-
-        An invigilator is responsible for answering a particular question in the survey.
-        """
-        for question in self.survey.questions:
-            yield self._get_invigilator(question=question, debug=debug)
-
-    def _get_invigilator(self, question: "QuestionBase", debug: bool) -> "Invigilator":
-        """Return an invigilator for the given question.
-
-        :param question: the question to be answered
-        :param debug: whether to use debug mode, in which case `InvigilatorDebug` is used.
-        """
-        invigilator = self.agent.create_invigilator(
-            question=question,
-            scenario=self.scenario,
-            model=self.model,
-            debug=debug,
-            survey=self.survey,
-            memory_plan=self.survey.memory_plan,
-            current_answers=self.answers,
-            iteration=self.iteration,
-            cache=self.cache,
-            sidecar_model=self.sidecar_model,
-        )
-        """Return an invigilator for the given question."""
-        return invigilator
-
-    def _build_question_tasks(
-        self,
-        debug: bool,
-        model_buckets: ModelBuckets,
-    ) -> list[asyncio.Task]:
-        """Create a task for each question, with dependencies on the questions that must be answered before this one can be answered.
-
-        :param debug: whether to use debug mode, in which case `InvigilatorDebug` is used.
-        :param model_buckets: the model buckets used to track and control usage rates.
-        """
-        tasks = []
-        for question in self.survey.questions:
-            tasks_that_must_be_completed_before = list(
-                self._get_tasks_that_must_be_completed_before(
-                    tasks=tasks, question=question
-                )
-            )
-            question_task = self._create_question_task(
-                question=question,
-                tasks_that_must_be_completed_before=tasks_that_must_be_completed_before,
-                model_buckets=model_buckets,
-                debug=debug,
-                iteration=self.iteration,
-            )
-            tasks.append(question_task)
-        return tuple(tasks)  # , invigilators
-
-    def _get_tasks_that_must_be_completed_before(
-        self, *, tasks: list[asyncio.Task], question: "QuestionBase"
-    ) -> Generator[asyncio.Task, None, None]:
-        """Return the tasks that must be completed before the given question can be answered.
-
-        :param tasks: a list of tasks that have been created so far.
-        :param question: the question for which we are determining dependencies.
-
-        If a question has no dependencies, this will be an empty list, [].
-        """
-        parents_of_focal_question = self.dag.get(question.question_name, [])
-        for parent_question_name in parents_of_focal_question:
-            yield tasks[self.to_index[parent_question_name]]
-
-    def _create_question_task(
-        self,
-        *,
-        question: "QuestionBase",
-        tasks_that_must_be_completed_before: list[asyncio.Task],
-        model_buckets: ModelBuckets,
-        debug: bool,
-        iteration: int = 0,
-    ) -> asyncio.Task:
-        """Create a task that depends on the passed-in dependencies that are awaited before the task is run.
-
-        :param question: the question to be answered. This is the question we are creating a task for.
-        :param tasks_that_must_be_completed_before: the tasks that must be completed before the focal task is run.
-        :param model_buckets: the model buckets used to track and control usage rates.
-        :param debug: whether to use debug mode, in which case `InvigilatorDebug` is used.
-        :param iteration: the iteration number for the interview.
-
-        The task is created by a `QuestionTaskCreator`, which is responsible for creating the task and managing its dependencies.
-        It is passed a reference to the function that will be called to answer the question.
-        It is passed a list "tasks_that_must_be_completed_before" that are awaited before the task is run.
-        These are added as a dependency to the focal task.
-        """
-        task_creator = QuestionTaskCreator(
-            question=question,
-            answer_question_func=self._answer_question_and_record_task,
-            token_estimator=self._get_estimated_request_tokens,
-            model_buckets=model_buckets,
-            iteration=iteration,
-        )
-        for task in tasks_that_must_be_completed_before:
-            task_creator.add_dependency(task)
-
-        self.task_creators.update(
-            {question.question_name: task_creator}
-        )  # track this task creator
-        return task_creator.generate_task(debug)
-
-    def _get_estimated_request_tokens(self, question) -> float:
-        """Estimate the number of tokens that will be required to run the focal task."""
-        invigilator = self._get_invigilator(question=question, debug=False)
-        # TODO: There should be a way to get a more accurate estimate.
-        combined_text = ""
-        for prompt in invigilator.get_prompts().values():
-            if hasattr(prompt, "text"):
-                combined_text += prompt.text
-            elif isinstance(prompt, str):
-                combined_text += prompt
-            else:
-                raise ValueError(f"Prompt is of type {type(prompt)}")
-        return len(combined_text) / 4.0
-
-    async def _answer_question_and_record_task(
-        self,
-        *,
-        question: "QuestionBase",
-        debug: bool,
-        task=None,
-    ) -> "AgentResponseDict":
-        """Answer a question and records the task.
-
-        This in turn calls the the passed-in agent's async_answer_question method, which returns a response dictionary.
-        Note that is updates answers dictionary with the response.
-        """
-        from edsl.data_transfer_models import AgentResponseDict
-
-        async def _inner():
-            try:
-                invigilator = self._get_invigilator(question, debug=debug)
-
-                if self._skip_this_question(question):
-                    return invigilator.get_failed_task_result()
-
-                response: AgentResponseDict = await self._attempt_to_answer_question(
-                    invigilator, task
-                )
-
-                self._add_answer(response=response, question=question)
-
-                self._cancel_skipped_questions(question)
-                return AgentResponseDict(**response)
-            except Exception as e:
-                raise e
-
-        skip_rety = getattr(self, "skip_retry", False)
-        if not skip_rety:
-            _inner = retry_strategy(_inner)
-
-        return await _inner()
-
-    def _add_answer(
-        self, response: "AgentResponseDict", question: "QuestionBase"
-    ) -> None:
-        """Add the answer to the answers dictionary.
-
-        :param response: the response to the question.
-        :param question: the question that was answered.
-        """
-        self.answers.add_answer(response=response, question=question)
-
-    def _skip_this_question(self, current_question: "QuestionBase") -> bool:
-        """Determine if the current question should be skipped.
-
-        :param current_question: the question to be answered.
-        """
-        current_question_index = self.to_index[current_question.question_name]
-
-        answers = self.answers | self.scenario | self.agent["traits"]
-        skip = self.survey.rule_collection.skip_question_before_running(
-            current_question_index, answers
-        )
-        return skip
-
-    def _handle_exception(self, e, question_name: str, task=None):
-        exception_entry = InterviewExceptionEntry(e)
-        if task:
-            task.task_status = TaskStatus.FAILED
-        self.exceptions.add(question_name, exception_entry)
-
-    async def _attempt_to_answer_question(
-        self, invigilator: "InvigilatorBase", task: asyncio.Task
-    ) -> "AgentResponseDict":
-        """Attempt to answer the question, and handle exceptions.
-
-        :param invigilator: the invigilator that will answer the question.
-        :param task: the task that is being run.
-
-        """
-        try:
-            return await asyncio.wait_for(
-                invigilator.async_answer_question(), timeout=TIMEOUT
-            )
-        except asyncio.TimeoutError as e:
-            self._handle_exception(e, invigilator.question.question_name, task)
-            raise InterviewTimeoutError(f"Task timed out after {TIMEOUT} seconds.")
-        except Exception as e:
-            self._handle_exception(e, invigilator.question.question_name, task)
-            raise e
-
-    def _cancel_skipped_questions(self, current_question: QuestionBase) -> None:
-        """Cancel the tasks for questions that are skipped.
-
-        :param current_question: the question that was just answered.
-
-        It first determines the next question, given the current question and the current answers.
-        If the next question is the end of the survey, it cancels all remaining tasks.
-        If the next question is after the current question, it cancels all tasks between the current question and the next question.
-        """
-        current_question_index: int = self.to_index[current_question.question_name]
-
-        next_question: Union[
-            int, EndOfSurvey
-        ] = self.survey.rule_collection.next_question(
-            q_now=current_question_index,
-            answers=self.answers | self.scenario | self.agent["traits"],
-        )
-
-        next_question_index = next_question.next_q
-
-        def cancel_between(start, end):
-            """Cancel the tasks between the start and end indices."""
-            for i in range(start, end):
-                self.tasks[i].cancel()
-
-        if next_question_index == EndOfSurvey:
-            cancel_between(current_question_index + 1, len(self.survey.questions))
-            return
-
-        if next_question_index > (current_question_index + 1):
-            cancel_between(current_question_index + 1, next_question_index)
```
@@ -1,37 +0,0 @@
|
|
1
|
-
from edsl import CONFIG
|
2
|
-
|
3
|
-
from tenacity import (
|
4
|
-
retry,
|
5
|
-
wait_exponential,
|
6
|
-
stop_after_attempt,
|
7
|
-
retry_if_exception_type,
|
8
|
-
before_sleep,
|
9
|
-
)
|
10
|
-
|
11
|
-
EDSL_BACKOFF_START_SEC = float(CONFIG.get("EDSL_BACKOFF_START_SEC"))
|
12
|
-
EDSL_MAX_BACKOFF_SEC = float(CONFIG.get("EDSL_MAX_BACKOFF_SEC"))
|
13
|
-
EDSL_MAX_ATTEMPTS = int(CONFIG.get("EDSL_MAX_ATTEMPTS"))
|
14
|
-
|
15
|
-
|
16
|
-
def print_retry(retry_state, print_to_terminal=True):
|
17
|
-
"Prints details on tenacity retries."
|
18
|
-
attempt_number = retry_state.attempt_number
|
19
|
-
exception = retry_state.outcome.exception()
|
20
|
-
wait_time = retry_state.next_action.sleep
|
21
|
-
if print_to_terminal:
|
22
|
-
print(
|
23
|
-
f"Attempt {attempt_number} failed with exception:" f"{exception}",
|
24
|
-
f"now waiting {wait_time:.2f} seconds before retrying."
|
25
|
-
f"Parameters: start={EDSL_BACKOFF_START_SEC}, max={EDSL_MAX_BACKOFF_SEC}, max_attempts={EDSL_MAX_ATTEMPTS}."
|
26
|
-
"\n\n",
|
27
|
-
)
|
28
|
-
|
29
|
-
|
30
|
-
retry_strategy = retry(
|
31
|
-
wait=wait_exponential(
|
32
|
-
multiplier=EDSL_BACKOFF_START_SEC, max=EDSL_MAX_BACKOFF_SEC
|
33
|
-
), # Exponential back-off starting at 1s, doubling, maxing out at 60s
|
34
|
-
stop=stop_after_attempt(EDSL_MAX_ATTEMPTS), # Stop after 5 attempts
|
35
|
-
# retry=retry_if_exception_type(Exception), # Customize this as per your specific retry-able exception
|
36
|
-
before_sleep=print_retry, # Use custom print function for retries
|
37
|
-
)
|
edsl/jobs/runners/JobsRunnerStatusMixin.py (deleted)

```diff
@@ -1,333 +0,0 @@
-from __future__ import annotations
-from typing import List, DefaultDict
-import asyncio
-from typing import Type
-from collections import defaultdict
-
-from typing import Literal, List, Type, DefaultDict
-from collections import UserDict, defaultdict
-
-from edsl.jobs.interviews.InterviewStatusDictionary import InterviewStatusDictionary
-from edsl.jobs.tokens.InterviewTokenUsage import InterviewTokenUsage
-from edsl.jobs.tokens.TokenUsage import TokenUsage
-from edsl.enums import get_token_pricing
-from edsl.jobs.tasks.task_status_enum import TaskStatus
-
-InterviewTokenUsageMapping = DefaultDict[str, InterviewTokenUsage]
-
-from edsl.jobs.interviews.InterviewStatistic import InterviewStatistic
-from edsl.jobs.interviews.InterviewStatisticsCollection import (
-    InterviewStatisticsCollection,
-)
-from edsl.jobs.tokens.InterviewTokenUsage import InterviewTokenUsage
-
-
-# return {"cache_status": token_usage_type, "details": details, "cost": f"${token_usage.cost(prices):.5f}"}
-
-from dataclasses import dataclass, asdict
-
-from rich.text import Text
-from rich.box import SIMPLE
-from rich.table import Table
-
-
-@dataclass
-class ModelInfo:
-    model_name: str
-    TPM_limit_k: float
-    RPM_limit_k: float
-    num_tasks_waiting: int
-    token_usage_info: dict
-
-
-@dataclass
-class ModelTokenUsageStats:
-    token_usage_type: str
-    details: List[dict]
-    cost: str
-
-
-class Stats:
-    def elapsed_time(self):
-        InterviewStatistic("elapsed_time", value=elapsed_time, digits=1, units="sec.")
-
-
-class JobsRunnerStatusMixin:
-    # @staticmethod
-    # def status_dict(interviews: List[Type["Interview"]]) -> List[Type[InterviewStatusDictionary]]:
-    #     """
-    #     >>> from edsl.jobs.interviews.Interview import Interview
-    #     >>> interviews = [Interview.example()]
-    #     >>> JobsRunnerStatusMixin().status_dict(interviews)
-    #     [InterviewStatusDictionary({<TaskStatus.NOT_STARTED: 1>: 0, <TaskStatus.WAITING_FOR_DEPENDENCIES: 2>: 0, <TaskStatus.CANCELLED: 3>: 0, <TaskStatus.PARENT_FAILED: 4>: 0, <TaskStatus.WAITING_FOR_REQUEST_CAPACITY: 5>: 0, <TaskStatus.WAITING_FOR_TOKEN_CAPACITY: 6>: 0, <TaskStatus.API_CALL_IN_PROGRESS: 7>: 0, <TaskStatus.SUCCESS: 8>: 0, <TaskStatus.FAILED: 9>: 0, 'number_from_cache': 0})]
-    #     """
-    #     return [interview.interview_status for interview in interviews]
-
-    def _compute_statistic(stat_name: str, completed_tasks, elapsed_time, interviews):
-        stat_definitions = {
-            "elapsed_time": lambda: InterviewStatistic(
-                "elapsed_time", value=elapsed_time, digits=1, units="sec."
-            ),
-            "total_interviews_requested": lambda: InterviewStatistic(
-                "total_interviews_requested", value=len(interviews), units=""
-            ),
-            "completed_interviews": lambda: InterviewStatistic(
-                "completed_interviews", value=len(completed_tasks), units=""
-            ),
-            "percent_complete": lambda: InterviewStatistic(
-                "percent_complete",
-                value=(
-                    len(completed_tasks) / len(interviews) * 100
-                    if len(interviews) > 0
-                    else "NA"
-                ),
-                digits=0,
-                units="%",
-            ),
-            "average_time_per_interview": lambda: InterviewStatistic(
-                "average_time_per_interview",
-                value=elapsed_time / len(completed_tasks) if completed_tasks else "NA",
-                digits=1,
-                units="sec.",
-            ),
-            "task_remaining": lambda: InterviewStatistic(
-                "task_remaining", value=len(interviews) - len(completed_tasks), units=""
-            ),
-            "estimated_time_remaining": lambda: InterviewStatistic(
-                "estimated_time_remaining",
-                value=(
-                    (len(interviews) - len(completed_tasks))
-                    * (elapsed_time / len(completed_tasks))
-                    if len(completed_tasks) > 0
-                    else "NA"
-                ),
-                digits=1,
-                units="sec.",
-            ),
-        }
-        if stat_name not in stat_definitions:
-            raise ValueError(
-                f"Invalid stat_name: {stat_name}. The valid stat_names are: {list(stat_definitions.keys())}"
-            )
-        return stat_definitions[stat_name]()
-
-    @staticmethod
-    def _job_level_info(
-        completed_tasks: List[Type[asyncio.Task]],
-        elapsed_time: float,
-        interviews: List[Type["Interview"]],
-    ) -> InterviewStatisticsCollection:
-        interview_statistics = InterviewStatisticsCollection()
-
-        default_statistics = [
-            "elapsed_time",
-            "total_interviews_requested",
-            "completed_interviews",
-            "percent_complete",
-            "average_time_per_interview",
-            "task_remaining",
-            "estimated_time_remaining",
-        ]
-        for stat_name in default_statistics:
-            interview_statistics.add_stat(
-                JobsRunnerStatusMixin._compute_statistic(
-                    stat_name, completed_tasks, elapsed_time, interviews
-                )
-            )
-
-        return interview_statistics
-
-    @staticmethod
-    def _get_model_queues_info(interviews):
-        models_to_tokens = defaultdict(InterviewTokenUsage)
-        model_to_status = defaultdict(InterviewStatusDictionary)
-        waiting_dict = defaultdict(int)
-
-        for interview in interviews:
-            models_to_tokens[interview.model] += interview.token_usage
-            model_to_status[interview.model] += interview.interview_status
-            waiting_dict[interview.model] += interview.interview_status.waiting
-
-        for model, num_waiting in waiting_dict.items():
-            yield JobsRunnerStatusMixin._get_model_info(
-                model, num_waiting, models_to_tokens
-            )
-
-    @staticmethod
-    def generate_status_summary(
-        completed_tasks: List[Type[asyncio.Task]],
-        elapsed_time: float,
-        interviews: List[Type["Interview"]],
-        include_model_queues=False,
-    ) -> InterviewStatisticsCollection:
-        """Generate a summary of the status of the job runner.
-
-        :param completed_tasks: list of completed tasks
-        :param elapsed_time: time elapsed since the start of the job
-        :param interviews: list of interviews to be conducted
-
-        >>> from edsl.jobs.interviews.Interview import Interview
-        >>> interviews = [Interview.example()]
-        >>> completed_tasks = []
-        >>> elapsed_time = 0
-        >>> JobsRunnerStatusMixin().generate_status_summary(completed_tasks, elapsed_time, interviews)
-        {'Elapsed time': '0.0 sec.', 'Total interviews requested': '1 ', 'Completed interviews': '0 ', 'Percent complete': '0 %', 'Average time per interview': 'NA', 'Task remaining': '1 ', 'Estimated time remaining': 'NA'}
-        """
-
-        interview_status_summary: InterviewStatisticsCollection = (
-            JobsRunnerStatusMixin._job_level_info(
-                completed_tasks=completed_tasks,
-                elapsed_time=elapsed_time,
-                interviews=interviews,
-            )
-        )
-        if include_model_queues:
-            interview_status_summary.model_queues = list(
-                JobsRunnerStatusMixin._get_model_queues_info(interviews)
-            )
-        else:
-            interview_status_summary.model_queues = None
-
-        return interview_status_summary
-
-    @staticmethod
-    def _get_model_info(
-        model: str,
-        num_waiting: int,
-        models_to_tokens: InterviewTokenUsageMapping,
-    ) -> dict:
-        """Get the status of a model.
-
-        :param model: the model name
-        :param num_waiting: the number of tasks waiting for capacity
-        :param models_to_tokens: a mapping of models to token usage
-
-        >>> from edsl.jobs.interviews.Interview import Interview
-        >>> interviews = [Interview.example()]
-        >>> models_to_tokens = defaultdict(InterviewTokenUsage)
-        >>> model = interviews[0].model
-        >>> num_waiting = 0
-        >>> JobsRunnerStatusMixin()._get_model_info(model, num_waiting, models_to_tokens)
-        ModelInfo(model_name='gpt-4-1106-preview', TPM_limit_k=480.0, RPM_limit_k=4.0, num_tasks_waiting=0, token_usage_info=[ModelTokenUsageStats(token_usage_type='new_token_usage', details=[{'type': 'prompt_tokens', 'tokens': 0}, {'type': 'completion_tokens', 'tokens': 0}], cost='$0.00000'), ModelTokenUsageStats(token_usage_type='cached_token_usage', details=[{'type': 'prompt_tokens', 'tokens': 0}, {'type': 'completion_tokens', 'tokens': 0}], cost='$0.00000')])
-        """
-
-        ## TODO: This should probably be a coop method
-        prices = get_token_pricing(model.model)
-
-        token_usage_info = []
-        for token_usage_type in ["new_token_usage", "cached_token_usage"]:
-            token_usage_info.append(
-                JobsRunnerStatusMixin._get_token_usage_info(
-                    token_usage_type, models_to_tokens, model, prices
-                )
-            )
-
-        return ModelInfo(
-            **{
-                "model_name": model.model,
-                "TPM_limit_k": model.TPM / 1000,
-                "RPM_limit_k": model.RPM / 1000,
-                "num_tasks_waiting": num_waiting,
-                "token_usage_info": token_usage_info,
-            }
-        )
-
-    @staticmethod
-    def _get_token_usage_info(
-        token_usage_type: Literal["new_token_usage", "cached_token_usage"],
-        models_to_tokens: InterviewTokenUsageMapping,
-        model: str,
-        prices: "TokenPricing",
-    ) -> ModelTokenUsageStats:
-        """Get the token usage info for a model.
-
-        >>> from edsl.jobs.interviews.Interview import Interview
-        >>> interviews = [Interview.example()]
-        >>> models_to_tokens = defaultdict(InterviewTokenUsage)
-        >>> model = interviews[0].model
-        >>> prices = get_token_pricing(model.model)
-        >>> cache_status = "new_token_usage"
-        >>> JobsRunnerStatusMixin()._get_token_usage_info(cache_status, models_to_tokens, model, prices)
-        ModelTokenUsageStats(token_usage_type='new_token_usage', details=[{'type': 'prompt_tokens', 'tokens': 0}, {'type': 'completion_tokens', 'tokens': 0}], cost='$0.00000')
-
-        """
-        all_token_usage: InterviewTokenUsage = models_to_tokens[model]
-        token_usage: TokenUsage = getattr(all_token_usage, token_usage_type)
-
-        details = [
-            {"type": token_type, "tokens": getattr(token_usage, token_type)}
-            for token_type in ["prompt_tokens", "completion_tokens"]
-        ]
-
-        return ModelTokenUsageStats(
-            token_usage_type=token_usage_type,
-            details=details,
-            cost=f"${token_usage.cost(prices):.5f}",
-        )
-
-    @staticmethod
-    def _add_statistics_to_table(table, status_summary):
-        table.add_column("Statistic", style="dim", no_wrap=True, width=50)
-        table.add_column("Value", width=10)
-
-        for key, value in status_summary.items():
-            if key != "model_queues":
-                table.add_row(key, value)
-
-    @staticmethod
-    def display_status_table(status_summary: InterviewStatisticsCollection) -> "Table":
-        table = Table(
-            title="Job Status",
-            show_header=True,
-            header_style="bold magenta",
-            box=SIMPLE,
-        )
-
-        ### Job-level statistics
-        JobsRunnerStatusMixin._add_statistics_to_table(table, status_summary)
-
-        ## Model-level statistics
-        spacing = " "
-
-        if status_summary.model_queues is not None:
-            table.add_row(Text("Model Queues", style="bold red"), "")
-            for model_info in status_summary.model_queues:
-                model_name = model_info.model_name
-                tpm = f"TPM (k)={model_info.TPM_limit_k}"
-                rpm = f"RPM (k)= {model_info.RPM_limit_k}"
-                pretty_model_name = model_name + ";" + tpm + ";" + rpm
-                table.add_row(Text(pretty_model_name, style="blue"), "")
-                table.add_row(
-                    "Number question tasks waiting for capacity",
-                    str(model_info.num_tasks_waiting),
-                )
-                # Token usage and cost info
-                for token_usage_info in model_info.token_usage_info:
-                    token_usage_type = token_usage_info.token_usage_type
-                    table.add_row(
-                        Text(
-                            spacing + token_usage_type.replace("_", " "), style="bold"
-                        ),
-                        "",
-                    )
-                    for detail in token_usage_info.details:
-                        token_type = detail["type"]
-                        tokens = detail["tokens"]
-                        table.add_row(spacing + f"{token_type}", f"{tokens:,}")
-                    # table.add_row(spacing + "cost", cache_info["cost"])
-
-        return table
-
-    def status_table(self, completed_tasks: List[asyncio.Task], elapsed_time: float):
-        summary_data = JobsRunnerStatusMixin.generate_status_summary(
-            completed_tasks=completed_tasks,
-            elapsed_time=elapsed_time,
-            interviews=self.total_interviews,
-        )
-        return self.display_status_table(summary_data)
-
-
-if __name__ == "__main__":
-    import doctest
-
-    doctest.testmod(optionflags=doctest.ELLIPSIS)
```