edsl 0.1.33.dev1__py3-none-any.whl → 0.1.33.dev2__py3-none-any.whl
This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- edsl/TemplateLoader.py +24 -0
- edsl/__init__.py +8 -4
- edsl/agents/Agent.py +46 -14
- edsl/agents/AgentList.py +43 -0
- edsl/agents/Invigilator.py +125 -212
- edsl/agents/InvigilatorBase.py +140 -32
- edsl/agents/PromptConstructionMixin.py +43 -66
- edsl/agents/__init__.py +1 -0
- edsl/auto/AutoStudy.py +117 -0
- edsl/auto/StageBase.py +230 -0
- edsl/auto/StageGenerateSurvey.py +178 -0
- edsl/auto/StageLabelQuestions.py +125 -0
- edsl/auto/StagePersona.py +61 -0
- edsl/auto/StagePersonaDimensionValueRanges.py +88 -0
- edsl/auto/StagePersonaDimensionValues.py +74 -0
- edsl/auto/StagePersonaDimensions.py +69 -0
- edsl/auto/StageQuestions.py +73 -0
- edsl/auto/SurveyCreatorPipeline.py +21 -0
- edsl/auto/utilities.py +224 -0
- edsl/config.py +38 -39
- edsl/coop/PriceFetcher.py +58 -0
- edsl/coop/coop.py +39 -5
- edsl/data/Cache.py +35 -1
- edsl/data_transfer_models.py +120 -38
- edsl/enums.py +2 -0
- edsl/exceptions/language_models.py +25 -1
- edsl/exceptions/questions.py +62 -5
- edsl/exceptions/results.py +4 -0
- edsl/inference_services/AnthropicService.py +13 -11
- edsl/inference_services/AwsBedrock.py +19 -17
- edsl/inference_services/AzureAI.py +37 -20
- edsl/inference_services/GoogleService.py +16 -12
- edsl/inference_services/GroqService.py +2 -0
- edsl/inference_services/InferenceServiceABC.py +24 -0
- edsl/inference_services/MistralAIService.py +120 -0
- edsl/inference_services/OpenAIService.py +41 -50
- edsl/inference_services/TestService.py +71 -0
- edsl/inference_services/models_available_cache.py +0 -6
- edsl/inference_services/registry.py +4 -0
- edsl/jobs/Answers.py +10 -12
- edsl/jobs/FailedQuestion.py +78 -0
- edsl/jobs/Jobs.py +18 -13
- edsl/jobs/buckets/TokenBucket.py +39 -14
- edsl/jobs/interviews/Interview.py +297 -77
- edsl/jobs/interviews/InterviewExceptionEntry.py +83 -19
- edsl/jobs/interviews/interview_exception_tracking.py +0 -70
- edsl/jobs/interviews/retry_management.py +3 -1
- edsl/jobs/runners/JobsRunnerAsyncio.py +116 -70
- edsl/jobs/runners/JobsRunnerStatusMixin.py +1 -1
- edsl/jobs/tasks/QuestionTaskCreator.py +30 -23
- edsl/jobs/tasks/TaskHistory.py +131 -213
- edsl/language_models/LanguageModel.py +239 -129
- edsl/language_models/ModelList.py +2 -2
- edsl/language_models/RegisterLanguageModelsMeta.py +14 -29
- edsl/language_models/fake_openai_call.py +15 -0
- edsl/language_models/fake_openai_service.py +61 -0
- edsl/language_models/registry.py +15 -2
- edsl/language_models/repair.py +0 -19
- edsl/language_models/utilities.py +61 -0
- edsl/prompts/Prompt.py +52 -2
- edsl/questions/AnswerValidatorMixin.py +23 -26
- edsl/questions/QuestionBase.py +273 -242
- edsl/questions/QuestionBaseGenMixin.py +133 -0
- edsl/questions/QuestionBasePromptsMixin.py +266 -0
- edsl/questions/QuestionBudget.py +6 -0
- edsl/questions/QuestionCheckBox.py +227 -35
- edsl/questions/QuestionExtract.py +98 -27
- edsl/questions/QuestionFreeText.py +46 -29
- edsl/questions/QuestionFunctional.py +7 -0
- edsl/questions/QuestionList.py +141 -22
- edsl/questions/QuestionMultipleChoice.py +173 -64
- edsl/questions/QuestionNumerical.py +87 -46
- edsl/questions/QuestionRank.py +182 -24
- edsl/questions/RegisterQuestionsMeta.py +31 -12
- edsl/questions/ResponseValidatorABC.py +169 -0
- edsl/questions/__init__.py +3 -4
- edsl/questions/decorators.py +21 -0
- edsl/questions/derived/QuestionLikertFive.py +10 -5
- edsl/questions/derived/QuestionLinearScale.py +11 -1
- edsl/questions/derived/QuestionTopK.py +6 -0
- edsl/questions/derived/QuestionYesNo.py +16 -1
- edsl/questions/descriptors.py +43 -7
- edsl/questions/prompt_templates/question_budget.jinja +13 -0
- edsl/questions/prompt_templates/question_checkbox.jinja +32 -0
- edsl/questions/prompt_templates/question_extract.jinja +11 -0
- edsl/questions/prompt_templates/question_free_text.jinja +3 -0
- edsl/questions/prompt_templates/question_linear_scale.jinja +11 -0
- edsl/questions/prompt_templates/question_list.jinja +17 -0
- edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -0
- edsl/questions/prompt_templates/question_numerical.jinja +37 -0
- edsl/questions/question_registry.py +6 -2
- edsl/questions/templates/__init__.py +0 -0
- edsl/questions/templates/checkbox/__init__.py +0 -0
- edsl/questions/templates/checkbox/answering_instructions.jinja +10 -0
- edsl/questions/templates/checkbox/question_presentation.jinja +22 -0
- edsl/questions/templates/extract/answering_instructions.jinja +7 -0
- edsl/questions/templates/extract/question_presentation.jinja +1 -0
- edsl/questions/templates/free_text/__init__.py +0 -0
- edsl/questions/templates/free_text/answering_instructions.jinja +0 -0
- edsl/questions/templates/free_text/question_presentation.jinja +1 -0
- edsl/questions/templates/likert_five/__init__.py +0 -0
- edsl/questions/templates/likert_five/answering_instructions.jinja +10 -0
- edsl/questions/templates/likert_five/question_presentation.jinja +12 -0
- edsl/questions/templates/linear_scale/__init__.py +0 -0
- edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -0
- edsl/questions/templates/linear_scale/question_presentation.jinja +5 -0
- edsl/questions/templates/list/__init__.py +0 -0
- edsl/questions/templates/list/answering_instructions.jinja +4 -0
- edsl/questions/templates/list/question_presentation.jinja +5 -0
- edsl/questions/templates/multiple_choice/__init__.py +0 -0
- edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -0
- edsl/questions/templates/multiple_choice/html.jinja +0 -0
- edsl/questions/templates/multiple_choice/question_presentation.jinja +12 -0
- edsl/questions/templates/numerical/__init__.py +0 -0
- edsl/questions/templates/numerical/answering_instructions.jinja +8 -0
- edsl/questions/templates/numerical/question_presentation.jinja +7 -0
- edsl/questions/templates/rank/answering_instructions.jinja +11 -0
- edsl/questions/templates/rank/question_presentation.jinja +15 -0
- edsl/questions/templates/top_k/__init__.py +0 -0
- edsl/questions/templates/top_k/answering_instructions.jinja +8 -0
- edsl/questions/templates/top_k/question_presentation.jinja +22 -0
- edsl/questions/templates/yes_no/__init__.py +0 -0
- edsl/questions/templates/yes_no/answering_instructions.jinja +6 -0
- edsl/questions/templates/yes_no/question_presentation.jinja +12 -0
- edsl/results/Dataset.py +20 -0
- edsl/results/DatasetExportMixin.py +41 -47
- edsl/results/DatasetTree.py +145 -0
- edsl/results/Result.py +32 -5
- edsl/results/Results.py +131 -45
- edsl/results/ResultsDBMixin.py +3 -3
- edsl/results/Selector.py +118 -0
- edsl/results/tree_explore.py +115 -0
- edsl/scenarios/Scenario.py +10 -4
- edsl/scenarios/ScenarioList.py +348 -39
- edsl/scenarios/ScenarioListExportMixin.py +9 -0
- edsl/study/SnapShot.py +8 -1
- edsl/surveys/RuleCollection.py +2 -2
- edsl/surveys/Survey.py +634 -315
- edsl/surveys/SurveyExportMixin.py +71 -9
- edsl/surveys/SurveyFlowVisualizationMixin.py +2 -1
- edsl/surveys/SurveyQualtricsImport.py +75 -4
- edsl/surveys/instructions/ChangeInstruction.py +47 -0
- edsl/surveys/instructions/Instruction.py +34 -0
- edsl/surveys/instructions/InstructionCollection.py +77 -0
- edsl/surveys/instructions/__init__.py +0 -0
- edsl/templates/error_reporting/base.html +24 -0
- edsl/templates/error_reporting/exceptions_by_model.html +35 -0
- edsl/templates/error_reporting/exceptions_by_question_name.html +17 -0
- edsl/templates/error_reporting/exceptions_by_type.html +17 -0
- edsl/templates/error_reporting/interview_details.html +111 -0
- edsl/templates/error_reporting/interviews.html +10 -0
- edsl/templates/error_reporting/overview.html +5 -0
- edsl/templates/error_reporting/performance_plot.html +2 -0
- edsl/templates/error_reporting/report.css +74 -0
- edsl/templates/error_reporting/report.html +118 -0
- edsl/templates/error_reporting/report.js +25 -0
- {edsl-0.1.33.dev1.dist-info → edsl-0.1.33.dev2.dist-info}/METADATA +4 -2
- edsl-0.1.33.dev2.dist-info/RECORD +289 -0
- edsl/jobs/interviews/InterviewTaskBuildingMixin.py +0 -286
- edsl/utilities/gcp_bucket/simple_example.py +0 -9
- edsl-0.1.33.dev1.dist-info/RECORD +0 -209
- {edsl-0.1.33.dev1.dist-info → edsl-0.1.33.dev2.dist-info}/LICENSE +0 -0
- {edsl-0.1.33.dev1.dist-info → edsl-0.1.33.dev2.dist-info}/WHEEL +0 -0
--- a/edsl/jobs/interviews/InterviewExceptionEntry.py
+++ b/edsl/jobs/interviews/InterviewExceptionEntry.py
@@ -2,36 +2,98 @@ import traceback
 import datetime
 import time
 from collections import UserDict
-
-# traceback=traceback.format_exc(),
-# traceback = frame_summary_to_dict(traceback.extract_tb(e.__traceback__))
-# traceback = [frame_summary_to_dict(f) for f in traceback.extract_tb(e.__traceback__)]
+from edsl.jobs.FailedQuestion import FailedQuestion
 
 
 class InterviewExceptionEntry:
-    """Class to record an exception that occurred during the interview.
-
-
-
-
-
-
-
+    """Class to record an exception that occurred during the interview."""
+
+    def __init__(
+        self,
+        *,
+        exception: Exception,
+        # failed_question: FailedQuestion,
+        invigilator: "Invigilator",
+        traceback_format="text",
+    ):
         self.time = datetime.datetime.now().isoformat()
         self.exception = exception
+        # self.failed_question = failed_question
+        self.invigilator = invigilator
         self.traceback_format = traceback_format
 
+    @property
+    def question_type(self):
+        # return self.failed_question.question.question_type
+        return self.invigilator.question.question_type
+
+    @property
+    def name(self):
+        return repr(self.exception)
+
+    @property
+    def rendered_prompts(self):
+        return self.invigilator.get_prompts()
+
+    @property
+    def key_sequence(self):
+        return self.invigilator.model.key_sequence
+
+    @property
+    def generated_token_string(self):
+        # return "POO"
+        if self.invigilator.raw_model_response is None:
+            return "No raw model response available."
+        else:
+            return self.invigilator.model.get_generated_token_string(
+                self.invigilator.raw_model_response
+            )
+
+    @property
+    def raw_model_response(self):
+        import json
+
+        if self.invigilator.raw_model_response is None:
+            return "No raw model response available."
+        return json.dumps(self.invigilator.raw_model_response, indent=2)
+
     def __getitem__(self, key):
         # Support dict-like access obj['a']
         return str(getattr(self, key))
 
     @classmethod
     def example(cls):
-
-
-
-
-
+        from edsl import QuestionFreeText
+        from edsl.language_models import LanguageModel
+
+        m = LanguageModel.example(test_model=True)
+        q = QuestionFreeText.example(exception_to_throw=ValueError)
+        results = q.by(m).run(
+            skip_retry=True, print_exceptions=False, raise_validation_errors=True
+        )
+        return results.task_history.exceptions[0]["how_are_you"][0]
+
+    @property
+    def code_to_reproduce(self):
+        return self.code(run=False)
+
+    def code(self, run=True):
+        lines = []
+        lines.append("from edsl import Question, Model, Scenario, Agent")
+
+        lines.append(f"q = {repr(self.invigilator.question)}")
+        lines.append(f"scenario = {repr(self.invigilator.scenario)}")
+        lines.append(f"agent = {repr(self.invigilator.agent)}")
+        lines.append(f"m = Model('{self.invigilator.model.model}')")
+        lines.append("results = q.by(m).by(agent).by(scenario).run()")
+        code_str = "\n".join(lines)
+
+        if run:
+            # Create a new namespace to avoid polluting the global namespace
+            namespace = {}
+            exec(code_str, namespace)
+            return namespace["results"]
+        return code_str
 
     @property
     def traceback(self):
@@ -78,13 +140,15 @@ class InterviewExceptionEntry:
 
         >>> entry = InterviewExceptionEntry.example()
         >>> entry.to_dict()['exception']
-
+        ValueError()
 
         """
         return {
-            "exception":
+            "exception": self.exception,
             "time": self.time,
             "traceback": self.traceback,
+            # "failed_question": self.failed_question.to_dict(),
+            "invigilator": self.invigilator.to_dict(),
         }
 
     def push(self):
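Note: the hunk above gives each recorded exception a self-contained reproduction path (rendered_prompts, code_to_reproduce, and a richer to_dict). A minimal usage sketch, assuming an environment where the example() classmethod shown in the diff works as written; output details will vary by edsl version:

```python
# Hedged sketch of the InterviewExceptionEntry helpers added in this diff.
# It only drives the example() classmethod shown above, so everything here is
# exercised by the diff itself.
from edsl.jobs.interviews.InterviewExceptionEntry import InterviewExceptionEntry

entry = InterviewExceptionEntry.example()  # runs a deliberately failing question on the test model

print(entry.name)               # repr() of the underlying exception, e.g. ValueError()
print(entry.question_type)      # pulled from the invigilator's question
print(entry.rendered_prompts)   # the prompts the invigilator actually built
print(entry.code_to_reproduce)  # standalone Question/Scenario/Agent/Model script that re-runs it
entry.to_dict()                 # now also serializes the invigilator
```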
--- a/edsl/jobs/interviews/interview_exception_tracking.py
+++ b/edsl/jobs/interviews/interview_exception_tracking.py
@@ -1,71 +1,7 @@
-import traceback
-import datetime
-import time
 from collections import UserDict
 
 from edsl.jobs.interviews.InterviewExceptionEntry import InterviewExceptionEntry
 
-# #traceback=traceback.format_exc(),
-# #traceback = frame_summary_to_dict(traceback.extract_tb(e.__traceback__))
-# #traceback = [frame_summary_to_dict(f) for f in traceback.extract_tb(e.__traceback__)]
-
-# class InterviewExceptionEntry:
-#     """Class to record an exception that occurred during the interview.
-
-#     >>> entry = InterviewExceptionEntry.example()
-#     >>> entry.to_dict()['exception']
-#     "ValueError('An error occurred.')"
-#     """
-
-#     def __init__(self, exception: Exception):
-#         self.time = datetime.datetime.now().isoformat()
-#         self.exception = exception
-
-#     def __getitem__(self, key):
-#         # Support dict-like access obj['a']
-#         return str(getattr(self, key))
-
-#     @classmethod
-#     def example(cls):
-#         try:
-#             raise ValueError("An error occurred.")
-#         except Exception as e:
-#             entry = InterviewExceptionEntry(e)
-#         return entry
-
-#     @property
-#     def traceback(self):
-#         """Return the exception as HTML."""
-#         e = self.exception
-#         tb_str = ''.join(traceback.format_exception(type(e), e, e.__traceback__))
-#         return tb_str
-
-
-#     @property
-#     def html(self):
-#         from rich.console import Console
-#         from rich.table import Table
-#         from rich.traceback import Traceback
-
-#         from io import StringIO
-#         html_output = StringIO()
-
-#         console = Console(file=html_output, record=True)
-#         tb = Traceback(show_locals=True)
-#         console.print(tb)
-
-#         tb = Traceback.from_exception(type(self.exception), self.exception, self.exception.__traceback__, show_locals=True)
-#         console.print(tb)
-#         return html_output.getvalue()
-
-#     def to_dict(self) -> dict:
-#         """Return the exception as a dictionary."""
-#         return {
-#             'exception': repr(self.exception),
-#             'time': self.time,
-#             'traceback': self.traceback
-#         }
-
 
 class InterviewExceptionCollection(UserDict):
     """A collection of exceptions that occurred during the interview."""
@@ -80,12 +16,6 @@ class InterviewExceptionCollection(UserDict):
     def to_dict(self, include_traceback=True) -> dict:
         """Return the collection of exceptions as a dictionary."""
         newdata = {k: [e.to_dict() for e in v] for k, v in self.data.items()}
-        # if not include_traceback:
-        #     for question in newdata:
-        #         for exception in newdata[question]:
-        #             exception[
-        #                 "traceback"
-        #             ] = "Traceback removed. Set include_traceback=True to include."
         return newdata
 
     def _repr_html_(self) -> str:
--- a/edsl/jobs/interviews/retry_management.py
+++ b/edsl/jobs/interviews/retry_management.py
@@ -18,9 +18,11 @@ def print_retry(retry_state, print_to_terminal=True):
     attempt_number = retry_state.attempt_number
     exception = retry_state.outcome.exception()
     wait_time = retry_state.next_action.sleep
+    exception_name = type(exception).__name__
     if print_to_terminal:
         print(
-            f"Attempt {attempt_number} failed with exception
+            f"Attempt {attempt_number} failed with exception '{exception_name}':"
+            f"{exception}",
             f"now waiting {wait_time:.2f} seconds before retrying."
             f"Parameters: start={EDSL_BACKOFF_START_SEC}, max={EDSL_MAX_BACKOFF_SEC}, max_attempts={EDSL_MAX_ATTEMPTS}."
             "\n\n",
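Note: the hunk above only changes the message that print_retry builds from the retry state. For orientation, a hedged sketch of how such a callback is typically wired up with tenacity; the decorator values below are illustrative placeholders, not edsl's actual EDSL_BACKOFF_START_SEC / EDSL_MAX_BACKOFF_SEC / EDSL_MAX_ATTEMPTS settings, and the wiring itself is an assumption since this diff does not show it:

```python
# Hedged sketch: a print_retry-style before_sleep callback for tenacity.
# retry_state is a tenacity.RetryCallState; the attributes read here are the
# same ones the diff reads. The @retry parameters are placeholders.
from tenacity import retry, stop_after_attempt, wait_exponential


def print_retry(retry_state, print_to_terminal=True):
    attempt_number = retry_state.attempt_number    # attempt that just failed
    exception = retry_state.outcome.exception()    # the exception it raised
    wait_time = retry_state.next_action.sleep      # how long tenacity will sleep
    exception_name = type(exception).__name__
    if print_to_terminal:
        print(
            f"Attempt {attempt_number} failed with exception '{exception_name}': "
            f"{exception}, now waiting {wait_time:.2f} seconds before retrying."
        )


@retry(
    wait=wait_exponential(multiplier=1, max=60),  # placeholder backoff
    stop=stop_after_attempt(5),                   # placeholder attempt cap
    before_sleep=print_retry,
)
def flaky_call():
    ...
```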
--- a/edsl/jobs/runners/JobsRunnerAsyncio.py
+++ b/edsl/jobs/runners/JobsRunnerAsyncio.py
@@ -1,10 +1,11 @@
 from __future__ import annotations
 import time
+import math
 import asyncio
-import
+import functools
+from typing import Coroutine, List, AsyncGenerator, Optional, Union, Generator
 from contextlib import contextmanager
-
-from typing import Coroutine, List, AsyncGenerator, Optional, Union
+from collections import UserList
 
 from edsl import shared_globals
 from edsl.jobs.interviews.Interview import Interview
@@ -12,12 +13,15 @@ from edsl.jobs.runners.JobsRunnerStatusMixin import JobsRunnerStatusMixin
 from edsl.jobs.tasks.TaskHistory import TaskHistory
 from edsl.jobs.buckets.BucketCollection import BucketCollection
 from edsl.utilities.decorators import jupyter_nb_handler
-
-import
-import
+from edsl.data.Cache import Cache
+from edsl.results.Result import Result
+from edsl.results.Results import Results
+from edsl.jobs.FailedQuestion import FailedQuestion
 
 
 def cache_with_timeout(timeout):
+    """ "Used to keep the generate table from being run too frequetly."""
+
     def decorator(func):
         cached_result = {}
         last_computation_time = [0]  # Using list to store mutable value
@@ -35,10 +39,6 @@ def cache_with_timeout(timeout):
     return decorator
 
 
-# from queue import Queue
-from collections import UserList
-
-
 class StatusTracker(UserList):
     def __init__(self, total_tasks: int):
         self.total_tasks = total_tasks
@@ -55,7 +55,7 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
     The Jobs object is a collection of interviews that are to be run.
     """
 
-    def __init__(self, jobs: Jobs):
+    def __init__(self, jobs: "Jobs"):
         self.jobs = jobs
         # this creates the interviews, which can take a while
         self.interviews: List["Interview"] = jobs.interviews()
@@ -66,81 +66,69 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
         self,
         cache: "Cache",
         n: int = 1,
-        debug: bool = False,
         stop_on_exception: bool = False,
-        sidecar_model: "LanguageModel" = None,
+        sidecar_model: Optional["LanguageModel"] = None,
         total_interviews: Optional[List["Interview"]] = None,
+        raise_validation_errors: bool = False,
     ) -> AsyncGenerator["Result", None]:
         """Creates the tasks, runs them asynchronously, and returns the results as a Results object.
 
         Completed tasks are yielded as they are completed.
 
         :param n: how many times to run each interview
-        :param debug:
         :param stop_on_exception: Whether to stop the interview if an exception is raised
         :param sidecar_model: a language model to use in addition to the interview's model
         :param total_interviews: A list of interviews to run can be provided instead.
         """
         tasks = []
-        if total_interviews:
+        if total_interviews:  # was already passed in total interviews
             self.total_interviews = total_interviews
         else:
-            self.
-                n=n
+            self.total_interviews = list(
+                self._populate_total_interviews(n=n)
             )  # Populate self.total_interviews before creating tasks
 
+        # print("Interviews created")
+
         for interview in self.total_interviews:
             interviewing_task = self._build_interview_task(
                 interview=interview,
-                debug=debug,
                 stop_on_exception=stop_on_exception,
                 sidecar_model=sidecar_model,
+                raise_validation_errors=raise_validation_errors,
             )
             tasks.append(asyncio.create_task(interviewing_task))
 
+        # print("Tasks created")
+
         for task in asyncio.as_completed(tasks):
+            # print(f"Task {task} completed")
            result = await task
            yield result
 
-    def _populate_total_interviews(
+    def _populate_total_interviews(
+        self, n: int = 1
+    ) -> Generator["Interview", None, None]:
         """Populates self.total_interviews with n copies of each interview.
 
         :param n: how many times to run each interview.
         """
-        # TODO: Why not return a list of interviews instead of modifying the object?
-
-        self.total_interviews = []
         for interview in self.interviews:
             for iteration in range(n):
                 if iteration > 0:
-
-                        iteration=iteration, cache=self.cache
-                    )
-                    self.total_interviews.append(new_interview)
+                    yield interview.duplicate(iteration=iteration, cache=self.cache)
                 else:
-                    interview.cache =
-
-                    )  # set the cache for the first interview
-                    self.total_interviews.append(interview)
-
-    async def run_async(self, cache=None, n=1) -> Results:
-        from edsl.results.Results import Results
+                    interview.cache = self.cache
+                    yield interview
 
-
-
-
-        if cache is None:
-            self.cache = Cache()
-        else:
-            self.cache = cache
+    async def run_async(self, cache: Optional["Cache"] = None, n: int = 1) -> Results:
+        self.cache = Cache() if cache is None else cache
         data = []
         async for result in self.run_async_generator(cache=self.cache, n=n):
             data.append(result)
         return Results(survey=self.jobs.survey, data=data)
 
     def simple_run(self):
-        from edsl.results.Results import Results
-
         data = asyncio.run(self.run_async())
         return Results(survey=self.jobs.survey, data=data)
 
@@ -148,14 +136,13 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
         self,
         *,
         interview: Interview,
-        debug: bool,
         stop_on_exception: bool = False,
-        sidecar_model: Optional[LanguageModel] = None,
-
+        sidecar_model: Optional["LanguageModel"] = None,
+        raise_validation_errors: bool = False,
+    ) -> "Result":
         """Conducts an interview and returns the result.
 
         :param interview: the interview to conduct
-        :param debug: prints debug messages
         :param stop_on_exception: stops the interview if an exception is raised
         :param sidecar_model: a language model to use in addition to the interview's model
         """
@@ -164,53 +151,93 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
 
         # get the results of the interview
         answer, valid_results = await interview.async_conduct_interview(
-            debug=debug,
             model_buckets=model_buckets,
             stop_on_exception=stop_on_exception,
             sidecar_model=sidecar_model,
+            raise_validation_errors=raise_validation_errors,
         )
 
-        #
-
+        # answer_key_names = {
+        #     k
+        #     for k in set(answer.keys())
+        #     if not k.endswith("_comment") and not k.endswith("_generated_tokens")
+        # }
+
+        question_results = {}
+        for result in valid_results:
+            question_results[result.question_name] = result
+
+        answer_key_names = list(question_results.keys())
+
+        generated_tokens_dict = {
+            k + "_generated_tokens": question_results[k].generated_tokens
+            for k in answer_key_names
+        }
+        comments_dict = {
+            "k" + "_comment": question_results[k].comment for k in answer_key_names
+        }
 
+        # we should have a valid result for each question
+        answer_dict = {k: answer[k] for k in answer_key_names}
         assert len(valid_results) == len(answer_key_names)
 
+        # breakpoint()
+        # generated_tokens_dict = {
+        #     k + "_generated_tokens": v.generated_tokens
+        #     for k, v in zip(answer_key_names, valid_results)
+        # }
+
+        # comments_dict = {
+        #     k + "_comment": v.comment for k, v in zip(answer_key_names, valid_results)
+        # }
+        # breakpoint()
+
         # TODO: move this down into Interview
         question_name_to_prompts = dict({})
         for result in valid_results:
-            question_name = result
+            question_name = result.question_name
             question_name_to_prompts[question_name] = {
-                "user_prompt": result
-                "system_prompt": result
+                "user_prompt": result.prompts["user_prompt"],
+                "system_prompt": result.prompts["system_prompt"],
             }
 
         prompt_dictionary = {}
         for answer_key_name in answer_key_names:
-            prompt_dictionary[
-                answer_key_name
-
-            prompt_dictionary[
-                answer_key_name
-
+            prompt_dictionary[answer_key_name + "_user_prompt"] = (
+                question_name_to_prompts[answer_key_name]["user_prompt"]
+            )
+            prompt_dictionary[answer_key_name + "_system_prompt"] = (
+                question_name_to_prompts[answer_key_name]["system_prompt"]
+            )
 
         raw_model_results_dictionary = {}
         for result in valid_results:
-            question_name = result
-            raw_model_results_dictionary[
-
-
-
-
+            question_name = result.question_name
+            raw_model_results_dictionary[question_name + "_raw_model_response"] = (
+                result.raw_model_response
+            )
+            raw_model_results_dictionary[question_name + "_cost"] = result.cost
+            one_use_buys = (
+                "NA"
+                if isinstance(result.cost, str)
+                or result.cost == 0
+                or result.cost is None
+                else 1.0 / result.cost
+            )
+            raw_model_results_dictionary[question_name + "_one_usd_buys"] = one_use_buys
 
+        # breakpoint()
         result = Result(
             agent=interview.agent,
             scenario=interview.scenario,
             model=interview.model,
             iteration=interview.iteration,
-            answer=
+            answer=answer_dict,
             prompt=prompt_dictionary,
             raw_model_response=raw_model_results_dictionary,
             survey=interview.survey,
+            generated_tokens=generated_tokens_dict,
+            comments_dict=comments_dict,
         )
         result.interview_hash = hash(interview)
 
@@ -225,11 +252,11 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
         self,
         cache: Union[Cache, False, None],
         n: int = 1,
-        debug: bool = False,
         stop_on_exception: bool = False,
         progress_bar: bool = False,
         sidecar_model: Optional[LanguageModel] = None,
         print_exceptions: bool = True,
+        raise_validation_errors: bool = False,
     ) -> "Coroutine":
         """Runs a collection of interviews, handling both async and sync contexts."""
         from rich.console import Console
@@ -253,15 +280,15 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
             """Processes results from interviews."""
             async for result in self.run_async_generator(
                 n=n,
-                debug=debug,
                 stop_on_exception=stop_on_exception,
                 cache=cache,
                 sidecar_model=sidecar_model,
+                raise_validation_errors=raise_validation_errors,
             ):
                 self.results.append(result)
                 if progress_bar_context:
                     progress_bar_context.update(generate_table())
-
+            self.completed = True
 
         async def update_progress_bar(progress_bar_context):
             """Updates the progress bar at fixed intervals."""
@@ -309,7 +336,11 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
            progress_bar_context.update(generate_table())
 
         # puts results in the same order as the total interviews
-
+        interview_lookup = {
+            hash(interview): index
+            for index, interview in enumerate(self.total_interviews)
+        }
+        interview_hashes = list(interview_lookup.keys())
         self.results = sorted(
             self.results, key=lambda x: interview_hashes.index(x.interview_hash)
         )
@@ -318,8 +349,12 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
         task_history = TaskHistory(self.total_interviews, include_traceback=False)
         results.task_history = task_history
 
+        results.failed_questions = {}
         results.has_exceptions = task_history.has_exceptions
 
+        # breakpoint()
+        results.bucket_collection = self.bucket_collection
+
         if results.has_exceptions:
             # put the failed interviews in the results object as a list
             failed_interviews = [
@@ -329,6 +364,15 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
                 for interview in self.total_interviews
                 if interview.has_exceptions
             ]
+
+            failed_questions = {}
+            for interview in self.total_interviews:
+                if interview.has_exceptions:
+                    index = interview_lookup[hash(interview)]
+                    failed_questions[index] = interview.failed_questions
+
+            results.failed_questions = failed_questions
+
             from edsl.jobs.Jobs import Jobs
 
             results.failed_jobs = Jobs.from_interviews(
@@ -343,7 +387,9 @@ class JobsRunnerAsyncio(JobsRunnerStatusMixin):
             shared_globals["edsl_runner_exceptions"] = task_history
             print(msg)
             # this is where exceptions are opening up
-            task_history.html(
+            task_history.html(
+                cta="Open report to see details.", open_in_browser=True
+            )
             print(
                 "Also see: https://docs.expectedparrot.com/en/latest/exceptions.html"
             )
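Note: across the runner hunks above, debug is removed end to end, raise_validation_errors is threaded from run() down to async_conduct_interview(), and failed questions are now indexed back to their interviews. A hedged sketch of how those keywords surface at the question level, mirroring the example() classmethod added earlier in this diff:

```python
# Hedged sketch: the new runner keywords as seen from a normal edsl run.
# This mirrors InterviewExceptionEntry.example() above; the .run() keywords shown
# (skip_retry, print_exceptions, raise_validation_errors) are the ones the diff itself passes.
from edsl import QuestionFreeText
from edsl.language_models import LanguageModel

m = LanguageModel.example(test_model=True)
q = QuestionFreeText.example(exception_to_throw=ValueError)  # a question wired to fail

results = q.by(m).run(
    skip_retry=True,
    print_exceptions=False,
    raise_validation_errors=True,  # new flag, passed through to async_conduct_interview
)

print(results.has_exceptions)           # set from task_history.has_exceptions
print(results.task_history.exceptions)  # per-question InterviewExceptionEntry objects
print(results.failed_questions)         # {interview index: interview.failed_questions}
```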
--- a/edsl/jobs/runners/JobsRunnerStatusMixin.py
+++ b/edsl/jobs/runners/JobsRunnerStatusMixin.py
@@ -208,7 +208,7 @@ class JobsRunnerStatusMixin:
         >>> model = interviews[0].model
         >>> num_waiting = 0
         >>> JobsRunnerStatusMixin()._get_model_info(model, num_waiting, models_to_tokens)
-        ModelInfo(model_name='
+        ModelInfo(model_name='...', TPM_limit_k=..., RPM_limit_k=..., num_tasks_waiting=0, token_usage_info=[ModelTokenUsageStats(token_usage_type='new_token_usage', details=[{'type': 'prompt_tokens', 'tokens': 0}, {'type': 'completion_tokens', 'tokens': 0}], cost='$0.00000'), ModelTokenUsageStats(token_usage_type='cached_token_usage', details=[{'type': 'prompt_tokens', 'tokens': 0}, {'type': 'completion_tokens', 'tokens': 0}], cost='$0.00000')])
         """
 
         ## TODO: This should probably be a coop method