edsl 0.1.31.dev4__py3-none-any.whl → 0.1.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +9 -3
- edsl/TemplateLoader.py +24 -0
- edsl/__init__.py +8 -3
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +40 -8
- edsl/agents/AgentList.py +43 -0
- edsl/agents/Invigilator.py +136 -221
- edsl/agents/InvigilatorBase.py +148 -59
- edsl/agents/{PromptConstructionMixin.py → PromptConstructor.py} +154 -85
- edsl/agents/__init__.py +1 -0
- edsl/auto/AutoStudy.py +117 -0
- edsl/auto/StageBase.py +230 -0
- edsl/auto/StageGenerateSurvey.py +178 -0
- edsl/auto/StageLabelQuestions.py +125 -0
- edsl/auto/StagePersona.py +61 -0
- edsl/auto/StagePersonaDimensionValueRanges.py +88 -0
- edsl/auto/StagePersonaDimensionValues.py +74 -0
- edsl/auto/StagePersonaDimensions.py +69 -0
- edsl/auto/StageQuestions.py +73 -0
- edsl/auto/SurveyCreatorPipeline.py +21 -0
- edsl/auto/utilities.py +224 -0
- edsl/config.py +48 -47
- edsl/conjure/Conjure.py +6 -0
- edsl/coop/PriceFetcher.py +58 -0
- edsl/coop/coop.py +50 -7
- edsl/data/Cache.py +35 -1
- edsl/data/CacheHandler.py +3 -4
- edsl/data_transfer_models.py +73 -38
- edsl/enums.py +8 -0
- edsl/exceptions/general.py +10 -8
- edsl/exceptions/language_models.py +25 -1
- edsl/exceptions/questions.py +62 -5
- edsl/exceptions/results.py +4 -0
- edsl/inference_services/AnthropicService.py +13 -11
- edsl/inference_services/AwsBedrock.py +112 -0
- edsl/inference_services/AzureAI.py +214 -0
- edsl/inference_services/DeepInfraService.py +4 -3
- edsl/inference_services/GoogleService.py +16 -12
- edsl/inference_services/GroqService.py +5 -4
- edsl/inference_services/InferenceServiceABC.py +58 -3
- edsl/inference_services/InferenceServicesCollection.py +13 -8
- edsl/inference_services/MistralAIService.py +120 -0
- edsl/inference_services/OllamaService.py +18 -0
- edsl/inference_services/OpenAIService.py +55 -56
- edsl/inference_services/TestService.py +80 -0
- edsl/inference_services/TogetherAIService.py +170 -0
- edsl/inference_services/models_available_cache.py +25 -0
- edsl/inference_services/registry.py +19 -1
- edsl/jobs/Answers.py +10 -12
- edsl/jobs/FailedQuestion.py +78 -0
- edsl/jobs/Jobs.py +137 -41
- edsl/jobs/buckets/BucketCollection.py +24 -15
- edsl/jobs/buckets/TokenBucket.py +105 -18
- edsl/jobs/interviews/Interview.py +393 -83
- edsl/jobs/interviews/{interview_exception_tracking.py → InterviewExceptionCollection.py} +22 -18
- edsl/jobs/interviews/InterviewExceptionEntry.py +167 -0
- edsl/jobs/runners/JobsRunnerAsyncio.py +152 -160
- edsl/jobs/runners/JobsRunnerStatus.py +331 -0
- edsl/jobs/tasks/QuestionTaskCreator.py +30 -23
- edsl/jobs/tasks/TaskCreators.py +1 -1
- edsl/jobs/tasks/TaskHistory.py +205 -126
- edsl/language_models/LanguageModel.py +297 -177
- edsl/language_models/ModelList.py +2 -2
- edsl/language_models/RegisterLanguageModelsMeta.py +14 -29
- edsl/language_models/fake_openai_call.py +15 -0
- edsl/language_models/fake_openai_service.py +61 -0
- edsl/language_models/registry.py +25 -8
- edsl/language_models/repair.py +0 -19
- edsl/language_models/utilities.py +61 -0
- edsl/notebooks/Notebook.py +20 -2
- edsl/prompts/Prompt.py +52 -2
- edsl/questions/AnswerValidatorMixin.py +23 -26
- edsl/questions/QuestionBase.py +330 -249
- edsl/questions/QuestionBaseGenMixin.py +133 -0
- edsl/questions/QuestionBasePromptsMixin.py +266 -0
- edsl/questions/QuestionBudget.py +99 -42
- edsl/questions/QuestionCheckBox.py +227 -36
- edsl/questions/QuestionExtract.py +98 -28
- edsl/questions/QuestionFreeText.py +47 -31
- edsl/questions/QuestionFunctional.py +7 -0
- edsl/questions/QuestionList.py +141 -23
- edsl/questions/QuestionMultipleChoice.py +159 -66
- edsl/questions/QuestionNumerical.py +88 -47
- edsl/questions/QuestionRank.py +182 -25
- edsl/questions/Quick.py +41 -0
- edsl/questions/RegisterQuestionsMeta.py +31 -12
- edsl/questions/ResponseValidatorABC.py +170 -0
- edsl/questions/__init__.py +3 -4
- edsl/questions/decorators.py +21 -0
- edsl/questions/derived/QuestionLikertFive.py +10 -5
- edsl/questions/derived/QuestionLinearScale.py +15 -2
- edsl/questions/derived/QuestionTopK.py +10 -1
- edsl/questions/derived/QuestionYesNo.py +24 -3
- edsl/questions/descriptors.py +43 -7
- edsl/questions/prompt_templates/question_budget.jinja +13 -0
- edsl/questions/prompt_templates/question_checkbox.jinja +32 -0
- edsl/questions/prompt_templates/question_extract.jinja +11 -0
- edsl/questions/prompt_templates/question_free_text.jinja +3 -0
- edsl/questions/prompt_templates/question_linear_scale.jinja +11 -0
- edsl/questions/prompt_templates/question_list.jinja +17 -0
- edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -0
- edsl/questions/prompt_templates/question_numerical.jinja +37 -0
- edsl/questions/question_registry.py +6 -2
- edsl/questions/templates/__init__.py +0 -0
- edsl/questions/templates/budget/__init__.py +0 -0
- edsl/questions/templates/budget/answering_instructions.jinja +7 -0
- edsl/questions/templates/budget/question_presentation.jinja +7 -0
- edsl/questions/templates/checkbox/__init__.py +0 -0
- edsl/questions/templates/checkbox/answering_instructions.jinja +10 -0
- edsl/questions/templates/checkbox/question_presentation.jinja +22 -0
- edsl/questions/templates/extract/__init__.py +0 -0
- edsl/questions/templates/extract/answering_instructions.jinja +7 -0
- edsl/questions/templates/extract/question_presentation.jinja +1 -0
- edsl/questions/templates/free_text/__init__.py +0 -0
- edsl/questions/templates/free_text/answering_instructions.jinja +0 -0
- edsl/questions/templates/free_text/question_presentation.jinja +1 -0
- edsl/questions/templates/likert_five/__init__.py +0 -0
- edsl/questions/templates/likert_five/answering_instructions.jinja +10 -0
- edsl/questions/templates/likert_five/question_presentation.jinja +12 -0
- edsl/questions/templates/linear_scale/__init__.py +0 -0
- edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -0
- edsl/questions/templates/linear_scale/question_presentation.jinja +5 -0
- edsl/questions/templates/list/__init__.py +0 -0
- edsl/questions/templates/list/answering_instructions.jinja +4 -0
- edsl/questions/templates/list/question_presentation.jinja +5 -0
- edsl/questions/templates/multiple_choice/__init__.py +0 -0
- edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -0
- edsl/questions/templates/multiple_choice/html.jinja +0 -0
- edsl/questions/templates/multiple_choice/question_presentation.jinja +12 -0
- edsl/questions/templates/numerical/__init__.py +0 -0
- edsl/questions/templates/numerical/answering_instructions.jinja +8 -0
- edsl/questions/templates/numerical/question_presentation.jinja +7 -0
- edsl/questions/templates/rank/__init__.py +0 -0
- edsl/questions/templates/rank/answering_instructions.jinja +11 -0
- edsl/questions/templates/rank/question_presentation.jinja +15 -0
- edsl/questions/templates/top_k/__init__.py +0 -0
- edsl/questions/templates/top_k/answering_instructions.jinja +8 -0
- edsl/questions/templates/top_k/question_presentation.jinja +22 -0
- edsl/questions/templates/yes_no/__init__.py +0 -0
- edsl/questions/templates/yes_no/answering_instructions.jinja +6 -0
- edsl/questions/templates/yes_no/question_presentation.jinja +12 -0
- edsl/results/Dataset.py +20 -0
- edsl/results/DatasetExportMixin.py +58 -30
- edsl/results/DatasetTree.py +145 -0
- edsl/results/Result.py +32 -5
- edsl/results/Results.py +135 -46
- edsl/results/ResultsDBMixin.py +3 -3
- edsl/results/Selector.py +118 -0
- edsl/results/tree_explore.py +115 -0
- edsl/scenarios/FileStore.py +71 -10
- edsl/scenarios/Scenario.py +109 -24
- edsl/scenarios/ScenarioImageMixin.py +2 -2
- edsl/scenarios/ScenarioList.py +546 -21
- edsl/scenarios/ScenarioListExportMixin.py +24 -4
- edsl/scenarios/ScenarioListPdfMixin.py +153 -4
- edsl/study/SnapShot.py +8 -1
- edsl/study/Study.py +32 -0
- edsl/surveys/Rule.py +15 -3
- edsl/surveys/RuleCollection.py +21 -5
- edsl/surveys/Survey.py +707 -298
- edsl/surveys/SurveyExportMixin.py +71 -9
- edsl/surveys/SurveyFlowVisualizationMixin.py +2 -1
- edsl/surveys/SurveyQualtricsImport.py +284 -0
- edsl/surveys/instructions/ChangeInstruction.py +47 -0
- edsl/surveys/instructions/Instruction.py +34 -0
- edsl/surveys/instructions/InstructionCollection.py +77 -0
- edsl/surveys/instructions/__init__.py +0 -0
- edsl/templates/error_reporting/base.html +24 -0
- edsl/templates/error_reporting/exceptions_by_model.html +35 -0
- edsl/templates/error_reporting/exceptions_by_question_name.html +17 -0
- edsl/templates/error_reporting/exceptions_by_type.html +17 -0
- edsl/templates/error_reporting/interview_details.html +116 -0
- edsl/templates/error_reporting/interviews.html +10 -0
- edsl/templates/error_reporting/overview.html +5 -0
- edsl/templates/error_reporting/performance_plot.html +2 -0
- edsl/templates/error_reporting/report.css +74 -0
- edsl/templates/error_reporting/report.html +118 -0
- edsl/templates/error_reporting/report.js +25 -0
- edsl/utilities/utilities.py +40 -1
- {edsl-0.1.31.dev4.dist-info → edsl-0.1.33.dist-info}/METADATA +8 -2
- edsl-0.1.33.dist-info/RECORD +295 -0
- edsl/jobs/interviews/InterviewTaskBuildingMixin.py +0 -271
- edsl/jobs/interviews/retry_management.py +0 -37
- edsl/jobs/runners/JobsRunnerStatusMixin.py +0 -303
- edsl/utilities/gcp_bucket/simple_example.py +0 -9
- edsl-0.1.31.dev4.dist-info/RECORD +0 -204
- {edsl-0.1.31.dev4.dist-info → edsl-0.1.33.dist-info}/LICENSE +0 -0
- {edsl-0.1.31.dev4.dist-info → edsl-0.1.33.dist-info}/WHEEL +0 -0
edsl/agents/Invigilator.py
CHANGED
@@ -1,253 +1,169 @@
|
|
1
1
|
"""Module for creating Invigilators, which are objects to administer a question to an Agent."""
|
2
2
|
|
3
|
-
import json
|
4
3
|
from typing import Dict, Any, Optional
|
5
4
|
|
6
|
-
from edsl.exceptions import AgentRespondedWithBadJSONError
|
7
5
|
from edsl.prompts.Prompt import Prompt
|
8
6
|
from edsl.utilities.decorators import sync_wrapper, jupyter_nb_handler
|
9
7
|
from edsl.prompts.registry import get_classes as prompt_lookup
|
10
|
-
from edsl.
|
11
|
-
from edsl.exceptions.agents import FailedTaskException
|
12
|
-
from edsl.agents.PromptConstructionMixin import PromptConstructorMixin
|
13
|
-
|
8
|
+
from edsl.exceptions.questions import QuestionAnswerValidationError
|
14
9
|
from edsl.agents.InvigilatorBase import InvigilatorBase
|
10
|
+
from edsl.data_transfer_models import AgentResponseDict, EDSLResultObjectInput
|
11
|
+
from edsl.agents.PromptConstructor import PromptConstructor
|
12
|
+
|
15
13
|
|
14
|
+
class NotApplicable(str):
|
15
|
+
def __new__(cls):
|
16
|
+
instance = super().__new__(cls, "Not Applicable")
|
17
|
+
instance.literal = "Not Applicable"
|
18
|
+
return instance
|
16
19
|
|
17
|
-
|
20
|
+
|
21
|
+
class InvigilatorAI(InvigilatorBase):
|
18
22
|
"""An invigilator that uses an AI model to answer questions."""
|
19
23
|
|
24
|
+
def get_prompts(self) -> Dict[str, Prompt]:
|
25
|
+
"""Return the prompts used."""
|
26
|
+
return self.prompt_constructor.get_prompts()
|
27
|
+
|
20
28
|
async def async_answer_question(self) -> AgentResponseDict:
|
21
29
|
"""Answer a question using the AI model.
|
22
|
-
|
30
|
+
|
23
31
|
>>> i = InvigilatorAI.example()
|
24
32
|
>>> i.answer_question()
|
25
|
-
{'message': '
|
33
|
+
{'message': [{'text': 'SPAM!'}], 'usage': {'prompt_tokens': 1, 'completion_tokens': 1}}
|
26
34
|
"""
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
"
|
31
|
-
"question": self.question,
|
32
|
-
"scenario": self.scenario,
|
33
|
-
"raw_response": raw_response,
|
34
|
-
"raw_model_response": raw_response["raw_model_response"],
|
35
|
+
prompts = self.get_prompts()
|
36
|
+
params = {
|
37
|
+
"user_prompt": prompts["user_prompt"].text,
|
38
|
+
"system_prompt": prompts["system_prompt"].text,
|
35
39
|
}
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
self
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
"user_prompt": user_prompt.text,
|
52
|
-
"system_prompt": system_prompt.text,
|
53
|
-
"iteration": iteration,
|
54
|
-
"cache": self.cache,
|
55
|
-
}
|
56
|
-
if encoded_image:
|
57
|
-
params["encoded_image"] = encoded_image
|
58
|
-
response = await self.model.async_get_response(**params)
|
59
|
-
|
60
|
-
# TODO: I *don't* think we need to delete the cache key here because I think
|
61
|
-
# it will not have been set yet; the exception would have been raised before.
|
62
|
-
except json.JSONDecodeError as e:
|
63
|
-
raise AgentRespondedWithBadJSONError(
|
64
|
-
f"Returned bad JSON: {e}"
|
65
|
-
f"Prompt: {user_prompt}"
|
66
|
-
f"System Prompt: {system_prompt}"
|
67
|
-
)
|
68
|
-
|
69
|
-
return response
|
70
|
-
|
71
|
-
def _remove_from_cache(self, raw_response) -> None:
|
40
|
+
if "encoded_image" in prompts:
|
41
|
+
params["encoded_image"] = prompts["encoded_image"]
|
42
|
+
|
43
|
+
params.update({"iteration": self.iteration, "cache": self.cache})
|
44
|
+
|
45
|
+
agent_response_dict: AgentResponseDict = await self.model.async_get_response(
|
46
|
+
**params
|
47
|
+
)
|
48
|
+
# store to self in case validation failure
|
49
|
+
self.raw_model_response = agent_response_dict.model_outputs.response
|
50
|
+
self.generated_tokens = agent_response_dict.edsl_dict.generated_tokens
|
51
|
+
|
52
|
+
return self.extract_edsl_result_entry_and_validate(agent_response_dict)
|
53
|
+
|
54
|
+
def _remove_from_cache(self, cache_key) -> None:
|
72
55
|
"""Remove an entry from the cache."""
|
73
|
-
cache_key = raw_response.get("cache_key", None)
|
74
56
|
if cache_key:
|
75
57
|
del self.cache.data[cache_key]
|
76
58
|
|
77
|
-
def
|
78
|
-
self, *, agent, question, scenario, raw_response, raw_model_response
|
79
|
-
) -> AgentResponseDict:
|
80
|
-
"""Return formatted raw response.
|
81
|
-
|
82
|
-
This cleans up the raw response to make it suitable to pass to AgentResponseDict.
|
83
|
-
"""
|
84
|
-
_ = agent
|
85
|
-
try:
|
86
|
-
response = question._validate_answer(raw_response)
|
87
|
-
except Exception as e:
|
88
|
-
"""If the response is invalid, remove it from the cache and raise the exception."""
|
89
|
-
self._remove_from_cache(raw_response)
|
90
|
-
raise e
|
91
|
-
|
59
|
+
def determine_answer(self, raw_answer: str) -> Any:
|
92
60
|
question_dict = self.survey.question_names_to_questions()
|
61
|
+
# iterates through the current answers and updates the question_dict (which is all questions)
|
93
62
|
for other_question, answer in self.current_answers.items():
|
94
63
|
if other_question in question_dict:
|
95
64
|
question_dict[other_question].answer = answer
|
96
65
|
else:
|
97
|
-
#
|
66
|
+
# it might be a comment
|
98
67
|
if (
|
99
68
|
new_question := other_question.split("_comment")[0]
|
100
69
|
) in question_dict:
|
101
70
|
question_dict[new_question].comment = answer
|
102
71
|
|
103
|
-
combined_dict = {**question_dict, **scenario}
|
104
|
-
answer
|
105
|
-
|
106
|
-
)
|
107
|
-
data = {
|
108
|
-
"answer": answer,
|
109
|
-
"comment": response.get(
|
110
|
-
"comment", ""
|
111
|
-
), # not all question have comment fields,
|
112
|
-
"question_name": question.question_name,
|
113
|
-
"prompts": self.get_prompts(),
|
114
|
-
"cached_response": raw_response.get("cached_response", None),
|
115
|
-
"usage": raw_response.get("usage", {}),
|
116
|
-
"raw_model_response": raw_model_response,
|
117
|
-
"cache_used": raw_response.get("cache_used", False),
|
118
|
-
"cache_key": raw_response.get("cache_key", None),
|
119
|
-
}
|
120
|
-
return AgentResponseDict(**data)
|
121
|
-
|
122
|
-
get_response = sync_wrapper(async_get_response)
|
123
|
-
answer_question = sync_wrapper(async_answer_question)
|
124
|
-
|
125
|
-
|
126
|
-
class InvigilatorSidecar(InvigilatorAI):
|
127
|
-
"""An invigilator that presents the 'raw' question to the agent
|
128
|
-
& uses a sidecar model to answer questions."""
|
129
|
-
|
130
|
-
async def async_answer_question(self, failed: bool = False) -> AgentResponseDict:
|
131
|
-
"""Answer a question using the AI model."""
|
132
|
-
from edsl import Model
|
133
|
-
|
134
|
-
advanced_model = self.sidecar_model
|
135
|
-
simple_model = self.model
|
136
|
-
question = self.question
|
137
|
-
human_readable_question = (
|
138
|
-
"Please answer this single question: " + question.human_readable()
|
139
|
-
)
|
140
|
-
print("Getting the simple model response to: ", human_readable_question)
|
141
|
-
raw_simple_response = await simple_model.async_execute_model_call(
|
142
|
-
user_prompt=human_readable_question,
|
143
|
-
system_prompt="""Pretend you are a human answering a question. Do not break character.""",
|
144
|
-
)
|
145
|
-
simple_response = simple_model.parse_response(raw_simple_response)
|
146
|
-
instructions = question.get_instructions()
|
147
|
-
|
148
|
-
main_model_prompt = Prompt(
|
149
|
-
text="""
|
150
|
-
A simpler language model was asked this question:
|
151
|
-
|
152
|
-
To the simpel model:
|
153
|
-
{{ human_readable_question }}
|
154
|
-
|
155
|
-
The simple model responded:
|
156
|
-
<response>
|
157
|
-
{{ simple_response }}
|
158
|
-
</response>
|
159
|
-
|
160
|
-
It was suppose to respond according to these instructions:
|
161
|
-
<instructions>
|
162
|
-
{{ instructions }}
|
163
|
-
</instructions>
|
164
|
-
|
165
|
-
Please format the simple model's response as it should have been formmated, given the instructions.
|
166
|
-
Only respond in valid JSON, like so {"answer": "SPAM!"} or {"answer": "SPAM!", "comment": "I am a robot."}
|
167
|
-
Do not inlcude the word 'json'
|
168
|
-
"""
|
169
|
-
)
|
170
|
-
|
171
|
-
d = {
|
172
|
-
"human_readable_question": human_readable_question,
|
173
|
-
"simple_response": simple_response,
|
174
|
-
"instructions": instructions,
|
175
|
-
}
|
176
|
-
|
177
|
-
print("The human-readable question is: ", human_readable_question)
|
178
|
-
print("The simple response is: ", simple_response)
|
179
|
-
|
180
|
-
raw_response_data = await advanced_model.async_execute_model_call(
|
181
|
-
user_prompt=main_model_prompt.render(d).text,
|
182
|
-
system_prompt="You are a helpful assistant.",
|
183
|
-
)
|
184
|
-
|
185
|
-
raw_response = await advanced_model.async_get_response(
|
186
|
-
user_prompt=main_model_prompt.render(d).text,
|
187
|
-
system_prompt="You are a helpful assistant.",
|
188
|
-
iteration=0,
|
189
|
-
cache=self.cache,
|
190
|
-
)
|
72
|
+
combined_dict = {**question_dict, **self.scenario}
|
73
|
+
# sometimes the answer is a code, so we need to translate it
|
74
|
+
return self.question._translate_answer_code_to_answer(raw_answer, combined_dict)
|
191
75
|
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
76
|
+
def extract_edsl_result_entry_and_validate(
|
77
|
+
self, agent_response_dict: AgentResponseDict
|
78
|
+
) -> EDSLResultObjectInput:
|
79
|
+
edsl_dict = agent_response_dict.edsl_dict._asdict()
|
80
|
+
exception_occurred = None
|
81
|
+
validated = False
|
82
|
+
try:
|
83
|
+
validated_edsl_dict = self.question._validate_answer(edsl_dict)
|
84
|
+
answer = self.determine_answer(validated_edsl_dict["answer"])
|
85
|
+
comment = validated_edsl_dict.get("comment", "")
|
86
|
+
validated = True
|
87
|
+
except QuestionAnswerValidationError as e:
|
88
|
+
answer = None
|
89
|
+
comment = "The response was not valid."
|
90
|
+
if self.raise_validation_errors:
|
91
|
+
exception_occurred = e
|
92
|
+
except Exception as non_validation_error:
|
93
|
+
answer = None
|
94
|
+
comment = "Some other error occurred."
|
95
|
+
exception_occurred = non_validation_error
|
96
|
+
finally:
|
97
|
+
# even if validation failes, we still return the result
|
98
|
+
data = {
|
99
|
+
"answer": answer,
|
100
|
+
"comment": comment,
|
101
|
+
"generated_tokens": agent_response_dict.edsl_dict.generated_tokens,
|
102
|
+
"question_name": self.question.question_name,
|
103
|
+
"prompts": self.get_prompts(),
|
104
|
+
"cached_response": agent_response_dict.model_outputs.cached_response,
|
105
|
+
"raw_model_response": agent_response_dict.model_outputs.response,
|
106
|
+
"cache_used": agent_response_dict.model_outputs.cache_used,
|
107
|
+
"cache_key": agent_response_dict.model_outputs.cache_key,
|
108
|
+
"validated": validated,
|
109
|
+
"exception_occurred": exception_occurred,
|
110
|
+
"cost": agent_response_dict.model_outputs.cost,
|
111
|
+
}
|
112
|
+
result = EDSLResultObjectInput(**data)
|
113
|
+
return result
|
205
114
|
|
206
|
-
# get_response = sync_wrapper(async_get_response)
|
207
115
|
answer_question = sync_wrapper(async_answer_question)
|
208
116
|
|
209
117
|
|
210
|
-
class InvigilatorDebug(InvigilatorBase):
|
211
|
-
"""An invigilator class for debugging purposes."""
|
212
|
-
|
213
|
-
async def async_answer_question(self, iteration: int = 0) -> AgentResponseDict:
|
214
|
-
"""Return the answer to the question."""
|
215
|
-
results = self.question._simulate_answer(human_readable=True)
|
216
|
-
results["prompts"] = self.get_prompts()
|
217
|
-
results["question_name"] = self.question.question_name
|
218
|
-
results["comment"] = "Debug comment"
|
219
|
-
return AgentResponseDict(**results)
|
220
|
-
|
221
|
-
def get_prompts(self) -> Dict[str, Prompt]:
|
222
|
-
"""Return the prompts used."""
|
223
|
-
return {
|
224
|
-
"user_prompt": Prompt("NA"),
|
225
|
-
"system_prompt": Prompt("NA"),
|
226
|
-
}
|
227
|
-
|
228
|
-
|
229
118
|
class InvigilatorHuman(InvigilatorBase):
|
230
119
|
"""An invigilator for when a human is answering the question."""
|
231
120
|
|
121
|
+
validate_response: bool = False
|
122
|
+
translate_response: bool = False
|
123
|
+
|
232
124
|
async def async_answer_question(self, iteration: int = 0) -> AgentResponseDict:
|
233
125
|
"""Return the answer to the question."""
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
"
|
238
|
-
|
239
|
-
|
126
|
+
comment = "This is a real survey response from a human."
|
127
|
+
|
128
|
+
def __repr__(self):
|
129
|
+
return f"{self.literal}"
|
130
|
+
|
131
|
+
exception_occurred = None
|
132
|
+
validated = False
|
240
133
|
try:
|
241
134
|
answer = self.agent.answer_question_directly(self.question, self.scenario)
|
242
|
-
|
135
|
+
self.raw_model_response = answer
|
136
|
+
|
137
|
+
if self.validate_response:
|
138
|
+
_ = self.question._validate_answer({"answer": answer})
|
139
|
+
if self.translate_response:
|
140
|
+
answer = self.question._translate_answer_code_to_answer(
|
141
|
+
answer, self.scenario
|
142
|
+
)
|
143
|
+
validated = True
|
144
|
+
except QuestionAnswerValidationError as e:
|
145
|
+
answer = None
|
146
|
+
if self.raise_validation_errors:
|
147
|
+
exception_occurred = e
|
243
148
|
except Exception as e:
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
149
|
+
answer = None
|
150
|
+
if self.raise_validation_errors:
|
151
|
+
exception_occurred = e
|
152
|
+
finally:
|
153
|
+
data = {
|
154
|
+
"generated_tokens": NotApplicable(),
|
155
|
+
"question_name": self.question.question_name,
|
156
|
+
"prompts": self.get_prompts(),
|
157
|
+
"cached_response": NotApplicable(),
|
158
|
+
"raw_model_response": NotApplicable(),
|
159
|
+
"cache_used": NotApplicable(),
|
160
|
+
"cache_key": NotApplicable(),
|
161
|
+
"answer": answer,
|
162
|
+
"comment": comment,
|
163
|
+
"validated": validated,
|
164
|
+
"exception_occurred": exception_occurred,
|
165
|
+
}
|
166
|
+
return EDSLResultObjectInput(**data)
|
251
167
|
|
252
168
|
|
253
169
|
class InvigilatorFunctional(InvigilatorBase):
|
@@ -256,22 +172,21 @@ class InvigilatorFunctional(InvigilatorBase):
|
|
256
172
|
async def async_answer_question(self, iteration: int = 0) -> AgentResponseDict:
|
257
173
|
"""Return the answer to the question."""
|
258
174
|
func = self.question.answer_question_directly
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
) from e
|
175
|
+
answer = func(scenario=self.scenario, agent_traits=self.agent.traits)
|
176
|
+
|
177
|
+
return EDSLResultObjectInput(
|
178
|
+
generated_tokens=str(answer),
|
179
|
+
question_name=self.question.question_name,
|
180
|
+
prompts=self.get_prompts(),
|
181
|
+
cached_response=NotApplicable(),
|
182
|
+
raw_model_response=NotApplicable(),
|
183
|
+
cache_used=NotApplicable(),
|
184
|
+
cache_key=NotApplicable(),
|
185
|
+
answer=answer["answer"],
|
186
|
+
comment="This is the result of a functional question.",
|
187
|
+
validated=True,
|
188
|
+
exception_occurred=None,
|
189
|
+
)
|
275
190
|
|
276
191
|
def get_prompts(self) -> Dict[str, Prompt]:
|
277
192
|
"""Return the prompts used."""
|