edsl 0.1.31.dev4__py3-none-any.whl → 0.1.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188) hide show
  1. edsl/Base.py +9 -3
  2. edsl/TemplateLoader.py +24 -0
  3. edsl/__init__.py +8 -3
  4. edsl/__version__.py +1 -1
  5. edsl/agents/Agent.py +40 -8
  6. edsl/agents/AgentList.py +43 -0
  7. edsl/agents/Invigilator.py +136 -221
  8. edsl/agents/InvigilatorBase.py +148 -59
  9. edsl/agents/{PromptConstructionMixin.py → PromptConstructor.py} +154 -85
  10. edsl/agents/__init__.py +1 -0
  11. edsl/auto/AutoStudy.py +117 -0
  12. edsl/auto/StageBase.py +230 -0
  13. edsl/auto/StageGenerateSurvey.py +178 -0
  14. edsl/auto/StageLabelQuestions.py +125 -0
  15. edsl/auto/StagePersona.py +61 -0
  16. edsl/auto/StagePersonaDimensionValueRanges.py +88 -0
  17. edsl/auto/StagePersonaDimensionValues.py +74 -0
  18. edsl/auto/StagePersonaDimensions.py +69 -0
  19. edsl/auto/StageQuestions.py +73 -0
  20. edsl/auto/SurveyCreatorPipeline.py +21 -0
  21. edsl/auto/utilities.py +224 -0
  22. edsl/config.py +48 -47
  23. edsl/conjure/Conjure.py +6 -0
  24. edsl/coop/PriceFetcher.py +58 -0
  25. edsl/coop/coop.py +50 -7
  26. edsl/data/Cache.py +35 -1
  27. edsl/data/CacheHandler.py +3 -4
  28. edsl/data_transfer_models.py +73 -38
  29. edsl/enums.py +8 -0
  30. edsl/exceptions/general.py +10 -8
  31. edsl/exceptions/language_models.py +25 -1
  32. edsl/exceptions/questions.py +62 -5
  33. edsl/exceptions/results.py +4 -0
  34. edsl/inference_services/AnthropicService.py +13 -11
  35. edsl/inference_services/AwsBedrock.py +112 -0
  36. edsl/inference_services/AzureAI.py +214 -0
  37. edsl/inference_services/DeepInfraService.py +4 -3
  38. edsl/inference_services/GoogleService.py +16 -12
  39. edsl/inference_services/GroqService.py +5 -4
  40. edsl/inference_services/InferenceServiceABC.py +58 -3
  41. edsl/inference_services/InferenceServicesCollection.py +13 -8
  42. edsl/inference_services/MistralAIService.py +120 -0
  43. edsl/inference_services/OllamaService.py +18 -0
  44. edsl/inference_services/OpenAIService.py +55 -56
  45. edsl/inference_services/TestService.py +80 -0
  46. edsl/inference_services/TogetherAIService.py +170 -0
  47. edsl/inference_services/models_available_cache.py +25 -0
  48. edsl/inference_services/registry.py +19 -1
  49. edsl/jobs/Answers.py +10 -12
  50. edsl/jobs/FailedQuestion.py +78 -0
  51. edsl/jobs/Jobs.py +137 -41
  52. edsl/jobs/buckets/BucketCollection.py +24 -15
  53. edsl/jobs/buckets/TokenBucket.py +105 -18
  54. edsl/jobs/interviews/Interview.py +393 -83
  55. edsl/jobs/interviews/{interview_exception_tracking.py → InterviewExceptionCollection.py} +22 -18
  56. edsl/jobs/interviews/InterviewExceptionEntry.py +167 -0
  57. edsl/jobs/runners/JobsRunnerAsyncio.py +152 -160
  58. edsl/jobs/runners/JobsRunnerStatus.py +331 -0
  59. edsl/jobs/tasks/QuestionTaskCreator.py +30 -23
  60. edsl/jobs/tasks/TaskCreators.py +1 -1
  61. edsl/jobs/tasks/TaskHistory.py +205 -126
  62. edsl/language_models/LanguageModel.py +297 -177
  63. edsl/language_models/ModelList.py +2 -2
  64. edsl/language_models/RegisterLanguageModelsMeta.py +14 -29
  65. edsl/language_models/fake_openai_call.py +15 -0
  66. edsl/language_models/fake_openai_service.py +61 -0
  67. edsl/language_models/registry.py +25 -8
  68. edsl/language_models/repair.py +0 -19
  69. edsl/language_models/utilities.py +61 -0
  70. edsl/notebooks/Notebook.py +20 -2
  71. edsl/prompts/Prompt.py +52 -2
  72. edsl/questions/AnswerValidatorMixin.py +23 -26
  73. edsl/questions/QuestionBase.py +330 -249
  74. edsl/questions/QuestionBaseGenMixin.py +133 -0
  75. edsl/questions/QuestionBasePromptsMixin.py +266 -0
  76. edsl/questions/QuestionBudget.py +99 -42
  77. edsl/questions/QuestionCheckBox.py +227 -36
  78. edsl/questions/QuestionExtract.py +98 -28
  79. edsl/questions/QuestionFreeText.py +47 -31
  80. edsl/questions/QuestionFunctional.py +7 -0
  81. edsl/questions/QuestionList.py +141 -23
  82. edsl/questions/QuestionMultipleChoice.py +159 -66
  83. edsl/questions/QuestionNumerical.py +88 -47
  84. edsl/questions/QuestionRank.py +182 -25
  85. edsl/questions/Quick.py +41 -0
  86. edsl/questions/RegisterQuestionsMeta.py +31 -12
  87. edsl/questions/ResponseValidatorABC.py +170 -0
  88. edsl/questions/__init__.py +3 -4
  89. edsl/questions/decorators.py +21 -0
  90. edsl/questions/derived/QuestionLikertFive.py +10 -5
  91. edsl/questions/derived/QuestionLinearScale.py +15 -2
  92. edsl/questions/derived/QuestionTopK.py +10 -1
  93. edsl/questions/derived/QuestionYesNo.py +24 -3
  94. edsl/questions/descriptors.py +43 -7
  95. edsl/questions/prompt_templates/question_budget.jinja +13 -0
  96. edsl/questions/prompt_templates/question_checkbox.jinja +32 -0
  97. edsl/questions/prompt_templates/question_extract.jinja +11 -0
  98. edsl/questions/prompt_templates/question_free_text.jinja +3 -0
  99. edsl/questions/prompt_templates/question_linear_scale.jinja +11 -0
  100. edsl/questions/prompt_templates/question_list.jinja +17 -0
  101. edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -0
  102. edsl/questions/prompt_templates/question_numerical.jinja +37 -0
  103. edsl/questions/question_registry.py +6 -2
  104. edsl/questions/templates/__init__.py +0 -0
  105. edsl/questions/templates/budget/__init__.py +0 -0
  106. edsl/questions/templates/budget/answering_instructions.jinja +7 -0
  107. edsl/questions/templates/budget/question_presentation.jinja +7 -0
  108. edsl/questions/templates/checkbox/__init__.py +0 -0
  109. edsl/questions/templates/checkbox/answering_instructions.jinja +10 -0
  110. edsl/questions/templates/checkbox/question_presentation.jinja +22 -0
  111. edsl/questions/templates/extract/__init__.py +0 -0
  112. edsl/questions/templates/extract/answering_instructions.jinja +7 -0
  113. edsl/questions/templates/extract/question_presentation.jinja +1 -0
  114. edsl/questions/templates/free_text/__init__.py +0 -0
  115. edsl/questions/templates/free_text/answering_instructions.jinja +0 -0
  116. edsl/questions/templates/free_text/question_presentation.jinja +1 -0
  117. edsl/questions/templates/likert_five/__init__.py +0 -0
  118. edsl/questions/templates/likert_five/answering_instructions.jinja +10 -0
  119. edsl/questions/templates/likert_five/question_presentation.jinja +12 -0
  120. edsl/questions/templates/linear_scale/__init__.py +0 -0
  121. edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -0
  122. edsl/questions/templates/linear_scale/question_presentation.jinja +5 -0
  123. edsl/questions/templates/list/__init__.py +0 -0
  124. edsl/questions/templates/list/answering_instructions.jinja +4 -0
  125. edsl/questions/templates/list/question_presentation.jinja +5 -0
  126. edsl/questions/templates/multiple_choice/__init__.py +0 -0
  127. edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -0
  128. edsl/questions/templates/multiple_choice/html.jinja +0 -0
  129. edsl/questions/templates/multiple_choice/question_presentation.jinja +12 -0
  130. edsl/questions/templates/numerical/__init__.py +0 -0
  131. edsl/questions/templates/numerical/answering_instructions.jinja +8 -0
  132. edsl/questions/templates/numerical/question_presentation.jinja +7 -0
  133. edsl/questions/templates/rank/__init__.py +0 -0
  134. edsl/questions/templates/rank/answering_instructions.jinja +11 -0
  135. edsl/questions/templates/rank/question_presentation.jinja +15 -0
  136. edsl/questions/templates/top_k/__init__.py +0 -0
  137. edsl/questions/templates/top_k/answering_instructions.jinja +8 -0
  138. edsl/questions/templates/top_k/question_presentation.jinja +22 -0
  139. edsl/questions/templates/yes_no/__init__.py +0 -0
  140. edsl/questions/templates/yes_no/answering_instructions.jinja +6 -0
  141. edsl/questions/templates/yes_no/question_presentation.jinja +12 -0
  142. edsl/results/Dataset.py +20 -0
  143. edsl/results/DatasetExportMixin.py +58 -30
  144. edsl/results/DatasetTree.py +145 -0
  145. edsl/results/Result.py +32 -5
  146. edsl/results/Results.py +135 -46
  147. edsl/results/ResultsDBMixin.py +3 -3
  148. edsl/results/Selector.py +118 -0
  149. edsl/results/tree_explore.py +115 -0
  150. edsl/scenarios/FileStore.py +71 -10
  151. edsl/scenarios/Scenario.py +109 -24
  152. edsl/scenarios/ScenarioImageMixin.py +2 -2
  153. edsl/scenarios/ScenarioList.py +546 -21
  154. edsl/scenarios/ScenarioListExportMixin.py +24 -4
  155. edsl/scenarios/ScenarioListPdfMixin.py +153 -4
  156. edsl/study/SnapShot.py +8 -1
  157. edsl/study/Study.py +32 -0
  158. edsl/surveys/Rule.py +15 -3
  159. edsl/surveys/RuleCollection.py +21 -5
  160. edsl/surveys/Survey.py +707 -298
  161. edsl/surveys/SurveyExportMixin.py +71 -9
  162. edsl/surveys/SurveyFlowVisualizationMixin.py +2 -1
  163. edsl/surveys/SurveyQualtricsImport.py +284 -0
  164. edsl/surveys/instructions/ChangeInstruction.py +47 -0
  165. edsl/surveys/instructions/Instruction.py +34 -0
  166. edsl/surveys/instructions/InstructionCollection.py +77 -0
  167. edsl/surveys/instructions/__init__.py +0 -0
  168. edsl/templates/error_reporting/base.html +24 -0
  169. edsl/templates/error_reporting/exceptions_by_model.html +35 -0
  170. edsl/templates/error_reporting/exceptions_by_question_name.html +17 -0
  171. edsl/templates/error_reporting/exceptions_by_type.html +17 -0
  172. edsl/templates/error_reporting/interview_details.html +116 -0
  173. edsl/templates/error_reporting/interviews.html +10 -0
  174. edsl/templates/error_reporting/overview.html +5 -0
  175. edsl/templates/error_reporting/performance_plot.html +2 -0
  176. edsl/templates/error_reporting/report.css +74 -0
  177. edsl/templates/error_reporting/report.html +118 -0
  178. edsl/templates/error_reporting/report.js +25 -0
  179. edsl/utilities/utilities.py +40 -1
  180. {edsl-0.1.31.dev4.dist-info → edsl-0.1.33.dist-info}/METADATA +8 -2
  181. edsl-0.1.33.dist-info/RECORD +295 -0
  182. edsl/jobs/interviews/InterviewTaskBuildingMixin.py +0 -271
  183. edsl/jobs/interviews/retry_management.py +0 -37
  184. edsl/jobs/runners/JobsRunnerStatusMixin.py +0 -303
  185. edsl/utilities/gcp_bucket/simple_example.py +0 -9
  186. edsl-0.1.31.dev4.dist-info/RECORD +0 -204
  187. {edsl-0.1.31.dev4.dist-info → edsl-0.1.33.dist-info}/LICENSE +0 -0
  188. {edsl-0.1.31.dev4.dist-info → edsl-0.1.33.dist-info}/WHEEL +0 -0
@@ -1,253 +1,169 @@
1
1
  """Module for creating Invigilators, which are objects to administer a question to an Agent."""
2
2
 
3
- import json
4
3
  from typing import Dict, Any, Optional
5
4
 
6
- from edsl.exceptions import AgentRespondedWithBadJSONError
7
5
  from edsl.prompts.Prompt import Prompt
8
6
  from edsl.utilities.decorators import sync_wrapper, jupyter_nb_handler
9
7
  from edsl.prompts.registry import get_classes as prompt_lookup
10
- from edsl.data_transfer_models import AgentResponseDict
11
- from edsl.exceptions.agents import FailedTaskException
12
- from edsl.agents.PromptConstructionMixin import PromptConstructorMixin
13
-
8
+ from edsl.exceptions.questions import QuestionAnswerValidationError
14
9
  from edsl.agents.InvigilatorBase import InvigilatorBase
10
+ from edsl.data_transfer_models import AgentResponseDict, EDSLResultObjectInput
11
+ from edsl.agents.PromptConstructor import PromptConstructor
12
+
15
13
 
14
+ class NotApplicable(str):
15
+ def __new__(cls):
16
+ instance = super().__new__(cls, "Not Applicable")
17
+ instance.literal = "Not Applicable"
18
+ return instance
16
19
 
17
- class InvigilatorAI(PromptConstructorMixin, InvigilatorBase):
20
+
21
+ class InvigilatorAI(InvigilatorBase):
18
22
  """An invigilator that uses an AI model to answer questions."""
19
23
 
24
+ def get_prompts(self) -> Dict[str, Prompt]:
25
+ """Return the prompts used."""
26
+ return self.prompt_constructor.get_prompts()
27
+
20
28
  async def async_answer_question(self) -> AgentResponseDict:
21
29
  """Answer a question using the AI model.
22
-
30
+
23
31
  >>> i = InvigilatorAI.example()
24
32
  >>> i.answer_question()
25
- {'message': '{"answer": "SPAM!"}'}
33
+ {'message': [{'text': 'SPAM!'}], 'usage': {'prompt_tokens': 1, 'completion_tokens': 1}}
26
34
  """
27
- params = self.get_prompts() | {"iteration": self.iteration}
28
- raw_response = await self.async_get_response(**params)
29
- data = {
30
- "agent": self.agent,
31
- "question": self.question,
32
- "scenario": self.scenario,
33
- "raw_response": raw_response,
34
- "raw_model_response": raw_response["raw_model_response"],
35
+ prompts = self.get_prompts()
36
+ params = {
37
+ "user_prompt": prompts["user_prompt"].text,
38
+ "system_prompt": prompts["system_prompt"].text,
35
39
  }
36
- response = self._format_raw_response(**data)
37
- #breakpoint()
38
- return AgentResponseDict(**response)
39
-
40
- async def async_get_response(
41
- self,
42
- user_prompt: Prompt,
43
- system_prompt: Prompt,
44
- iteration: int = 0,
45
- encoded_image=None,
46
- ) -> dict:
47
- """Call the LLM and gets a response. Used in the `answer_question` method.
48
- """
49
- try:
50
- params = {
51
- "user_prompt": user_prompt.text,
52
- "system_prompt": system_prompt.text,
53
- "iteration": iteration,
54
- "cache": self.cache,
55
- }
56
- if encoded_image:
57
- params["encoded_image"] = encoded_image
58
- response = await self.model.async_get_response(**params)
59
-
60
- # TODO: I *don't* think we need to delete the cache key here because I think
61
- # it will not have been set yet; the exception would have been raised before.
62
- except json.JSONDecodeError as e:
63
- raise AgentRespondedWithBadJSONError(
64
- f"Returned bad JSON: {e}"
65
- f"Prompt: {user_prompt}"
66
- f"System Prompt: {system_prompt}"
67
- )
68
-
69
- return response
70
-
71
- def _remove_from_cache(self, raw_response) -> None:
40
+ if "encoded_image" in prompts:
41
+ params["encoded_image"] = prompts["encoded_image"]
42
+
43
+ params.update({"iteration": self.iteration, "cache": self.cache})
44
+
45
+ agent_response_dict: AgentResponseDict = await self.model.async_get_response(
46
+ **params
47
+ )
48
+ # store to self in case validation failure
49
+ self.raw_model_response = agent_response_dict.model_outputs.response
50
+ self.generated_tokens = agent_response_dict.edsl_dict.generated_tokens
51
+
52
+ return self.extract_edsl_result_entry_and_validate(agent_response_dict)
53
+
54
+ def _remove_from_cache(self, cache_key) -> None:
72
55
  """Remove an entry from the cache."""
73
- cache_key = raw_response.get("cache_key", None)
74
56
  if cache_key:
75
57
  del self.cache.data[cache_key]
76
58
 
77
- def _format_raw_response(
78
- self, *, agent, question, scenario, raw_response, raw_model_response
79
- ) -> AgentResponseDict:
80
- """Return formatted raw response.
81
-
82
- This cleans up the raw response to make it suitable to pass to AgentResponseDict.
83
- """
84
- _ = agent
85
- try:
86
- response = question._validate_answer(raw_response)
87
- except Exception as e:
88
- """If the response is invalid, remove it from the cache and raise the exception."""
89
- self._remove_from_cache(raw_response)
90
- raise e
91
-
59
+ def determine_answer(self, raw_answer: str) -> Any:
92
60
  question_dict = self.survey.question_names_to_questions()
61
+ # iterates through the current answers and updates the question_dict (which is all questions)
93
62
  for other_question, answer in self.current_answers.items():
94
63
  if other_question in question_dict:
95
64
  question_dict[other_question].answer = answer
96
65
  else:
97
- # adds a comment to the question
66
+ # it might be a comment
98
67
  if (
99
68
  new_question := other_question.split("_comment")[0]
100
69
  ) in question_dict:
101
70
  question_dict[new_question].comment = answer
102
71
 
103
- combined_dict = {**question_dict, **scenario}
104
- answer = question._translate_answer_code_to_answer(
105
- response["answer"], combined_dict
106
- )
107
- data = {
108
- "answer": answer,
109
- "comment": response.get(
110
- "comment", ""
111
- ), # not all question have comment fields,
112
- "question_name": question.question_name,
113
- "prompts": self.get_prompts(),
114
- "cached_response": raw_response.get("cached_response", None),
115
- "usage": raw_response.get("usage", {}),
116
- "raw_model_response": raw_model_response,
117
- "cache_used": raw_response.get("cache_used", False),
118
- "cache_key": raw_response.get("cache_key", None),
119
- }
120
- return AgentResponseDict(**data)
121
-
122
- get_response = sync_wrapper(async_get_response)
123
- answer_question = sync_wrapper(async_answer_question)
124
-
125
-
126
- class InvigilatorSidecar(InvigilatorAI):
127
- """An invigilator that presents the 'raw' question to the agent
128
- & uses a sidecar model to answer questions."""
129
-
130
- async def async_answer_question(self, failed: bool = False) -> AgentResponseDict:
131
- """Answer a question using the AI model."""
132
- from edsl import Model
133
-
134
- advanced_model = self.sidecar_model
135
- simple_model = self.model
136
- question = self.question
137
- human_readable_question = (
138
- "Please answer this single question: " + question.human_readable()
139
- )
140
- print("Getting the simple model response to: ", human_readable_question)
141
- raw_simple_response = await simple_model.async_execute_model_call(
142
- user_prompt=human_readable_question,
143
- system_prompt="""Pretend you are a human answering a question. Do not break character.""",
144
- )
145
- simple_response = simple_model.parse_response(raw_simple_response)
146
- instructions = question.get_instructions()
147
-
148
- main_model_prompt = Prompt(
149
- text="""
150
- A simpler language model was asked this question:
151
-
152
- To the simpel model:
153
- {{ human_readable_question }}
154
-
155
- The simple model responded:
156
- <response>
157
- {{ simple_response }}
158
- </response>
159
-
160
- It was suppose to respond according to these instructions:
161
- <instructions>
162
- {{ instructions }}
163
- </instructions>
164
-
165
- Please format the simple model's response as it should have been formmated, given the instructions.
166
- Only respond in valid JSON, like so {"answer": "SPAM!"} or {"answer": "SPAM!", "comment": "I am a robot."}
167
- Do not inlcude the word 'json'
168
- """
169
- )
170
-
171
- d = {
172
- "human_readable_question": human_readable_question,
173
- "simple_response": simple_response,
174
- "instructions": instructions,
175
- }
176
-
177
- print("The human-readable question is: ", human_readable_question)
178
- print("The simple response is: ", simple_response)
179
-
180
- raw_response_data = await advanced_model.async_execute_model_call(
181
- user_prompt=main_model_prompt.render(d).text,
182
- system_prompt="You are a helpful assistant.",
183
- )
184
-
185
- raw_response = await advanced_model.async_get_response(
186
- user_prompt=main_model_prompt.render(d).text,
187
- system_prompt="You are a helpful assistant.",
188
- iteration=0,
189
- cache=self.cache,
190
- )
72
+ combined_dict = {**question_dict, **self.scenario}
73
+ # sometimes the answer is a code, so we need to translate it
74
+ return self.question._translate_answer_code_to_answer(raw_answer, combined_dict)
191
75
 
192
- data = {
193
- "agent": self.agent,
194
- "question": self.question,
195
- "scenario": self.scenario,
196
- }
197
- raw_response_data = {
198
- "raw_response": raw_response,
199
- "raw_model_response": raw_response["raw_model_response"],
200
- }
201
- params = data | raw_response_data
202
- response = self._format_raw_response(**params)
203
- response.update({"simple_model_raw_response": simple_response})
204
- return AgentResponseDict(**response)
76
+ def extract_edsl_result_entry_and_validate(
77
+ self, agent_response_dict: AgentResponseDict
78
+ ) -> EDSLResultObjectInput:
79
+ edsl_dict = agent_response_dict.edsl_dict._asdict()
80
+ exception_occurred = None
81
+ validated = False
82
+ try:
83
+ validated_edsl_dict = self.question._validate_answer(edsl_dict)
84
+ answer = self.determine_answer(validated_edsl_dict["answer"])
85
+ comment = validated_edsl_dict.get("comment", "")
86
+ validated = True
87
+ except QuestionAnswerValidationError as e:
88
+ answer = None
89
+ comment = "The response was not valid."
90
+ if self.raise_validation_errors:
91
+ exception_occurred = e
92
+ except Exception as non_validation_error:
93
+ answer = None
94
+ comment = "Some other error occurred."
95
+ exception_occurred = non_validation_error
96
+ finally:
97
+ # even if validation fails, we still return the result
98
+ data = {
99
+ "answer": answer,
100
+ "comment": comment,
101
+ "generated_tokens": agent_response_dict.edsl_dict.generated_tokens,
102
+ "question_name": self.question.question_name,
103
+ "prompts": self.get_prompts(),
104
+ "cached_response": agent_response_dict.model_outputs.cached_response,
105
+ "raw_model_response": agent_response_dict.model_outputs.response,
106
+ "cache_used": agent_response_dict.model_outputs.cache_used,
107
+ "cache_key": agent_response_dict.model_outputs.cache_key,
108
+ "validated": validated,
109
+ "exception_occurred": exception_occurred,
110
+ "cost": agent_response_dict.model_outputs.cost,
111
+ }
112
+ result = EDSLResultObjectInput(**data)
113
+ return result
205
114
 
206
- # get_response = sync_wrapper(async_get_response)
207
115
  answer_question = sync_wrapper(async_answer_question)
208
116
 
209
117
 
210
- class InvigilatorDebug(InvigilatorBase):
211
- """An invigilator class for debugging purposes."""
212
-
213
- async def async_answer_question(self, iteration: int = 0) -> AgentResponseDict:
214
- """Return the answer to the question."""
215
- results = self.question._simulate_answer(human_readable=True)
216
- results["prompts"] = self.get_prompts()
217
- results["question_name"] = self.question.question_name
218
- results["comment"] = "Debug comment"
219
- return AgentResponseDict(**results)
220
-
221
- def get_prompts(self) -> Dict[str, Prompt]:
222
- """Return the prompts used."""
223
- return {
224
- "user_prompt": Prompt("NA"),
225
- "system_prompt": Prompt("NA"),
226
- }
227
-
228
-
229
118
  class InvigilatorHuman(InvigilatorBase):
230
119
  """An invigilator for when a human is answering the question."""
231
120
 
121
+ validate_response: bool = False
122
+ translate_response: bool = False
123
+
232
124
  async def async_answer_question(self, iteration: int = 0) -> AgentResponseDict:
233
125
  """Return the answer to the question."""
234
- data = {
235
- "comment": "This is a real survey response from a human.",
236
- "answer": None,
237
- "prompts": self.get_prompts(),
238
- "question_name": self.question.question_name,
239
- }
126
+ comment = "This is a real survey response from a human."
127
+
128
+ def __repr__(self):
129
+ return f"{self.literal}"
130
+
131
+ exception_occurred = None
132
+ validated = False
240
133
  try:
241
134
  answer = self.agent.answer_question_directly(self.question, self.scenario)
242
- return AgentResponseDict(**(data | {"answer": answer}))
135
+ self.raw_model_response = answer
136
+
137
+ if self.validate_response:
138
+ _ = self.question._validate_answer({"answer": answer})
139
+ if self.translate_response:
140
+ answer = self.question._translate_answer_code_to_answer(
141
+ answer, self.scenario
142
+ )
143
+ validated = True
144
+ except QuestionAnswerValidationError as e:
145
+ answer = None
146
+ if self.raise_validation_errors:
147
+ exception_occurred = e
243
148
  except Exception as e:
244
- agent_response_dict = AgentResponseDict(
245
- **(data | {"answer": None, "comment": str(e)})
246
- )
247
- raise FailedTaskException(
248
- f"Failed to get response. The exception is {str(e)}",
249
- agent_response_dict,
250
- ) from e
149
+ answer = None
150
+ if self.raise_validation_errors:
151
+ exception_occurred = e
152
+ finally:
153
+ data = {
154
+ "generated_tokens": NotApplicable(),
155
+ "question_name": self.question.question_name,
156
+ "prompts": self.get_prompts(),
157
+ "cached_response": NotApplicable(),
158
+ "raw_model_response": NotApplicable(),
159
+ "cache_used": NotApplicable(),
160
+ "cache_key": NotApplicable(),
161
+ "answer": answer,
162
+ "comment": comment,
163
+ "validated": validated,
164
+ "exception_occurred": exception_occurred,
165
+ }
166
+ return EDSLResultObjectInput(**data)
251
167
 
252
168
 
253
169
  class InvigilatorFunctional(InvigilatorBase):
@@ -256,22 +172,21 @@ class InvigilatorFunctional(InvigilatorBase):
256
172
  async def async_answer_question(self, iteration: int = 0) -> AgentResponseDict:
257
173
  """Return the answer to the question."""
258
174
  func = self.question.answer_question_directly
259
- data = {
260
- "comment": "Functional.",
261
- "prompts": self.get_prompts(),
262
- "question_name": self.question.question_name,
263
- }
264
- try:
265
- answer = func(scenario=self.scenario, agent_traits=self.agent.traits)
266
- return AgentResponseDict(**(data | answer))
267
- except Exception as e:
268
- agent_response_dict = AgentResponseDict(
269
- **(data | {"answer": None, "comment": str(e)})
270
- )
271
- raise FailedTaskException(
272
- f"Failed to get response. The exception is {str(e)}",
273
- agent_response_dict,
274
- ) from e
175
+ answer = func(scenario=self.scenario, agent_traits=self.agent.traits)
176
+
177
+ return EDSLResultObjectInput(
178
+ generated_tokens=str(answer),
179
+ question_name=self.question.question_name,
180
+ prompts=self.get_prompts(),
181
+ cached_response=NotApplicable(),
182
+ raw_model_response=NotApplicable(),
183
+ cache_used=NotApplicable(),
184
+ cache_key=NotApplicable(),
185
+ answer=answer["answer"],
186
+ comment="This is the result of a functional question.",
187
+ validated=True,
188
+ exception_occurred=None,
189
+ )
275
190
 
276
191
  def get_prompts(self) -> Dict[str, Prompt]:
277
192
  """Return the prompts used."""