edsl 0.1.33.dev1__py3-none-any.whl → 0.1.33.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/TemplateLoader.py +24 -0
- edsl/__init__.py +8 -4
- edsl/agents/Agent.py +46 -14
- edsl/agents/AgentList.py +43 -0
- edsl/agents/Invigilator.py +125 -212
- edsl/agents/InvigilatorBase.py +140 -32
- edsl/agents/PromptConstructionMixin.py +43 -66
- edsl/agents/__init__.py +1 -0
- edsl/auto/AutoStudy.py +117 -0
- edsl/auto/StageBase.py +230 -0
- edsl/auto/StageGenerateSurvey.py +178 -0
- edsl/auto/StageLabelQuestions.py +125 -0
- edsl/auto/StagePersona.py +61 -0
- edsl/auto/StagePersonaDimensionValueRanges.py +88 -0
- edsl/auto/StagePersonaDimensionValues.py +74 -0
- edsl/auto/StagePersonaDimensions.py +69 -0
- edsl/auto/StageQuestions.py +73 -0
- edsl/auto/SurveyCreatorPipeline.py +21 -0
- edsl/auto/utilities.py +224 -0
- edsl/config.py +38 -39
- edsl/coop/PriceFetcher.py +58 -0
- edsl/coop/coop.py +39 -5
- edsl/data/Cache.py +35 -1
- edsl/data_transfer_models.py +120 -38
- edsl/enums.py +2 -0
- edsl/exceptions/language_models.py +25 -1
- edsl/exceptions/questions.py +62 -5
- edsl/exceptions/results.py +4 -0
- edsl/inference_services/AnthropicService.py +13 -11
- edsl/inference_services/AwsBedrock.py +19 -17
- edsl/inference_services/AzureAI.py +37 -20
- edsl/inference_services/GoogleService.py +16 -12
- edsl/inference_services/GroqService.py +2 -0
- edsl/inference_services/InferenceServiceABC.py +24 -0
- edsl/inference_services/MistralAIService.py +120 -0
- edsl/inference_services/OpenAIService.py +41 -50
- edsl/inference_services/TestService.py +71 -0
- edsl/inference_services/models_available_cache.py +0 -6
- edsl/inference_services/registry.py +4 -0
- edsl/jobs/Answers.py +10 -12
- edsl/jobs/FailedQuestion.py +78 -0
- edsl/jobs/Jobs.py +18 -13
- edsl/jobs/buckets/TokenBucket.py +39 -14
- edsl/jobs/interviews/Interview.py +297 -77
- edsl/jobs/interviews/InterviewExceptionEntry.py +83 -19
- edsl/jobs/interviews/interview_exception_tracking.py +0 -70
- edsl/jobs/interviews/retry_management.py +3 -1
- edsl/jobs/runners/JobsRunnerAsyncio.py +116 -70
- edsl/jobs/runners/JobsRunnerStatusMixin.py +1 -1
- edsl/jobs/tasks/QuestionTaskCreator.py +30 -23
- edsl/jobs/tasks/TaskHistory.py +131 -213
- edsl/language_models/LanguageModel.py +239 -129
- edsl/language_models/ModelList.py +2 -2
- edsl/language_models/RegisterLanguageModelsMeta.py +14 -29
- edsl/language_models/fake_openai_call.py +15 -0
- edsl/language_models/fake_openai_service.py +61 -0
- edsl/language_models/registry.py +15 -2
- edsl/language_models/repair.py +0 -19
- edsl/language_models/utilities.py +61 -0
- edsl/prompts/Prompt.py +52 -2
- edsl/questions/AnswerValidatorMixin.py +23 -26
- edsl/questions/QuestionBase.py +273 -242
- edsl/questions/QuestionBaseGenMixin.py +133 -0
- edsl/questions/QuestionBasePromptsMixin.py +266 -0
- edsl/questions/QuestionBudget.py +6 -0
- edsl/questions/QuestionCheckBox.py +227 -35
- edsl/questions/QuestionExtract.py +98 -27
- edsl/questions/QuestionFreeText.py +46 -29
- edsl/questions/QuestionFunctional.py +7 -0
- edsl/questions/QuestionList.py +141 -22
- edsl/questions/QuestionMultipleChoice.py +173 -64
- edsl/questions/QuestionNumerical.py +87 -46
- edsl/questions/QuestionRank.py +182 -24
- edsl/questions/RegisterQuestionsMeta.py +31 -12
- edsl/questions/ResponseValidatorABC.py +169 -0
- edsl/questions/__init__.py +3 -4
- edsl/questions/decorators.py +21 -0
- edsl/questions/derived/QuestionLikertFive.py +10 -5
- edsl/questions/derived/QuestionLinearScale.py +11 -1
- edsl/questions/derived/QuestionTopK.py +6 -0
- edsl/questions/derived/QuestionYesNo.py +16 -1
- edsl/questions/descriptors.py +43 -7
- edsl/questions/prompt_templates/question_budget.jinja +13 -0
- edsl/questions/prompt_templates/question_checkbox.jinja +32 -0
- edsl/questions/prompt_templates/question_extract.jinja +11 -0
- edsl/questions/prompt_templates/question_free_text.jinja +3 -0
- edsl/questions/prompt_templates/question_linear_scale.jinja +11 -0
- edsl/questions/prompt_templates/question_list.jinja +17 -0
- edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -0
- edsl/questions/prompt_templates/question_numerical.jinja +37 -0
- edsl/questions/question_registry.py +6 -2
- edsl/questions/templates/__init__.py +0 -0
- edsl/questions/templates/checkbox/__init__.py +0 -0
- edsl/questions/templates/checkbox/answering_instructions.jinja +10 -0
- edsl/questions/templates/checkbox/question_presentation.jinja +22 -0
- edsl/questions/templates/extract/answering_instructions.jinja +7 -0
- edsl/questions/templates/extract/question_presentation.jinja +1 -0
- edsl/questions/templates/free_text/__init__.py +0 -0
- edsl/questions/templates/free_text/answering_instructions.jinja +0 -0
- edsl/questions/templates/free_text/question_presentation.jinja +1 -0
- edsl/questions/templates/likert_five/__init__.py +0 -0
- edsl/questions/templates/likert_five/answering_instructions.jinja +10 -0
- edsl/questions/templates/likert_five/question_presentation.jinja +12 -0
- edsl/questions/templates/linear_scale/__init__.py +0 -0
- edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -0
- edsl/questions/templates/linear_scale/question_presentation.jinja +5 -0
- edsl/questions/templates/list/__init__.py +0 -0
- edsl/questions/templates/list/answering_instructions.jinja +4 -0
- edsl/questions/templates/list/question_presentation.jinja +5 -0
- edsl/questions/templates/multiple_choice/__init__.py +0 -0
- edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -0
- edsl/questions/templates/multiple_choice/html.jinja +0 -0
- edsl/questions/templates/multiple_choice/question_presentation.jinja +12 -0
- edsl/questions/templates/numerical/__init__.py +0 -0
- edsl/questions/templates/numerical/answering_instructions.jinja +8 -0
- edsl/questions/templates/numerical/question_presentation.jinja +7 -0
- edsl/questions/templates/rank/answering_instructions.jinja +11 -0
- edsl/questions/templates/rank/question_presentation.jinja +15 -0
- edsl/questions/templates/top_k/__init__.py +0 -0
- edsl/questions/templates/top_k/answering_instructions.jinja +8 -0
- edsl/questions/templates/top_k/question_presentation.jinja +22 -0
- edsl/questions/templates/yes_no/__init__.py +0 -0
- edsl/questions/templates/yes_no/answering_instructions.jinja +6 -0
- edsl/questions/templates/yes_no/question_presentation.jinja +12 -0
- edsl/results/Dataset.py +20 -0
- edsl/results/DatasetExportMixin.py +41 -47
- edsl/results/DatasetTree.py +145 -0
- edsl/results/Result.py +32 -5
- edsl/results/Results.py +131 -45
- edsl/results/ResultsDBMixin.py +3 -3
- edsl/results/Selector.py +118 -0
- edsl/results/tree_explore.py +115 -0
- edsl/scenarios/Scenario.py +10 -4
- edsl/scenarios/ScenarioList.py +348 -39
- edsl/scenarios/ScenarioListExportMixin.py +9 -0
- edsl/study/SnapShot.py +8 -1
- edsl/surveys/RuleCollection.py +2 -2
- edsl/surveys/Survey.py +634 -315
- edsl/surveys/SurveyExportMixin.py +71 -9
- edsl/surveys/SurveyFlowVisualizationMixin.py +2 -1
- edsl/surveys/SurveyQualtricsImport.py +75 -4
- edsl/surveys/instructions/ChangeInstruction.py +47 -0
- edsl/surveys/instructions/Instruction.py +34 -0
- edsl/surveys/instructions/InstructionCollection.py +77 -0
- edsl/surveys/instructions/__init__.py +0 -0
- edsl/templates/error_reporting/base.html +24 -0
- edsl/templates/error_reporting/exceptions_by_model.html +35 -0
- edsl/templates/error_reporting/exceptions_by_question_name.html +17 -0
- edsl/templates/error_reporting/exceptions_by_type.html +17 -0
- edsl/templates/error_reporting/interview_details.html +111 -0
- edsl/templates/error_reporting/interviews.html +10 -0
- edsl/templates/error_reporting/overview.html +5 -0
- edsl/templates/error_reporting/performance_plot.html +2 -0
- edsl/templates/error_reporting/report.css +74 -0
- edsl/templates/error_reporting/report.html +118 -0
- edsl/templates/error_reporting/report.js +25 -0
- {edsl-0.1.33.dev1.dist-info → edsl-0.1.33.dev2.dist-info}/METADATA +4 -2
- edsl-0.1.33.dev2.dist-info/RECORD +289 -0
- edsl/jobs/interviews/InterviewTaskBuildingMixin.py +0 -286
- edsl/utilities/gcp_bucket/simple_example.py +0 -9
- edsl-0.1.33.dev1.dist-info/RECORD +0 -209
- {edsl-0.1.33.dev1.dist-info → edsl-0.1.33.dev2.dist-info}/LICENSE +0 -0
- {edsl-0.1.33.dev1.dist-info → edsl-0.1.33.dev2.dist-info}/WHEEL +0 -0
@@ -55,6 +55,7 @@ class QuestionTaskCreator(UserList):
|
|
55
55
|
|
56
56
|
"""
|
57
57
|
super().__init__([])
|
58
|
+
# answer_question_func is the 'interview.answer_question_and_record_task" method
|
58
59
|
self.answer_question_func = answer_question_func
|
59
60
|
self.question = question
|
60
61
|
self.iteration = iteration
|
@@ -87,10 +88,10 @@ class QuestionTaskCreator(UserList):
|
|
87
88
|
"""
|
88
89
|
self.append(task)
|
89
90
|
|
90
|
-
def generate_task(self
|
91
|
+
def generate_task(self) -> asyncio.Task:
|
91
92
|
"""Create a task that depends on the passed-in dependencies."""
|
92
93
|
task = asyncio.create_task(
|
93
|
-
self._run_task_async(
|
94
|
+
self._run_task_async(), name=self.question.question_name
|
94
95
|
)
|
95
96
|
task.depends_on = [t.get_name() for t in self]
|
96
97
|
return task
|
@@ -103,7 +104,7 @@ class QuestionTaskCreator(UserList):
|
|
103
104
|
"""Returns the token usage for the task.
|
104
105
|
|
105
106
|
>>> qt = QuestionTaskCreator.example()
|
106
|
-
>>> answers = asyncio.run(qt._run_focal_task(
|
107
|
+
>>> answers = asyncio.run(qt._run_focal_task())
|
107
108
|
>>> qt.token_usage()
|
108
109
|
{'cached_tokens': TokenUsage(from_cache=True, prompt_tokens=0, completion_tokens=0), 'new_tokens': TokenUsage(from_cache=False, prompt_tokens=0, completion_tokens=0)}
|
109
110
|
"""
|
@@ -111,15 +112,15 @@ class QuestionTaskCreator(UserList):
|
|
111
112
|
cached_tokens=self.cached_token_usage, new_tokens=self.new_token_usage
|
112
113
|
)
|
113
114
|
|
114
|
-
async def _run_focal_task(self
|
115
|
+
async def _run_focal_task(self) -> Answers:
|
115
116
|
"""Run the focal task i.e., the question that we are interested in answering.
|
116
117
|
|
117
118
|
It is only called after all the dependency tasks are completed.
|
118
119
|
|
119
120
|
>>> qt = QuestionTaskCreator.example()
|
120
|
-
>>> answers = asyncio.run(qt._run_focal_task(
|
121
|
-
>>> answers
|
122
|
-
'
|
121
|
+
>>> answers = asyncio.run(qt._run_focal_task())
|
122
|
+
>>> answers.answer
|
123
|
+
'This is an example answer'
|
123
124
|
"""
|
124
125
|
|
125
126
|
requested_tokens = self.estimated_tokens()
|
@@ -132,19 +133,19 @@ class QuestionTaskCreator(UserList):
|
|
132
133
|
self.waiting = True
|
133
134
|
self.task_status = TaskStatus.WAITING_FOR_REQUEST_CAPACITY
|
134
135
|
|
135
|
-
await self.
|
136
|
+
await self.requests_bucket.get_tokens(1, cheat_bucket_capacity=True)
|
136
137
|
|
137
138
|
self.task_status = TaskStatus.API_CALL_IN_PROGRESS
|
138
139
|
try:
|
139
140
|
results = await self.answer_question_func(
|
140
|
-
question=self.question,
|
141
|
+
question=self.question, task=None # self
|
141
142
|
)
|
142
143
|
self.task_status = TaskStatus.SUCCESS
|
143
144
|
except Exception as e:
|
144
145
|
self.task_status = TaskStatus.FAILED
|
145
146
|
raise e
|
146
147
|
|
147
|
-
if results.
|
148
|
+
if results.cache_used:
|
148
149
|
self.tokens_bucket.add_tokens(requested_tokens)
|
149
150
|
self.requests_bucket.add_tokens(1)
|
150
151
|
self.from_cache = True
|
@@ -155,17 +156,18 @@ class QuestionTaskCreator(UserList):
|
|
155
156
|
self.tokens_bucket.turbo_mode_off()
|
156
157
|
self.requests_bucket.turbo_mode_off()
|
157
158
|
|
158
|
-
|
159
|
+
# breakpoint()
|
160
|
+
# _ = results.pop("cached_response", None)
|
159
161
|
|
160
|
-
tracker = self.cached_token_usage if self.from_cache else self.new_token_usage
|
162
|
+
# tracker = self.cached_token_usage if self.from_cache else self.new_token_usage
|
161
163
|
|
162
164
|
# TODO: This is hacky. The 'func' call should return an object that definitely has a 'usage' key.
|
163
|
-
usage = results.get("usage", {"prompt_tokens": 0, "completion_tokens": 0})
|
164
|
-
prompt_tokens = usage.get("prompt_tokens", 0)
|
165
|
-
completion_tokens = usage.get("completion_tokens", 0)
|
166
|
-
tracker.add_tokens(
|
167
|
-
|
168
|
-
)
|
165
|
+
# usage = results.get("usage", {"prompt_tokens": 0, "completion_tokens": 0})
|
166
|
+
# prompt_tokens = usage.get("prompt_tokens", 0)
|
167
|
+
# completion_tokens = usage.get("completion_tokens", 0)
|
168
|
+
# tracker.add_tokens(
|
169
|
+
# prompt_tokens=prompt_tokens, completion_tokens=completion_tokens
|
170
|
+
# )
|
169
171
|
|
170
172
|
return results
|
171
173
|
|
@@ -177,8 +179,13 @@ class QuestionTaskCreator(UserList):
|
|
177
179
|
|
178
180
|
m = ModelBuckets.infinity_bucket()
|
179
181
|
|
180
|
-
|
181
|
-
|
182
|
+
from collections import namedtuple
|
183
|
+
|
184
|
+
AnswerDict = namedtuple("AnswerDict", ["answer", "cache_used"])
|
185
|
+
answer = AnswerDict(answer="This is an example answer", cache_used=False)
|
186
|
+
|
187
|
+
async def answer_question_func(question, task):
|
188
|
+
return answer
|
182
189
|
|
183
190
|
return cls(
|
184
191
|
question=QuestionFreeText.example(),
|
@@ -188,7 +195,7 @@ class QuestionTaskCreator(UserList):
|
|
188
195
|
iteration=0,
|
189
196
|
)
|
190
197
|
|
191
|
-
async def _run_task_async(self
|
198
|
+
async def _run_task_async(self) -> None:
|
192
199
|
"""Run the task asynchronously, awaiting the tasks that must be completed before this one can be run.
|
193
200
|
|
194
201
|
>>> qt1 = QuestionTaskCreator.example()
|
@@ -231,8 +238,6 @@ class QuestionTaskCreator(UserList):
|
|
231
238
|
if isinstance(result, Exception):
|
232
239
|
raise result
|
233
240
|
|
234
|
-
return await self._run_focal_task(debug)
|
235
|
-
|
236
241
|
except asyncio.CancelledError:
|
237
242
|
self.task_status = TaskStatus.CANCELLED
|
238
243
|
raise
|
@@ -244,6 +249,8 @@ class QuestionTaskCreator(UserList):
|
|
244
249
|
f"Required tasks failed for {self.question.question_name}"
|
245
250
|
) from e
|
246
251
|
|
252
|
+
return await self._run_focal_task()
|
253
|
+
|
247
254
|
|
248
255
|
if __name__ == "__main__":
|
249
256
|
import doctest
|
edsl/jobs/tasks/TaskHistory.py
CHANGED
@@ -1,7 +1,10 @@
|
|
1
|
-
from edsl.jobs.tasks.task_status_enum import TaskStatus
|
2
1
|
from typing import List, Optional
|
3
2
|
from io import BytesIO
|
3
|
+
import webbrowser
|
4
|
+
import os
|
4
5
|
import base64
|
6
|
+
from importlib import resources
|
7
|
+
from edsl.jobs.tasks.task_status_enum import TaskStatus
|
5
8
|
|
6
9
|
|
7
10
|
class TaskHistory:
|
@@ -30,7 +33,12 @@ class TaskHistory:
|
|
30
33
|
|
31
34
|
from edsl.config import CONFIG
|
32
35
|
|
33
|
-
results = j.run(
|
36
|
+
results = j.run(
|
37
|
+
print_exceptions=False,
|
38
|
+
skip_retry=True,
|
39
|
+
cache=False,
|
40
|
+
raise_validation_errors=True,
|
41
|
+
)
|
34
42
|
|
35
43
|
return cls(results.task_history.total_interviews)
|
36
44
|
|
@@ -188,58 +196,14 @@ class TaskHistory:
|
|
188
196
|
plt.show()
|
189
197
|
|
190
198
|
def css(self):
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
line-height: 1.6;
|
195
|
-
background-color: #f9f9f9;
|
196
|
-
color: #333;
|
197
|
-
margin: 20px;
|
198
|
-
}
|
199
|
-
|
200
|
-
.interview {
|
201
|
-
font-size: 1.5em;
|
202
|
-
margin-bottom: 10px;
|
203
|
-
padding: 10px;
|
204
|
-
background-color: #e3f2fd;
|
205
|
-
border-left: 5px solid #2196f3;
|
206
|
-
}
|
207
|
-
|
208
|
-
.question {
|
209
|
-
font-size: 1.2em;
|
210
|
-
margin-bottom: 10px;
|
211
|
-
padding: 10px;
|
212
|
-
background-color: #fff9c4;
|
213
|
-
border-left: 5px solid #ffeb3b;
|
214
|
-
}
|
215
|
-
|
216
|
-
.exception-detail {
|
217
|
-
margin-bottom: 10px;
|
218
|
-
padding: 10px;
|
219
|
-
background-color: #ffebee;
|
220
|
-
border-left: 5px solid #f44336;
|
221
|
-
}
|
222
|
-
|
223
|
-
.question-detail {
|
224
|
-
border: 3px solid black; /* Adjust the thickness by changing the number */
|
225
|
-
padding: 10px; /* Optional: Adds some padding inside the border */
|
226
|
-
}
|
227
|
-
|
228
|
-
.exception-detail div {
|
229
|
-
margin-bottom: 5px;
|
230
|
-
}
|
199
|
+
env = resources.files("edsl").joinpath("templates/error_reporting")
|
200
|
+
css = env.joinpath("report.css").read_text()
|
201
|
+
return css
|
231
202
|
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
.exception-time,
|
238
|
-
.exception-traceback {
|
239
|
-
font-style: italic;
|
240
|
-
color: #555;
|
241
|
-
}
|
242
|
-
"""
|
203
|
+
def javascript(self):
|
204
|
+
env = resources.files("edsl").joinpath("templates/error_reporting")
|
205
|
+
js = env.joinpath("report.js").read_text()
|
206
|
+
return js
|
243
207
|
|
244
208
|
@property
|
245
209
|
def exceptions_by_type(self) -> dict:
|
@@ -248,206 +212,136 @@ class TaskHistory:
|
|
248
212
|
for interview in self.total_interviews:
|
249
213
|
for question_name, exceptions in interview.exceptions.items():
|
250
214
|
for exception in exceptions:
|
251
|
-
exception_type = exception
|
215
|
+
exception_type = exception.exception.__class__.__name__
|
216
|
+
# exception_type = exception["exception"]
|
217
|
+
# breakpoint()
|
252
218
|
if exception_type in exceptions_by_type:
|
253
219
|
exceptions_by_type[exception_type] += 1
|
254
220
|
else:
|
255
221
|
exceptions_by_type[exception_type] = 1
|
256
222
|
return exceptions_by_type
|
257
223
|
|
224
|
+
@property
|
225
|
+
def exceptions_by_service(self) -> dict:
|
226
|
+
"""Return a dictionary of exceptions tallied by service."""
|
227
|
+
exceptions_by_service = {}
|
228
|
+
for interview in self.total_interviews:
|
229
|
+
service = interview.model._inference_service_
|
230
|
+
if service not in exceptions_by_service:
|
231
|
+
exceptions_by_service[service] = 0
|
232
|
+
if interview.exceptions != {}:
|
233
|
+
exceptions_by_service[service] += len(interview.exceptions)
|
234
|
+
return exceptions_by_service
|
235
|
+
|
258
236
|
@property
|
259
237
|
def exceptions_by_question_name(self) -> dict:
|
260
238
|
"""Return a dictionary of exceptions tallied by question name."""
|
261
239
|
exceptions_by_question_name = {}
|
262
240
|
for interview in self.total_interviews:
|
263
241
|
for question_name, exceptions in interview.exceptions.items():
|
264
|
-
|
265
|
-
|
266
|
-
|
242
|
+
question_type = interview.survey.get_question(
|
243
|
+
question_name
|
244
|
+
).question_type
|
245
|
+
# breakpoint()
|
246
|
+
if (question_name, question_type) not in exceptions_by_question_name:
|
247
|
+
exceptions_by_question_name[(question_name, question_type)] = 0
|
248
|
+
exceptions_by_question_name[(question_name, question_type)] += len(
|
249
|
+
exceptions
|
250
|
+
)
|
267
251
|
|
268
252
|
for question in self.total_interviews[0].survey.questions:
|
269
|
-
if
|
270
|
-
|
271
|
-
|
253
|
+
if (
|
254
|
+
question.question_name,
|
255
|
+
question.question_type,
|
256
|
+
) not in exceptions_by_question_name:
|
257
|
+
exceptions_by_question_name[
|
258
|
+
(question.question_name, question.question_type)
|
259
|
+
] = 0
|
260
|
+
|
261
|
+
sorted_exceptions_by_question_name = {
|
262
|
+
k: v
|
263
|
+
for k, v in sorted(
|
264
|
+
exceptions_by_question_name.items(),
|
265
|
+
key=lambda item: item[1],
|
266
|
+
reverse=True,
|
267
|
+
)
|
268
|
+
}
|
269
|
+
return sorted_exceptions_by_question_name
|
272
270
|
|
273
271
|
@property
|
274
272
|
def exceptions_by_model(self) -> dict:
|
275
273
|
"""Return a dictionary of exceptions tallied by model and question name."""
|
276
274
|
exceptions_by_model = {}
|
277
275
|
for interview in self.total_interviews:
|
278
|
-
model = interview.model
|
279
|
-
|
280
|
-
|
276
|
+
model = interview.model.model
|
277
|
+
service = interview.model._inference_service_
|
278
|
+
if (service, model) not in exceptions_by_model:
|
279
|
+
exceptions_by_model[(service, model)] = 0
|
281
280
|
if interview.exceptions != {}:
|
282
|
-
exceptions_by_model[model
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
"""Return an HTML report."""
|
293
|
-
|
294
|
-
from IPython.display import display, HTML
|
295
|
-
import tempfile
|
296
|
-
import os
|
297
|
-
from edsl.utilities.utilities import is_notebook
|
298
|
-
from jinja2 import Template
|
281
|
+
exceptions_by_model[(service, model)] += len(interview.exceptions)
|
282
|
+
|
283
|
+
# sort the exceptions by model
|
284
|
+
sorted_exceptions_by_model = {
|
285
|
+
k: v
|
286
|
+
for k, v in sorted(
|
287
|
+
exceptions_by_model.items(), key=lambda item: item[1], reverse=True
|
288
|
+
)
|
289
|
+
}
|
290
|
+
return sorted_exceptions_by_model
|
299
291
|
|
292
|
+
def generate_html_report(self, css: Optional[str]):
|
300
293
|
performance_plot_html = self.plot(num_periods=100, get_embedded_html=True)
|
301
294
|
|
302
295
|
if css is None:
|
303
296
|
css = self.css()
|
304
297
|
|
305
|
-
models_used = set([i.model for index, i in self._interviews.items()])
|
298
|
+
models_used = set([i.model.model for index, i in self._interviews.items()])
|
306
299
|
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
<head>
|
312
|
-
<meta charset="UTF-8">
|
313
|
-
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
314
|
-
<title>Exception Details</title>
|
315
|
-
<style>
|
316
|
-
{{ css }}
|
317
|
-
</style>
|
318
|
-
</head>
|
319
|
-
<body>
|
320
|
-
<h1>Overview</h1>
|
321
|
-
<p>There were {{ interviews|length }} total interviews. The number of interviews with exceptions was {{ num_exceptions }}.</p>
|
322
|
-
<p>The models used were: {{ models_used }}.</p>
|
323
|
-
<p>For documentation on dealing with exceptions on Expected Parrot,
|
324
|
-
see <a href="https://docs.expectedparrot.com/en/latest/exceptions.html">here</a>.</p>
|
325
|
-
|
326
|
-
<h2>Exceptions by Type</h2>
|
327
|
-
<table>
|
328
|
-
<thead>
|
329
|
-
<tr>
|
330
|
-
<th>Exception Type</th>
|
331
|
-
<th>Number</th>
|
332
|
-
</tr>
|
333
|
-
</thead>
|
334
|
-
<tbody>
|
335
|
-
{% for exception_type, exceptions in exceptions_by_type.items() %}
|
336
|
-
<tr>
|
337
|
-
<td>{{ exception_type }}</td>
|
338
|
-
<td>{{ exceptions }}</td>
|
339
|
-
</tr>
|
340
|
-
{% endfor %}
|
341
|
-
</tbody>
|
342
|
-
</table>
|
343
|
-
|
344
|
-
|
345
|
-
<h2>Exceptions by Model</h2>
|
346
|
-
<table>
|
347
|
-
<thead>
|
348
|
-
<tr>
|
349
|
-
<th>Model</th>
|
350
|
-
<th>Number</th>
|
351
|
-
</tr>
|
352
|
-
</thead>
|
353
|
-
<tbody>
|
354
|
-
{% for model, exceptions in exceptions_by_model.items() %}
|
355
|
-
<tr>
|
356
|
-
<td>{{ model }}</td>
|
357
|
-
<td>{{ exceptions }}</td>
|
358
|
-
</tr>
|
359
|
-
{% endfor %}
|
360
|
-
</tbody>
|
361
|
-
</table>
|
362
|
-
|
363
|
-
|
364
|
-
<h2>Exceptions by Question Name</h2>
|
365
|
-
<table>
|
366
|
-
<thead>
|
367
|
-
<tr>
|
368
|
-
<th>Question Name</th>
|
369
|
-
<th>Number of Exceptions</th>
|
370
|
-
</tr>
|
371
|
-
</thead>
|
372
|
-
<tbody>
|
373
|
-
{% for question_name, exception_count in exceptions_by_question_name.items() %}
|
374
|
-
<tr>
|
375
|
-
<td>{{ question_name }}</td>
|
376
|
-
<td>{{ exception_count }}</td>
|
377
|
-
</tr>
|
378
|
-
{% endfor %}
|
379
|
-
</tbody>
|
380
|
-
</table>
|
381
|
-
|
382
|
-
|
383
|
-
{% for index, interview in interviews.items() %}
|
384
|
-
{% if interview.exceptions != {} %}
|
385
|
-
<div class="interview">Interview: {{ index }} </div>
|
386
|
-
<h1>Failing questions</h1>
|
387
|
-
{% endif %}
|
388
|
-
{% for question, exceptions in interview.exceptions.items() %}
|
389
|
-
<div class="question">question_name: {{ question }}</div>
|
390
|
-
|
391
|
-
<h2>Question</h2>
|
392
|
-
<div class="question-detail">
|
393
|
-
{{ interview.survey.get_question(question).html(interview.scenario,interview.agent) }}
|
394
|
-
</div>
|
395
|
-
|
396
|
-
<h2>Scenario</h2>
|
397
|
-
<div class="scenario">
|
398
|
-
{{ interview.scenario._repr_html_() }}
|
399
|
-
</div>
|
400
|
-
|
401
|
-
<h2>Agent</h2>
|
402
|
-
<div class="agent">
|
403
|
-
{{ interview.agent._repr_html_() }}
|
404
|
-
</div>
|
405
|
-
|
406
|
-
<h2>Model</h2>
|
407
|
-
<div class="model">
|
408
|
-
{{ interview.model._repr_html_() }}
|
409
|
-
</div>
|
410
|
-
|
411
|
-
<h2>Exception details</h2>
|
412
|
-
|
413
|
-
{% for exception_message in exceptions %}
|
414
|
-
<div class="exception-detail">
|
415
|
-
<div class="exception-exception">Exception: {{ exception_message.exception }}</div>
|
416
|
-
<div class="exception-time">Time: {{ exception_message.time }}</div>
|
417
|
-
<div class="exception-traceback">Traceback: <pre>{{ exception_message.traceback }} </pre></div>
|
418
|
-
</div>
|
419
|
-
{% endfor %}
|
420
|
-
{% endfor %}
|
421
|
-
{% endfor %}
|
422
|
-
|
423
|
-
<h1>Performance Plot</h1>
|
424
|
-
{{ performance_plot_html }}
|
425
|
-
</body>
|
426
|
-
</html>
|
427
|
-
"""
|
428
|
-
)
|
300
|
+
from jinja2 import Environment, FileSystemLoader
|
301
|
+
from edsl.TemplateLoader import TemplateLoader
|
302
|
+
|
303
|
+
env = Environment(loader=TemplateLoader("edsl", "templates/error_reporting"))
|
429
304
|
|
430
|
-
#
|
305
|
+
# Load and render a template
|
306
|
+
template = env.get_template("base.html")
|
307
|
+
# rendered_template = template.render(your_data=your_data)
|
431
308
|
|
432
309
|
# Render the template with data
|
433
310
|
output = template.render(
|
434
311
|
interviews=self._interviews,
|
435
312
|
css=css,
|
313
|
+
javascript=self.javascript(),
|
436
314
|
num_exceptions=len(self.exceptions),
|
437
315
|
performance_plot_html=performance_plot_html,
|
438
316
|
exceptions_by_type=self.exceptions_by_type,
|
439
317
|
exceptions_by_question_name=self.exceptions_by_question_name,
|
440
318
|
exceptions_by_model=self.exceptions_by_model,
|
319
|
+
exceptions_by_service=self.exceptions_by_service,
|
441
320
|
models_used=models_used,
|
442
321
|
)
|
322
|
+
return output
|
323
|
+
|
324
|
+
def html(
|
325
|
+
self,
|
326
|
+
filename: Optional[str] = None,
|
327
|
+
return_link=False,
|
328
|
+
css=None,
|
329
|
+
cta="Open Report in New Tab",
|
330
|
+
open_in_browser=True,
|
331
|
+
):
|
332
|
+
"""Return an HTML report."""
|
333
|
+
|
334
|
+
from IPython.display import display, HTML
|
335
|
+
import tempfile
|
336
|
+
import os
|
337
|
+
from edsl.utilities.utilities import is_notebook
|
338
|
+
|
339
|
+
output = self.generate_html_report(css)
|
443
340
|
|
444
341
|
# Save the rendered output to a file
|
445
342
|
with open("output.html", "w") as f:
|
446
343
|
f.write(output)
|
447
344
|
|
448
|
-
if css is None:
|
449
|
-
css = self.css()
|
450
|
-
|
451
345
|
if filename is None:
|
452
346
|
current_directory = os.getcwd()
|
453
347
|
filename = tempfile.NamedTemporaryFile(
|
@@ -456,10 +350,7 @@ class TaskHistory:
|
|
456
350
|
|
457
351
|
with open(filename, "w") as f:
|
458
352
|
with open(filename, "w") as f:
|
459
|
-
# f.write(html_header)
|
460
|
-
# f.write(self._repr_html_())
|
461
353
|
f.write(output)
|
462
|
-
# f.write(html_footer)
|
463
354
|
|
464
355
|
if is_notebook():
|
465
356
|
import html
|
@@ -472,17 +363,44 @@ class TaskHistory:
|
|
472
363
|
<iframe srcdoc="{ escaped_output }" style="width: 800px; height: 600px;"></iframe>
|
473
364
|
"""
|
474
365
|
display(HTML(iframe))
|
475
|
-
# display(HTML(output))
|
476
366
|
else:
|
477
367
|
print(f"Exception report saved to {filename}")
|
478
|
-
import webbrowser
|
479
|
-
import os
|
480
368
|
|
369
|
+
if open_in_browser:
|
481
370
|
webbrowser.open(f"file://{os.path.abspath(filename)}")
|
482
371
|
|
483
372
|
if return_link:
|
484
373
|
return filename
|
485
374
|
|
375
|
+
def notebook(self):
|
376
|
+
"""Create a notebook with the HTML content embedded in the first cell, then delete the cell content while keeping the output."""
|
377
|
+
from nbformat import v4 as nbf
|
378
|
+
from nbconvert.preprocessors import ExecutePreprocessor
|
379
|
+
import nbformat
|
380
|
+
import os
|
381
|
+
|
382
|
+
# Use the existing html method to generate the HTML content
|
383
|
+
output_html = self.generate_html_report(css=None)
|
384
|
+
nb = nbf.new_notebook()
|
385
|
+
|
386
|
+
# Add a code cell that renders the HTML content
|
387
|
+
code_cell = nbf.new_code_cell(
|
388
|
+
f"""
|
389
|
+
from IPython.display import HTML, display
|
390
|
+
display(HTML('''{output_html}'''))
|
391
|
+
"""
|
392
|
+
)
|
393
|
+
nb.cells.append(code_cell)
|
394
|
+
|
395
|
+
# Execute the notebook
|
396
|
+
ep = ExecutePreprocessor(timeout=600, kernel_name="python3")
|
397
|
+
ep.preprocess(nb, {"metadata": {"path": os.getcwd()}})
|
398
|
+
|
399
|
+
# After execution, clear the cell's source code
|
400
|
+
nb.cells[0].source = ""
|
401
|
+
|
402
|
+
return nb
|
403
|
+
|
486
404
|
|
487
405
|
if __name__ == "__main__":
|
488
406
|
import doctest
|