edsl 0.1.39.dev3__py3-none-any.whl → 0.1.39.dev5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +413 -332
- edsl/BaseDiff.py +260 -260
- edsl/TemplateLoader.py +24 -24
- edsl/__init__.py +57 -49
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +1071 -867
- edsl/agents/AgentList.py +551 -413
- edsl/agents/Invigilator.py +284 -233
- edsl/agents/InvigilatorBase.py +257 -270
- edsl/agents/PromptConstructor.py +272 -354
- edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
- edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
- edsl/agents/__init__.py +2 -3
- edsl/agents/descriptors.py +99 -99
- edsl/agents/prompt_helpers.py +129 -129
- edsl/agents/question_option_processor.py +172 -0
- edsl/auto/AutoStudy.py +130 -117
- edsl/auto/StageBase.py +243 -230
- edsl/auto/StageGenerateSurvey.py +178 -178
- edsl/auto/StageLabelQuestions.py +125 -125
- edsl/auto/StagePersona.py +61 -61
- edsl/auto/StagePersonaDimensionValueRanges.py +88 -88
- edsl/auto/StagePersonaDimensionValues.py +74 -74
- edsl/auto/StagePersonaDimensions.py +69 -69
- edsl/auto/StageQuestions.py +74 -73
- edsl/auto/SurveyCreatorPipeline.py +21 -21
- edsl/auto/utilities.py +218 -224
- edsl/base/Base.py +279 -279
- edsl/config.py +177 -157
- edsl/conversation/Conversation.py +290 -290
- edsl/conversation/car_buying.py +59 -58
- edsl/conversation/chips.py +95 -95
- edsl/conversation/mug_negotiation.py +81 -81
- edsl/conversation/next_speaker_utilities.py +93 -93
- edsl/coop/CoopFunctionsMixin.py +15 -0
- edsl/coop/ExpectedParrotKeyHandler.py +125 -0
- edsl/coop/PriceFetcher.py +54 -54
- edsl/coop/__init__.py +2 -2
- edsl/coop/coop.py +1106 -1028
- edsl/coop/utils.py +131 -131
- edsl/data/Cache.py +573 -555
- edsl/data/CacheEntry.py +230 -233
- edsl/data/CacheHandler.py +168 -149
- edsl/data/RemoteCacheSync.py +186 -78
- edsl/data/SQLiteDict.py +292 -292
- edsl/data/__init__.py +5 -4
- edsl/data/orm.py +10 -10
- edsl/data_transfer_models.py +74 -73
- edsl/enums.py +202 -175
- edsl/exceptions/BaseException.py +21 -21
- edsl/exceptions/__init__.py +54 -54
- edsl/exceptions/agents.py +54 -42
- edsl/exceptions/cache.py +5 -5
- edsl/exceptions/configuration.py +16 -16
- edsl/exceptions/coop.py +10 -10
- edsl/exceptions/data.py +14 -14
- edsl/exceptions/general.py +34 -34
- edsl/exceptions/inference_services.py +5 -0
- edsl/exceptions/jobs.py +33 -33
- edsl/exceptions/language_models.py +63 -63
- edsl/exceptions/prompts.py +15 -15
- edsl/exceptions/questions.py +109 -91
- edsl/exceptions/results.py +29 -29
- edsl/exceptions/scenarios.py +29 -22
- edsl/exceptions/surveys.py +37 -37
- edsl/inference_services/AnthropicService.py +106 -87
- edsl/inference_services/AvailableModelCacheHandler.py +184 -0
- edsl/inference_services/AvailableModelFetcher.py +215 -0
- edsl/inference_services/AwsBedrock.py +118 -120
- edsl/inference_services/AzureAI.py +215 -217
- edsl/inference_services/DeepInfraService.py +18 -18
- edsl/inference_services/GoogleService.py +143 -148
- edsl/inference_services/GroqService.py +20 -20
- edsl/inference_services/InferenceServiceABC.py +80 -147
- edsl/inference_services/InferenceServicesCollection.py +138 -97
- edsl/inference_services/MistralAIService.py +120 -123
- edsl/inference_services/OllamaService.py +18 -18
- edsl/inference_services/OpenAIService.py +236 -224
- edsl/inference_services/PerplexityService.py +160 -163
- edsl/inference_services/ServiceAvailability.py +135 -0
- edsl/inference_services/TestService.py +90 -89
- edsl/inference_services/TogetherAIService.py +172 -170
- edsl/inference_services/data_structures.py +134 -0
- edsl/inference_services/models_available_cache.py +118 -118
- edsl/inference_services/rate_limits_cache.py +25 -25
- edsl/inference_services/registry.py +41 -41
- edsl/inference_services/write_available.py +10 -10
- edsl/jobs/AnswerQuestionFunctionConstructor.py +223 -0
- edsl/jobs/Answers.py +43 -56
- edsl/jobs/FetchInvigilator.py +47 -0
- edsl/jobs/InterviewTaskManager.py +98 -0
- edsl/jobs/InterviewsConstructor.py +50 -0
- edsl/jobs/Jobs.py +823 -898
- edsl/jobs/JobsChecks.py +172 -147
- edsl/jobs/JobsComponentConstructor.py +189 -0
- edsl/jobs/JobsPrompts.py +270 -268
- edsl/jobs/JobsRemoteInferenceHandler.py +311 -239
- edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
- edsl/jobs/RequestTokenEstimator.py +30 -0
- edsl/jobs/__init__.py +1 -1
- edsl/jobs/async_interview_runner.py +138 -0
- edsl/jobs/buckets/BucketCollection.py +104 -63
- edsl/jobs/buckets/ModelBuckets.py +65 -65
- edsl/jobs/buckets/TokenBucket.py +283 -251
- edsl/jobs/buckets/TokenBucketAPI.py +211 -0
- edsl/jobs/buckets/TokenBucketClient.py +191 -0
- edsl/jobs/check_survey_scenario_compatibility.py +85 -0
- edsl/jobs/data_structures.py +120 -0
- edsl/jobs/decorators.py +35 -0
- edsl/jobs/interviews/Interview.py +396 -661
- edsl/jobs/interviews/InterviewExceptionCollection.py +99 -99
- edsl/jobs/interviews/InterviewExceptionEntry.py +186 -186
- edsl/jobs/interviews/InterviewStatistic.py +63 -63
- edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -25
- edsl/jobs/interviews/InterviewStatusDictionary.py +78 -78
- edsl/jobs/interviews/InterviewStatusLog.py +92 -92
- edsl/jobs/interviews/ReportErrors.py +66 -66
- edsl/jobs/interviews/interview_status_enum.py +9 -9
- edsl/jobs/jobs_status_enums.py +9 -0
- edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
- edsl/jobs/results_exceptions_handler.py +98 -0
- edsl/jobs/runners/JobsRunnerAsyncio.py +151 -466
- edsl/jobs/runners/JobsRunnerStatus.py +297 -330
- edsl/jobs/tasks/QuestionTaskCreator.py +244 -242
- edsl/jobs/tasks/TaskCreators.py +64 -64
- edsl/jobs/tasks/TaskHistory.py +470 -450
- edsl/jobs/tasks/TaskStatusLog.py +23 -23
- edsl/jobs/tasks/task_status_enum.py +161 -163
- edsl/jobs/tokens/InterviewTokenUsage.py +27 -27
- edsl/jobs/tokens/TokenUsage.py +34 -34
- edsl/language_models/ComputeCost.py +63 -0
- edsl/language_models/LanguageModel.py +626 -668
- edsl/language_models/ModelList.py +164 -155
- edsl/language_models/PriceManager.py +127 -0
- edsl/language_models/RawResponseHandler.py +106 -0
- edsl/language_models/RegisterLanguageModelsMeta.py +184 -184
- edsl/language_models/ServiceDataSources.py +0 -0
- edsl/language_models/__init__.py +2 -3
- edsl/language_models/fake_openai_call.py +15 -15
- edsl/language_models/fake_openai_service.py +61 -61
- edsl/language_models/key_management/KeyLookup.py +63 -0
- edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
- edsl/language_models/key_management/KeyLookupCollection.py +38 -0
- edsl/language_models/key_management/__init__.py +0 -0
- edsl/language_models/key_management/models.py +131 -0
- edsl/language_models/model.py +256 -0
- edsl/language_models/repair.py +156 -156
- edsl/language_models/utilities.py +65 -64
- edsl/notebooks/Notebook.py +263 -258
- edsl/notebooks/NotebookToLaTeX.py +142 -0
- edsl/notebooks/__init__.py +1 -1
- edsl/prompts/Prompt.py +352 -362
- edsl/prompts/__init__.py +2 -2
- edsl/questions/ExceptionExplainer.py +77 -0
- edsl/questions/HTMLQuestion.py +103 -0
- edsl/questions/QuestionBase.py +518 -664
- edsl/questions/QuestionBasePromptsMixin.py +221 -217
- edsl/questions/QuestionBudget.py +227 -227
- edsl/questions/QuestionCheckBox.py +359 -359
- edsl/questions/QuestionExtract.py +180 -182
- edsl/questions/QuestionFreeText.py +113 -114
- edsl/questions/QuestionFunctional.py +166 -166
- edsl/questions/QuestionList.py +223 -231
- edsl/questions/QuestionMatrix.py +265 -0
- edsl/questions/QuestionMultipleChoice.py +330 -286
- edsl/questions/QuestionNumerical.py +151 -153
- edsl/questions/QuestionRank.py +314 -324
- edsl/questions/Quick.py +41 -41
- edsl/questions/SimpleAskMixin.py +74 -73
- edsl/questions/__init__.py +27 -26
- edsl/questions/{AnswerValidatorMixin.py → answer_validator_mixin.py} +334 -289
- edsl/questions/compose_questions.py +98 -98
- edsl/questions/data_structures.py +20 -0
- edsl/questions/decorators.py +21 -21
- edsl/questions/derived/QuestionLikertFive.py +76 -76
- edsl/questions/derived/QuestionLinearScale.py +90 -87
- edsl/questions/derived/QuestionTopK.py +93 -93
- edsl/questions/derived/QuestionYesNo.py +82 -82
- edsl/questions/descriptors.py +427 -413
- edsl/questions/loop_processor.py +149 -0
- edsl/questions/prompt_templates/question_budget.jinja +13 -13
- edsl/questions/prompt_templates/question_checkbox.jinja +32 -32
- edsl/questions/prompt_templates/question_extract.jinja +11 -11
- edsl/questions/prompt_templates/question_free_text.jinja +3 -3
- edsl/questions/prompt_templates/question_linear_scale.jinja +11 -11
- edsl/questions/prompt_templates/question_list.jinja +17 -17
- edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -33
- edsl/questions/prompt_templates/question_numerical.jinja +36 -36
- edsl/questions/{QuestionBaseGenMixin.py → question_base_gen_mixin.py} +168 -161
- edsl/questions/question_registry.py +177 -177
- edsl/questions/{RegisterQuestionsMeta.py → register_questions_meta.py} +71 -71
- edsl/questions/{ResponseValidatorABC.py → response_validator_abc.py} +188 -174
- edsl/questions/response_validator_factory.py +34 -0
- edsl/questions/settings.py +12 -12
- edsl/questions/templates/budget/answering_instructions.jinja +7 -7
- edsl/questions/templates/budget/question_presentation.jinja +7 -7
- edsl/questions/templates/checkbox/answering_instructions.jinja +10 -10
- edsl/questions/templates/checkbox/question_presentation.jinja +22 -22
- edsl/questions/templates/extract/answering_instructions.jinja +7 -7
- edsl/questions/templates/likert_five/answering_instructions.jinja +10 -10
- edsl/questions/templates/likert_five/question_presentation.jinja +11 -11
- edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -5
- edsl/questions/templates/linear_scale/question_presentation.jinja +5 -5
- edsl/questions/templates/list/answering_instructions.jinja +3 -3
- edsl/questions/templates/list/question_presentation.jinja +5 -5
- edsl/questions/templates/matrix/__init__.py +1 -0
- edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
- edsl/questions/templates/matrix/question_presentation.jinja +20 -0
- edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -9
- edsl/questions/templates/multiple_choice/question_presentation.jinja +11 -11
- edsl/questions/templates/numerical/answering_instructions.jinja +6 -6
- edsl/questions/templates/numerical/question_presentation.jinja +6 -6
- edsl/questions/templates/rank/answering_instructions.jinja +11 -11
- edsl/questions/templates/rank/question_presentation.jinja +15 -15
- edsl/questions/templates/top_k/answering_instructions.jinja +8 -8
- edsl/questions/templates/top_k/question_presentation.jinja +22 -22
- edsl/questions/templates/yes_no/answering_instructions.jinja +6 -6
- edsl/questions/templates/yes_no/question_presentation.jinja +11 -11
- edsl/results/CSSParameterizer.py +108 -108
- edsl/results/Dataset.py +587 -424
- edsl/results/DatasetExportMixin.py +594 -731
- edsl/results/DatasetTree.py +295 -275
- edsl/results/MarkdownToDocx.py +122 -0
- edsl/results/MarkdownToPDF.py +111 -0
- edsl/results/Result.py +557 -465
- edsl/results/Results.py +1183 -1165
- edsl/results/ResultsExportMixin.py +45 -43
- edsl/results/ResultsGGMixin.py +121 -121
- edsl/results/TableDisplay.py +125 -198
- edsl/results/TextEditor.py +50 -0
- edsl/results/__init__.py +2 -2
- edsl/results/file_exports.py +252 -0
- edsl/results/{ResultsFetchMixin.py → results_fetch_mixin.py} +33 -33
- edsl/results/{Selector.py → results_selector.py} +145 -135
- edsl/results/{ResultsToolsMixin.py → results_tools_mixin.py} +98 -98
- edsl/results/smart_objects.py +96 -0
- edsl/results/table_data_class.py +12 -0
- edsl/results/table_display.css +77 -77
- edsl/results/table_renderers.py +118 -0
- edsl/results/tree_explore.py +115 -115
- edsl/scenarios/ConstructDownloadLink.py +109 -0
- edsl/scenarios/DocumentChunker.py +102 -0
- edsl/scenarios/DocxScenario.py +16 -0
- edsl/scenarios/FileStore.py +511 -632
- edsl/scenarios/PdfExtractor.py +40 -0
- edsl/scenarios/Scenario.py +498 -601
- edsl/scenarios/ScenarioHtmlMixin.py +65 -64
- edsl/scenarios/ScenarioList.py +1458 -1287
- edsl/scenarios/ScenarioListExportMixin.py +45 -52
- edsl/scenarios/ScenarioListPdfMixin.py +239 -261
- edsl/scenarios/__init__.py +3 -4
- edsl/scenarios/directory_scanner.py +96 -0
- edsl/scenarios/file_methods.py +85 -0
- edsl/scenarios/handlers/__init__.py +13 -0
- edsl/scenarios/handlers/csv.py +38 -0
- edsl/scenarios/handlers/docx.py +76 -0
- edsl/scenarios/handlers/html.py +37 -0
- edsl/scenarios/handlers/json.py +111 -0
- edsl/scenarios/handlers/latex.py +5 -0
- edsl/scenarios/handlers/md.py +51 -0
- edsl/scenarios/handlers/pdf.py +68 -0
- edsl/scenarios/handlers/png.py +39 -0
- edsl/scenarios/handlers/pptx.py +105 -0
- edsl/scenarios/handlers/py.py +294 -0
- edsl/scenarios/handlers/sql.py +313 -0
- edsl/scenarios/handlers/sqlite.py +149 -0
- edsl/scenarios/handlers/txt.py +33 -0
- edsl/scenarios/{ScenarioJoin.py → scenario_join.py} +131 -127
- edsl/scenarios/scenario_selector.py +156 -0
- edsl/shared.py +1 -1
- edsl/study/ObjectEntry.py +173 -173
- edsl/study/ProofOfWork.py +113 -113
- edsl/study/SnapShot.py +80 -80
- edsl/study/Study.py +521 -528
- edsl/study/__init__.py +4 -4
- edsl/surveys/ConstructDAG.py +92 -0
- edsl/surveys/DAG.py +148 -148
- edsl/surveys/EditSurvey.py +221 -0
- edsl/surveys/InstructionHandler.py +100 -0
- edsl/surveys/Memory.py +31 -31
- edsl/surveys/MemoryManagement.py +72 -0
- edsl/surveys/MemoryPlan.py +244 -244
- edsl/surveys/Rule.py +327 -326
- edsl/surveys/RuleCollection.py +385 -387
- edsl/surveys/RuleManager.py +172 -0
- edsl/surveys/Simulator.py +75 -0
- edsl/surveys/Survey.py +1280 -1801
- edsl/surveys/SurveyCSS.py +273 -261
- edsl/surveys/SurveyExportMixin.py +259 -259
- edsl/surveys/{SurveyFlowVisualizationMixin.py → SurveyFlowVisualization.py} +181 -179
- edsl/surveys/SurveyQualtricsImport.py +284 -284
- edsl/surveys/SurveyToApp.py +141 -0
- edsl/surveys/__init__.py +5 -3
- edsl/surveys/base.py +53 -53
- edsl/surveys/descriptors.py +60 -56
- edsl/surveys/instructions/ChangeInstruction.py +48 -49
- edsl/surveys/instructions/Instruction.py +56 -65
- edsl/surveys/instructions/InstructionCollection.py +82 -77
- edsl/templates/error_reporting/base.html +23 -23
- edsl/templates/error_reporting/exceptions_by_model.html +34 -34
- edsl/templates/error_reporting/exceptions_by_question_name.html +16 -16
- edsl/templates/error_reporting/exceptions_by_type.html +16 -16
- edsl/templates/error_reporting/interview_details.html +115 -115
- edsl/templates/error_reporting/interviews.html +19 -19
- edsl/templates/error_reporting/overview.html +4 -4
- edsl/templates/error_reporting/performance_plot.html +1 -1
- edsl/templates/error_reporting/report.css +73 -73
- edsl/templates/error_reporting/report.html +117 -117
- edsl/templates/error_reporting/report.js +25 -25
- edsl/tools/__init__.py +1 -1
- edsl/tools/clusters.py +192 -192
- edsl/tools/embeddings.py +27 -27
- edsl/tools/embeddings_plotting.py +118 -118
- edsl/tools/plotting.py +112 -112
- edsl/tools/summarize.py +18 -18
- edsl/utilities/PrettyList.py +56 -0
- edsl/utilities/SystemInfo.py +28 -28
- edsl/utilities/__init__.py +22 -22
- edsl/utilities/ast_utilities.py +25 -25
- edsl/utilities/data/Registry.py +6 -6
- edsl/utilities/data/__init__.py +1 -1
- edsl/utilities/data/scooter_results.json +1 -1
- edsl/utilities/decorators.py +77 -77
- edsl/utilities/gcp_bucket/cloud_storage.py +96 -96
- edsl/utilities/interface.py +627 -627
- edsl/utilities/is_notebook.py +18 -0
- edsl/utilities/is_valid_variable_name.py +11 -0
- edsl/utilities/naming_utilities.py +263 -263
- edsl/utilities/remove_edsl_version.py +24 -0
- edsl/utilities/repair_functions.py +28 -28
- edsl/utilities/restricted_python.py +70 -70
- edsl/utilities/utilities.py +436 -424
- {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev5.dist-info}/LICENSE +21 -21
- {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev5.dist-info}/METADATA +13 -11
- edsl-0.1.39.dev5.dist-info/RECORD +358 -0
- {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev5.dist-info}/WHEEL +1 -1
- edsl/language_models/KeyLookup.py +0 -30
- edsl/language_models/registry.py +0 -190
- edsl/language_models/unused/ReplicateBase.py +0 -83
- edsl/results/ResultsDBMixin.py +0 -238
- edsl-0.1.39.dev3.dist-info/RECORD +0 -277
edsl/results/Result.py
CHANGED
@@ -1,465 +1,557 @@
|
|
1
|
-
# """This module contains the Result class, which captures the result of one interview."""
|
2
|
-
from __future__ import annotations
|
3
|
-
|
4
|
-
from
|
5
|
-
from
|
6
|
-
from edsl.Base import Base
|
7
|
-
from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
self.
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
self.
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
@
|
153
|
-
def
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
"
|
186
|
-
"
|
187
|
-
"iteration"
|
188
|
-
"
|
189
|
-
"
|
190
|
-
"
|
191
|
-
"comment": self.comments_dict,
|
192
|
-
"generated_tokens": self.generated_tokens,
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
return self.
|
211
|
-
|
212
|
-
def
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
def
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
def
|
248
|
-
"""Return the value
|
249
|
-
|
250
|
-
>>> r = Result.example()
|
251
|
-
>>> r.
|
252
|
-
'OK'
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
for data_type
|
293
|
-
for key
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
{
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
>>> r
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
return value
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
prompt_obj
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
from
|
384
|
-
|
385
|
-
from edsl.
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
1
|
+
# """This module contains the Result class, which captures the result of one interview."""
|
2
|
+
from __future__ import annotations
|
3
|
+
import inspect
|
4
|
+
from collections import UserDict
|
5
|
+
from typing import Any, Type, Callable, Optional, TYPE_CHECKING, Union
|
6
|
+
from edsl.Base import Base
|
7
|
+
from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
|
8
|
+
|
9
|
+
if TYPE_CHECKING:
|
10
|
+
from edsl.agents.Agent import Agent
|
11
|
+
from edsl.scenarios.Scenario import Scenario
|
12
|
+
from edsl.language_models.LanguageModel import LanguageModel
|
13
|
+
from edsl.prompts.Prompt import Prompt
|
14
|
+
from edsl.surveys.Survey import Survey
|
15
|
+
|
16
|
+
|
17
|
+
# Alias used in annotations for dictionary keys that are question names.
QuestionName = str
# Alias used in annotations for answer values; no type constraint is imposed.
AnswerValue = Any
|
19
|
+
|
20
|
+
|
21
|
+
class AgentNamer:
    """Hand out sequential names ("Agent_0", "Agent_1", ...) to agents.

    The same object always receives the same name for as long as this
    registry has an entry for it.
    """

    def __init__(self):
        # Maps id(agent) -> the name assigned to that object.
        self._registry = {}

    def get_name(self, agent: "Agent") -> str:
        """Return the name registered for ``agent``, assigning one on first use."""
        # NOTE(review): entries are keyed on id(agent); CPython may reuse an id
        # once the original object has been garbage-collected -- confirm that
        # agents outlive the registry lookups that matter.
        key = id(agent)
        # The default is built before insertion, so the counter reflects the
        # number of agents named so far.
        return self._registry.setdefault(key, f"Agent_{len(self._registry)}")


# Global instance for agent naming
agent_namer = AgentNamer().get_name
|
37
|
+
|
38
|
+
|
39
|
+
class Result(Base, UserDict):
|
40
|
+
"""
|
41
|
+
This class captures the result of one interview.
|
42
|
+
"""
|
43
|
+
|
44
|
+
def __init__(
    self,
    agent: "Agent",
    scenario: "Scenario",
    model: "LanguageModel",
    iteration: int,
    answer: dict[QuestionName, AnswerValue],
    prompt: Optional[dict[QuestionName, str]] = None,
    raw_model_response: Optional[dict] = None,
    survey: Optional["Survey"] = None,
    question_to_attributes: Optional[dict[QuestionName, Any]] = None,
    generated_tokens: Optional[dict] = None,
    comments_dict: Optional[dict] = None,
    cache_used_dict: Optional[dict[QuestionName, bool]] = None,
    indices: Optional[dict] = None,
):
    """Initialize a Result object.

    :param agent: The Agent object.
    :param scenario: The Scenario object.
    :param model: The LanguageModel object.
    :param iteration: The iteration number.
    :param answer: A dictionary mapping question names to answer values.
    :param prompt: A dictionary of prompts.
    :param raw_model_response: The raw model response.
    :param survey: The Survey object.
    :param question_to_attributes: A dictionary of question attributes.
    :param generated_tokens: A dictionary of generated tokens.
    :param comments_dict: A dictionary of comments.
    :param cache_used_dict: A dictionary of cache usage.
    :param indices: A dictionary of indices.

    """
    # Use the supplied attributes if truthy; otherwise derive them from the
    # survey (an empty dict when no survey is given).
    self.question_to_attributes = (
        question_to_attributes or self._create_question_to_attributes(survey)
    )

    # Optional dict arguments that are None (or empty) are stored as {}.
    # NOTE(review): "question_to_attributes" stores the raw argument (possibly
    # None), not the computed self.question_to_attributes -- confirm intended.
    data = {
        "agent": agent,
        "scenario": scenario,
        "model": model,
        "iteration": iteration,
        "answer": answer,
        "prompt": prompt or {},
        "raw_model_response": raw_model_response or {},
        "question_to_attributes": question_to_attributes,
        "generated_tokens": generated_tokens or {},
        "comments_dict": comments_dict or {},
        "cache_used_dict": cache_used_dict or {},
    }
    # Presumably reaches UserDict.__init__, which fills self.data -- the
    # accessors below read from self.data.
    super().__init__(**data)
    # indices must be set before _construct_sub_dicts(), which reads it.
    self.indices = indices
    self._sub_dicts = self._construct_sub_dicts()
    # Both caches are computed eagerly from the sub-dicts.
    (
        self._combined_dict,
        self._problem_keys,
    ) = self._compute_combined_dict_and_problem_keys()
|
101
|
+
|
102
|
+
@staticmethod
def _create_question_to_attributes(survey):
    """Map each question name in ``survey`` to its text, type and options.

    Returns an empty dict when ``survey`` is None. ``question_options`` is
    None for questions that do not have that attribute.
    """
    if survey is None:
        return {}

    attributes_by_name = {}
    for question in survey.questions:
        attributes_by_name[question.question_name] = {
            "question_text": question.question_text,
            "question_type": question.question_type,
            # Not every question exposes options; default to None.
            "question_options": getattr(question, "question_options", None),
        }
    return attributes_by_name
|
117
|
+
|
118
|
+
@property
def agent(self) -> "Agent":
    """Return the Agent object stored under the ``"agent"`` key of ``self.data``."""
    return self.data["agent"]
|
122
|
+
|
123
|
+
@property
def scenario(self) -> "Scenario":
    """Return the Scenario object stored under the ``"scenario"`` key of ``self.data``."""
    return self.data["scenario"]
|
127
|
+
|
128
|
+
@property
def model(self) -> "LanguageModel":
    """Return the LanguageModel object stored under the ``"model"`` key of ``self.data``."""
    return self.data["model"]
|
132
|
+
|
133
|
+
@property
def answer(self) -> dict[QuestionName, AnswerValue]:
    """Return the answers stored under the ``"answer"`` key of ``self.data``."""
    return self.data["answer"]
|
137
|
+
|
138
|
+
@staticmethod
def _create_agent_sub_dict(agent) -> dict:
    """Build the ``"agent"`` sub-dictionary: traits plus name and instruction.

    An agent whose ``name`` is None is given a name by the module-level
    ``agent_namer``; otherwise its own name is used.
    """
    agent_name = agent_namer(agent) if agent.name is None else agent.name
    details = (
        agent.traits
        | {"agent_name": agent_name}
        | {"agent_instruction": agent.instruction}
    )
    return {"agent": details}
|
151
|
+
|
152
|
+
@staticmethod
def _create_model_sub_dict(model) -> dict:
    """Build the ``"model"`` sub-dictionary: the model's parameters plus its name."""
    model_details = model.parameters | {"model": model.model}
    return {"model": model_details}
|
157
|
+
|
158
|
+
@staticmethod
def _iteration_sub_dict(iteration) -> dict:
    """Build the ``"iteration"`` sub-dictionary wrapping the iteration value."""
    iteration_details = {"iteration": iteration}
    return {"iteration": iteration_details}
|
163
|
+
|
164
|
+
def _construct_sub_dicts(self) -> dict[str, dict]:
    """Construct a dictionary of sub-dictionaries for the Result object.

    Keys are, in order: agent, model, iteration, scenario, answer, prompt,
    comment, generated_tokens, raw_model_response, question_text,
    question_options, question_type and cache_used.
    """
    attributes = self.question_to_attributes
    # Only answered questions that have recorded attributes contribute.
    answered = [name for name in self.data["answer"] if name in attributes]
    # One sub-dict per attribute, keyed "<question_name>_<attribute>".
    per_attribute = {
        attribute: {
            name + "_" + attribute: attributes[name][attribute] for name in answered
        }
        for attribute in ("question_text", "question_options", "question_type")
    }
    cache_used = {
        f"{name}_cache_used": used
        for name, used in self.data["cache_used_dict"].items()
    }

    sub_dicts = {}
    sub_dicts.update(self._create_agent_sub_dict(self.data["agent"]))
    sub_dicts.update(self._create_model_sub_dict(self.data["model"]))
    sub_dicts.update(self._iteration_sub_dict(self.data["iteration"]))
    sub_dicts["scenario"] = self.data["scenario"]
    sub_dicts["answer"] = self.data["answer"]
    sub_dicts["prompt"] = self.data["prompt"]
    sub_dicts["comment"] = self.data["comments_dict"]
    sub_dicts["generated_tokens"] = self.data["generated_tokens"]
    sub_dicts["raw_model_response"] = self.data["raw_model_response"]
    sub_dicts["question_text"] = per_attribute["question_text"]
    sub_dicts["question_options"] = per_attribute["question_options"]
    sub_dicts["question_type"] = per_attribute["question_type"]
    sub_dicts["cache_used"] = cache_used

    indices = getattr(self, "indices", None)
    if indices is not None:
        # NOTE(review): sub_dicts["scenario"] is the very object held in
        # self.data, so the scenario itself gains "scenario_index" -- confirm.
        for label in ("agent", "scenario", "model"):
            sub_dicts[label].update({f"{label}_index": indices[label]})
    return sub_dicts
|
204
|
+
|
205
|
+
@property
def sub_dicts(self) -> dict[str, dict]:
    """Return a dictionary where keys are strings for each of the main class attributes/objects.

    The value is cached in ``self._sub_dicts`` and rebuilt only when that
    cache is None.
    """
    cached = self._sub_dicts
    if cached is not None:
        return cached
    self._sub_dicts = self._construct_sub_dicts()
    return self._sub_dicts
|
211
|
+
|
212
|
+
def check_expression(self, expression: str) -> None:
    """Reject an expression that uses an ambiguous key without qualification.

    Each entry of ``self.problem_keys`` is checked by plain substring search:
    if the key occurs in ``expression`` but the dotted prefix ``"<key>."``
    does not, the expression is considered ambiguous.

    :param expression: The expression text to check.
    :raises ValueError: If a problem key is used without its dotted form.
    """
    for key in self.problem_keys:
        if key in expression and key + "." not in expression:
            raise ValueError(
                f"Key by itself {key} is problematic. Use the full key {key + '.' + key} name instead."
            )
    return None
|
219
|
+
|
220
|
+
def code(self):
    """Return a string of code that can be used to recreate the Result object.

    Not implemented: calling this always raises NotImplementedError.
    """
    raise NotImplementedError
|
223
|
+
|
224
|
+
@property
def problem_keys(self) -> list[str]:
    """Return a list of keys that are problematic."""
    # NOTE(review): a second ``problem_keys`` property appears further down in
    # this class; being defined later it would replace this one -- confirm and
    # remove the duplicate.
    return self._problem_keys
|
228
|
+
|
229
|
+
def _compute_combined_dict_and_problem_keys(
    self,
) -> tuple[dict[str, Any], list[str]]:
    """Flatten ``self.sub_dicts`` into one dict and list the ambiguous names.

    The combined dict holds every key/value pair of every sub-dict and also
    maps each sub-dict's own name to that sub-dict, so an evaluator can use
    dot notation such as ``answer.how_feeling``. A name is reported as a
    problem key when it is already a key of the combined dict at the moment
    the sub-dict of that name is added.
    """
    merged = {}
    ambiguous = []
    for name, sub_dict in self.sub_dicts.items():
        merged.update(sub_dict)
        # The sub-dict's name collides with a flattened key seen so far.
        if name in merged:
            ambiguous.append(name)
        # The sub-dict itself always wins over a flattened key of that name.
        merged[name] = sub_dict
    return merged, ambiguous
|
245
|
+
|
246
|
+
@property
def combined_dict(self) -> dict[str, Any]:
    """Return a dictionary that includes all sub_dicts, but also puts the key-value pairs in each sub_dict as a key_value pair in the combined dictionary.

    The result is cached; it is recomputed (together with the problem keys)
    only when either cache is None.

    >>> r = Result.example()
    >>> r.combined_dict['how_feeling']
    'OK'
    """
    cache_is_complete = (
        self._combined_dict is not None and self._problem_keys is not None
    )
    if not cache_is_complete:
        recomputed = self._compute_combined_dict_and_problem_keys()
        self._combined_dict, self._problem_keys = recomputed
    return self._combined_dict
|
260
|
+
|
261
|
+
@property
def problem_keys(self) -> list[str]:
    """Return a list of keys that are problematic.

    The list is cached in ``self._problem_keys``. When either cache is None,
    both the combined dict and the problem keys are recomputed and stored, so
    the freshly computed list is what gets returned.
    """
    if self._combined_dict is None or self._problem_keys is None:
        # Store the recomputed values; discarding them would leave the caches
        # at None and make this property return None.
        (
            self._combined_dict,
            self._problem_keys,
        ) = self._compute_combined_dict_and_problem_keys()
    return self._problem_keys
|
267
|
+
|
268
|
+
def get_value(self, data_type: str, key: str) -> Any:
    """Look up ``key`` inside the sub-dictionary named ``data_type``.

    >>> r = Result.example()
    >>> r.get_value("answer", "how_feeling")
    'OK'

    - data types can be "agent", "scenario", "model", or "answer"
    - keys are relevant attributes of the Objects the data types represent

    Raises:
        KeyError: if ``data_type`` or ``key`` is not present.
    """
    section = self.sub_dicts[data_type]
    return section[key]
|
279
|
+
|
280
|
+
@property
def key_to_data_type(self) -> dict[str, str]:
    """Map each attribute key to the data type (sub-dictionary name) it belongs to.

    Data types are visited in sorted order. When a key has already been
    claimed by an earlier data type, a warning is issued, the key is
    recorded as ``<key>_<data_type>`` instead, and the entry inside
    ``self.sub_dicts[data_type]`` is renamed accordingly (a side effect of
    reading this property).

    >>> r = Result.example()
    >>> r.key_to_data_type["how_feeling"]
    'answer'

    """
    import warnings

    owner_by_key = {}
    pending_renames = []
    for data_type in sorted(self.sub_dicts):
        for key in self.sub_dicts[data_type]:
            target = key
            if key in owner_by_key:
                warnings.warn(
                    f"Key '{key}' of data type '{data_type}' is already in use. Renaming to {key}_{data_type}"
                )
                pending_renames.append((key, data_type))
                target = f"{key}_{data_type}"
            owner_by_key[target] = data_type

    # Renames are applied only after iteration so that no sub-dictionary is
    # mutated while it is being looped over.
    for key, data_type in pending_renames:
        section = self.sub_dicts[data_type]
        section[f"{key}_{data_type}"] = section.pop(key)
    return owner_by_key
|
309
|
+
|
310
|
+
def copy(self) -> Result:
    """Return a new Result built from this one's dictionary representation.

    >>> r = Result.example()
    >>> r2 = r.copy()
    >>> r == r2
    True
    >>> id(r) == id(r2)
    False
    """
    serialized = self.to_dict()
    return Result.from_dict(serialized)
|
321
|
+
|
322
|
+
def __eq__(self, other) -> bool:
    """Return True when this object and ``other`` have the same hash.

    >>> r = Result.example()
    >>> r == r
    True

    """
    own_hash = hash(self)
    other_hash = hash(other)
    return own_hash == other_hash
|
331
|
+
|
332
|
+
def to_dict(
    self, add_edsl_version: bool = True, include_cache_info: bool = False
) -> dict[str, Any]:
    """Serialize the Result into a plain dictionary.

    Values exposing ``to_dict`` are serialized through it (receiving
    ``add_edsl_version``); other values are stored as-is. The ``"prompt"``
    entry is rebuilt item by item, calling each prompt's ``to_dict()`` when
    it has one.

    Args:
        add_edsl_version: when true, add ``"edsl_version"`` and
            ``"edsl_class_name"`` entries.
        include_cache_info: when true, include ``"cache_used_dict"`` taken
            from ``self.data``; otherwise that entry is dropped.

    >>> r = Result.example()
    >>> r.to_dict()['scenario']
    {'period': 'morning', 'scenario_index': 0, 'edsl_version': '...', 'edsl_class_name': 'Scenario'}
    """
    serialized = {}
    for name, item in self.items():
        if hasattr(item, "to_dict"):
            serialized[name] = item.to_dict(add_edsl_version=add_edsl_version)
        else:
            serialized[name] = item

        if name == "prompt":
            serialized[name] = {
                prompt_name: (
                    prompt.to_dict() if hasattr(prompt, "to_dict") else prompt
                )
                for prompt_name, prompt in item.items()
            }

    if add_edsl_version:
        from edsl import __version__

        serialized["edsl_version"] = __version__
        serialized["edsl_class_name"] = "Result"

    if not include_cache_info:
        serialized.pop("cache_used_dict", None)
    else:
        serialized["cache_used_dict"] = self.data["cache_used_dict"]

    return serialized
|
373
|
+
|
374
|
+
def __hash__(self):
    """Return a hash derived from the dictionary form of the Result.

    The dictionary is produced without the edsl version and without cache
    information before being passed to ``dict_hash``.
    """
    from edsl.utilities.utilities import dict_hash

    payload = self.to_dict(add_edsl_version=False, include_cache_info=False)
    return dict_hash(payload)
|
379
|
+
|
380
|
+
@classmethod
@remove_edsl_version
def from_dict(cls, json_dict: dict) -> Result:
    """Build a Result from its dictionary representation.

    ``agent``, ``scenario``, ``model``, ``iteration`` and ``answer`` are
    required keys. The remaining entries fall back to defaults when
    absent. Each item under ``"prompt"`` is rebuilt with
    ``Prompt.from_dict``.

    Args:
        json_dict: dictionary form of a Result.

    Returns:
        A new instance of ``cls``.

    Raises:
        KeyError: if one of the required keys is missing.
    """
    from edsl.agents.Agent import Agent
    from edsl.scenarios.Scenario import Scenario
    from edsl.language_models.LanguageModel import LanguageModel
    from edsl.prompts.Prompt import Prompt

    prompts = {
        prompt_name: Prompt.from_dict(prompt_data)
        for prompt_name, prompt_data in json_dict.get("prompt", {}).items()
    }

    # Construct through ``cls`` (not a hard-coded class name) so that the
    # alternate constructor also works for subclasses.
    return cls(
        agent=Agent.from_dict(json_dict["agent"]),
        scenario=Scenario.from_dict(json_dict["scenario"]),
        model=LanguageModel.from_dict(json_dict["model"]),
        iteration=json_dict["iteration"],
        answer=json_dict["answer"],
        prompt=prompts,
        raw_model_response=json_dict.get(
            "raw_model_response", {"raw_model_response": "No raw model response"}
        ),
        question_to_attributes=json_dict.get("question_to_attributes", None),
        generated_tokens=json_dict.get("generated_tokens", {}),
        comments_dict=json_dict.get("comments_dict", {}),
        cache_used_dict=json_dict.get("cache_used_dict", {}),
    )
|
411
|
+
|
412
|
+
def __repr__(self):
    """Return ``ClassName(key=value, ...)`` built from the entries of ``self.data``."""
    rendered = [f"{name}={value!r}" for name, value in self.data.items()]
    joined = ", ".join(rendered)
    return f"{self.__class__.__name__}({joined})"
|
416
|
+
|
417
|
+
@classmethod
def example(cls):
    """Return an example Result: the first element of ``Results.example()``.

    >>> Result.example()
    Result(...)

    """
    from edsl.results.Results import Results

    example_results = Results.example()
    return example_results[0]
|
428
|
+
|
429
|
+
def score(self, scoring_function: Callable) -> Union[int, float]:
    """Apply ``scoring_function`` to values drawn from this Result.

    Each parameter of the function is filled from ``self.combined_dict``
    when a key of the same name exists, otherwise from the parameter's
    default value.

    >>> def f(status): return 1 if status == 'Joyful' else 0
    >>> Result.example().score(f)
    1

    Raises:
        ValueError: if a parameter has neither a matching key nor a default.
    """
    arguments = {}
    parameters = inspect.signature(scoring_function).parameters
    for name, parameter in parameters.items():
        if name in self.combined_dict:
            arguments[name] = self.combined_dict[name]
            continue
        if parameter.default is inspect.Parameter.empty:
            raise ValueError(f"Parameter {name} not found in Result object")
        arguments[name] = parameter.default
    return scoring_function(**arguments)
|
446
|
+
|
447
|
+
@classmethod
def from_interview(
    cls, interview, extracted_answers, model_response_objects
) -> Result:
    """Assemble a Result from an interview and the responses collected for it.

    Args:
        interview: supplies ``agent``, ``scenario``, ``model``,
            ``iteration``, ``survey``, ``indices`` and ``initial_hash``.
        extracted_answers: mapping from question name to answer; must
            contain every question name found in ``model_response_objects``.
        model_response_objects: iterable of per-question response objects
            (read here: ``question_name``, ``generated_tokens``,
            ``comment``, ``prompts``, ``raw_model_response``, ``cost``,
            ``cache_used``). It is traversed more than once.

    Returns:
        A new ``cls`` instance with ``interview_hash`` set to
        ``interview.initial_hash``.
    """
    # Index the responses by question name; a later response with the same
    # name replaces an earlier one.
    responses_by_question = {
        response.question_name: response for response in model_response_objects
    }
    question_names = list(responses_by_question)

    generated_tokens_dict = {
        name + "_generated_tokens": responses_by_question[name].generated_tokens
        for name in question_names
    }
    comments_dict = {
        name + "_comment": responses_by_question[name].comment
        for name in question_names
    }
    answer_dict = {name: extracted_answers[name] for name in question_names}

    # Collect the user/system prompt pair of every response, then flatten
    # them into "<question>_user_prompt" / "<question>_system_prompt" keys.
    prompts_by_question = {
        response.question_name: {
            "user_prompt": response.prompts["user_prompt"],
            "system_prompt": response.prompts["system_prompt"],
        }
        for response in model_response_objects
    }
    prompt_dictionary = {}
    for name in question_names:
        prompt_pair = prompts_by_question[name]
        prompt_dictionary[name + "_user_prompt"] = prompt_pair["user_prompt"]
        prompt_dictionary[name + "_system_prompt"] = prompt_pair["system_prompt"]

    raw_model_results_dictionary = {}
    cache_used_dictionary = {}
    for response in model_response_objects:
        name = response.question_name
        raw_model_results_dictionary[name + "_raw_model_response"] = (
            response.raw_model_response
        )
        cost = response.cost
        raw_model_results_dictionary[name + "_cost"] = cost
        # A string, zero or missing cost cannot be inverted; report "NA".
        if isinstance(cost, str) or cost == 0 or cost is None:
            one_usd_buys = "NA"
        else:
            one_usd_buys = 1.0 / cost
        raw_model_results_dictionary[name + "_one_usd_buys"] = one_usd_buys
        cache_used_dictionary[name] = response.cache_used

    result = cls(
        agent=interview.agent,
        scenario=interview.scenario,
        model=interview.model,
        iteration=interview.iteration,
        # Computed objects
        answer=answer_dict,
        prompt=prompt_dictionary,
        raw_model_response=raw_model_results_dictionary,
        survey=interview.survey,
        generated_tokens=generated_tokens_dict,
        comments_dict=comments_dict,
        cache_used_dict=cache_used_dictionary,
        indices=interview.indices,
    )
    result.interview_hash = interview.initial_hash
    return result
|
552
|
+
|
553
|
+
|
554
|
+
if __name__ == "__main__":
    # When executed as a script, run this module's doctests. ELLIPSIS lets
    # "..." in expected output stand for arbitrary text.
    from doctest import ELLIPSIS, testmod

    testmod(optionflags=ELLIPSIS)
|