edsl 0.1.15__py3-none-any.whl → 0.1.40__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +348 -38
- edsl/BaseDiff.py +260 -0
- edsl/TemplateLoader.py +24 -0
- edsl/__init__.py +45 -10
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +842 -144
- edsl/agents/AgentList.py +521 -25
- edsl/agents/Invigilator.py +250 -374
- edsl/agents/InvigilatorBase.py +257 -0
- edsl/agents/PromptConstructor.py +272 -0
- edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
- edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
- edsl/agents/descriptors.py +43 -13
- edsl/agents/prompt_helpers.py +129 -0
- edsl/agents/question_option_processor.py +172 -0
- edsl/auto/AutoStudy.py +130 -0
- edsl/auto/StageBase.py +243 -0
- edsl/auto/StageGenerateSurvey.py +178 -0
- edsl/auto/StageLabelQuestions.py +125 -0
- edsl/auto/StagePersona.py +61 -0
- edsl/auto/StagePersonaDimensionValueRanges.py +88 -0
- edsl/auto/StagePersonaDimensionValues.py +74 -0
- edsl/auto/StagePersonaDimensions.py +69 -0
- edsl/auto/StageQuestions.py +74 -0
- edsl/auto/SurveyCreatorPipeline.py +21 -0
- edsl/auto/utilities.py +218 -0
- edsl/base/Base.py +279 -0
- edsl/config.py +115 -113
- edsl/conversation/Conversation.py +290 -0
- edsl/conversation/car_buying.py +59 -0
- edsl/conversation/chips.py +95 -0
- edsl/conversation/mug_negotiation.py +81 -0
- edsl/conversation/next_speaker_utilities.py +93 -0
- edsl/coop/CoopFunctionsMixin.py +15 -0
- edsl/coop/ExpectedParrotKeyHandler.py +125 -0
- edsl/coop/PriceFetcher.py +54 -0
- edsl/coop/__init__.py +1 -0
- edsl/coop/coop.py +1029 -134
- edsl/coop/utils.py +131 -0
- edsl/data/Cache.py +560 -89
- edsl/data/CacheEntry.py +230 -0
- edsl/data/CacheHandler.py +168 -0
- edsl/data/RemoteCacheSync.py +186 -0
- edsl/data/SQLiteDict.py +292 -0
- edsl/data/__init__.py +5 -3
- edsl/data/orm.py +6 -33
- edsl/data_transfer_models.py +74 -27
- edsl/enums.py +165 -8
- edsl/exceptions/BaseException.py +21 -0
- edsl/exceptions/__init__.py +52 -46
- edsl/exceptions/agents.py +33 -15
- edsl/exceptions/cache.py +5 -0
- edsl/exceptions/coop.py +8 -0
- edsl/exceptions/general.py +34 -0
- edsl/exceptions/inference_services.py +5 -0
- edsl/exceptions/jobs.py +15 -0
- edsl/exceptions/language_models.py +46 -1
- edsl/exceptions/questions.py +80 -5
- edsl/exceptions/results.py +16 -5
- edsl/exceptions/scenarios.py +29 -0
- edsl/exceptions/surveys.py +13 -10
- edsl/inference_services/AnthropicService.py +106 -0
- edsl/inference_services/AvailableModelCacheHandler.py +184 -0
- edsl/inference_services/AvailableModelFetcher.py +215 -0
- edsl/inference_services/AwsBedrock.py +118 -0
- edsl/inference_services/AzureAI.py +215 -0
- edsl/inference_services/DeepInfraService.py +18 -0
- edsl/inference_services/GoogleService.py +143 -0
- edsl/inference_services/GroqService.py +20 -0
- edsl/inference_services/InferenceServiceABC.py +80 -0
- edsl/inference_services/InferenceServicesCollection.py +138 -0
- edsl/inference_services/MistralAIService.py +120 -0
- edsl/inference_services/OllamaService.py +18 -0
- edsl/inference_services/OpenAIService.py +236 -0
- edsl/inference_services/PerplexityService.py +160 -0
- edsl/inference_services/ServiceAvailability.py +135 -0
- edsl/inference_services/TestService.py +90 -0
- edsl/inference_services/TogetherAIService.py +172 -0
- edsl/inference_services/data_structures.py +134 -0
- edsl/inference_services/models_available_cache.py +118 -0
- edsl/inference_services/rate_limits_cache.py +25 -0
- edsl/inference_services/registry.py +41 -0
- edsl/inference_services/write_available.py +10 -0
- edsl/jobs/AnswerQuestionFunctionConstructor.py +223 -0
- edsl/jobs/Answers.py +21 -20
- edsl/jobs/FetchInvigilator.py +47 -0
- edsl/jobs/InterviewTaskManager.py +98 -0
- edsl/jobs/InterviewsConstructor.py +50 -0
- edsl/jobs/Jobs.py +684 -206
- edsl/jobs/JobsChecks.py +172 -0
- edsl/jobs/JobsComponentConstructor.py +189 -0
- edsl/jobs/JobsPrompts.py +270 -0
- edsl/jobs/JobsRemoteInferenceHandler.py +311 -0
- edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
- edsl/jobs/RequestTokenEstimator.py +30 -0
- edsl/jobs/async_interview_runner.py +138 -0
- edsl/jobs/buckets/BucketCollection.py +104 -0
- edsl/jobs/buckets/ModelBuckets.py +65 -0
- edsl/jobs/buckets/TokenBucket.py +283 -0
- edsl/jobs/buckets/TokenBucketAPI.py +211 -0
- edsl/jobs/buckets/TokenBucketClient.py +191 -0
- edsl/jobs/check_survey_scenario_compatibility.py +85 -0
- edsl/jobs/data_structures.py +120 -0
- edsl/jobs/decorators.py +35 -0
- edsl/jobs/interviews/Interview.py +392 -0
- edsl/jobs/interviews/InterviewExceptionCollection.py +99 -0
- edsl/jobs/interviews/InterviewExceptionEntry.py +186 -0
- edsl/jobs/interviews/InterviewStatistic.py +63 -0
- edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -0
- edsl/jobs/interviews/InterviewStatusDictionary.py +78 -0
- edsl/jobs/interviews/InterviewStatusLog.py +92 -0
- edsl/jobs/interviews/ReportErrors.py +66 -0
- edsl/jobs/interviews/interview_status_enum.py +9 -0
- edsl/jobs/jobs_status_enums.py +9 -0
- edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
- edsl/jobs/results_exceptions_handler.py +98 -0
- edsl/jobs/runners/JobsRunnerAsyncio.py +151 -110
- edsl/jobs/runners/JobsRunnerStatus.py +298 -0
- edsl/jobs/tasks/QuestionTaskCreator.py +244 -0
- edsl/jobs/tasks/TaskCreators.py +64 -0
- edsl/jobs/tasks/TaskHistory.py +470 -0
- edsl/jobs/tasks/TaskStatusLog.py +23 -0
- edsl/jobs/tasks/task_status_enum.py +161 -0
- edsl/jobs/tokens/InterviewTokenUsage.py +27 -0
- edsl/jobs/tokens/TokenUsage.py +34 -0
- edsl/language_models/ComputeCost.py +63 -0
- edsl/language_models/LanguageModel.py +507 -386
- edsl/language_models/ModelList.py +164 -0
- edsl/language_models/PriceManager.py +127 -0
- edsl/language_models/RawResponseHandler.py +106 -0
- edsl/language_models/RegisterLanguageModelsMeta.py +184 -0
- edsl/language_models/__init__.py +1 -8
- edsl/language_models/fake_openai_call.py +15 -0
- edsl/language_models/fake_openai_service.py +61 -0
- edsl/language_models/key_management/KeyLookup.py +63 -0
- edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
- edsl/language_models/key_management/KeyLookupCollection.py +38 -0
- edsl/language_models/key_management/__init__.py +0 -0
- edsl/language_models/key_management/models.py +131 -0
- edsl/language_models/model.py +256 -0
- edsl/language_models/repair.py +109 -41
- edsl/language_models/utilities.py +65 -0
- edsl/notebooks/Notebook.py +263 -0
- edsl/notebooks/NotebookToLaTeX.py +142 -0
- edsl/notebooks/__init__.py +1 -0
- edsl/prompts/Prompt.py +222 -93
- edsl/prompts/__init__.py +1 -1
- edsl/questions/ExceptionExplainer.py +77 -0
- edsl/questions/HTMLQuestion.py +103 -0
- edsl/questions/QuestionBase.py +518 -0
- edsl/questions/QuestionBasePromptsMixin.py +221 -0
- edsl/questions/QuestionBudget.py +164 -67
- edsl/questions/QuestionCheckBox.py +281 -62
- edsl/questions/QuestionDict.py +343 -0
- edsl/questions/QuestionExtract.py +136 -50
- edsl/questions/QuestionFreeText.py +79 -55
- edsl/questions/QuestionFunctional.py +138 -41
- edsl/questions/QuestionList.py +184 -57
- edsl/questions/QuestionMatrix.py +265 -0
- edsl/questions/QuestionMultipleChoice.py +293 -69
- edsl/questions/QuestionNumerical.py +109 -56
- edsl/questions/QuestionRank.py +244 -49
- edsl/questions/Quick.py +41 -0
- edsl/questions/SimpleAskMixin.py +74 -0
- edsl/questions/__init__.py +9 -6
- edsl/questions/{AnswerValidatorMixin.py → answer_validator_mixin.py} +153 -38
- edsl/questions/compose_questions.py +13 -7
- edsl/questions/data_structures.py +20 -0
- edsl/questions/decorators.py +21 -0
- edsl/questions/derived/QuestionLikertFive.py +28 -26
- edsl/questions/derived/QuestionLinearScale.py +41 -28
- edsl/questions/derived/QuestionTopK.py +34 -26
- edsl/questions/derived/QuestionYesNo.py +40 -27
- edsl/questions/descriptors.py +228 -74
- edsl/questions/loop_processor.py +149 -0
- edsl/questions/prompt_templates/question_budget.jinja +13 -0
- edsl/questions/prompt_templates/question_checkbox.jinja +32 -0
- edsl/questions/prompt_templates/question_extract.jinja +11 -0
- edsl/questions/prompt_templates/question_free_text.jinja +3 -0
- edsl/questions/prompt_templates/question_linear_scale.jinja +11 -0
- edsl/questions/prompt_templates/question_list.jinja +17 -0
- edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -0
- edsl/questions/prompt_templates/question_numerical.jinja +37 -0
- edsl/questions/question_base_gen_mixin.py +168 -0
- edsl/questions/question_registry.py +130 -46
- edsl/questions/register_questions_meta.py +71 -0
- edsl/questions/response_validator_abc.py +188 -0
- edsl/questions/response_validator_factory.py +34 -0
- edsl/questions/settings.py +5 -2
- edsl/questions/templates/__init__.py +0 -0
- edsl/questions/templates/budget/__init__.py +0 -0
- edsl/questions/templates/budget/answering_instructions.jinja +7 -0
- edsl/questions/templates/budget/question_presentation.jinja +7 -0
- edsl/questions/templates/checkbox/__init__.py +0 -0
- edsl/questions/templates/checkbox/answering_instructions.jinja +10 -0
- edsl/questions/templates/checkbox/question_presentation.jinja +22 -0
- edsl/questions/templates/dict/__init__.py +0 -0
- edsl/questions/templates/dict/answering_instructions.jinja +21 -0
- edsl/questions/templates/dict/question_presentation.jinja +1 -0
- edsl/questions/templates/extract/__init__.py +0 -0
- edsl/questions/templates/extract/answering_instructions.jinja +7 -0
- edsl/questions/templates/extract/question_presentation.jinja +1 -0
- edsl/questions/templates/free_text/__init__.py +0 -0
- edsl/questions/templates/free_text/answering_instructions.jinja +0 -0
- edsl/questions/templates/free_text/question_presentation.jinja +1 -0
- edsl/questions/templates/likert_five/__init__.py +0 -0
- edsl/questions/templates/likert_five/answering_instructions.jinja +10 -0
- edsl/questions/templates/likert_five/question_presentation.jinja +12 -0
- edsl/questions/templates/linear_scale/__init__.py +0 -0
- edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -0
- edsl/questions/templates/linear_scale/question_presentation.jinja +5 -0
- edsl/questions/templates/list/__init__.py +0 -0
- edsl/questions/templates/list/answering_instructions.jinja +4 -0
- edsl/questions/templates/list/question_presentation.jinja +5 -0
- edsl/questions/templates/matrix/__init__.py +1 -0
- edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
- edsl/questions/templates/matrix/question_presentation.jinja +20 -0
- edsl/questions/templates/multiple_choice/__init__.py +0 -0
- edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -0
- edsl/questions/templates/multiple_choice/html.jinja +0 -0
- edsl/questions/templates/multiple_choice/question_presentation.jinja +12 -0
- edsl/questions/templates/numerical/__init__.py +0 -0
- edsl/questions/templates/numerical/answering_instructions.jinja +7 -0
- edsl/questions/templates/numerical/question_presentation.jinja +7 -0
- edsl/questions/templates/rank/__init__.py +0 -0
- edsl/questions/templates/rank/answering_instructions.jinja +11 -0
- edsl/questions/templates/rank/question_presentation.jinja +15 -0
- edsl/questions/templates/top_k/__init__.py +0 -0
- edsl/questions/templates/top_k/answering_instructions.jinja +8 -0
- edsl/questions/templates/top_k/question_presentation.jinja +22 -0
- edsl/questions/templates/yes_no/__init__.py +0 -0
- edsl/questions/templates/yes_no/answering_instructions.jinja +6 -0
- edsl/questions/templates/yes_no/question_presentation.jinja +12 -0
- edsl/results/CSSParameterizer.py +108 -0
- edsl/results/Dataset.py +550 -19
- edsl/results/DatasetExportMixin.py +594 -0
- edsl/results/DatasetTree.py +295 -0
- edsl/results/MarkdownToDocx.py +122 -0
- edsl/results/MarkdownToPDF.py +111 -0
- edsl/results/Result.py +477 -173
- edsl/results/Results.py +987 -269
- edsl/results/ResultsExportMixin.py +28 -125
- edsl/results/ResultsGGMixin.py +83 -15
- edsl/results/TableDisplay.py +125 -0
- edsl/results/TextEditor.py +50 -0
- edsl/results/__init__.py +1 -1
- edsl/results/file_exports.py +252 -0
- edsl/results/results_fetch_mixin.py +33 -0
- edsl/results/results_selector.py +145 -0
- edsl/results/results_tools_mixin.py +98 -0
- edsl/results/smart_objects.py +96 -0
- edsl/results/table_data_class.py +12 -0
- edsl/results/table_display.css +78 -0
- edsl/results/table_renderers.py +118 -0
- edsl/results/tree_explore.py +115 -0
- edsl/scenarios/ConstructDownloadLink.py +109 -0
- edsl/scenarios/DocumentChunker.py +102 -0
- edsl/scenarios/DocxScenario.py +16 -0
- edsl/scenarios/FileStore.py +543 -0
- edsl/scenarios/PdfExtractor.py +40 -0
- edsl/scenarios/Scenario.py +431 -62
- edsl/scenarios/ScenarioHtmlMixin.py +65 -0
- edsl/scenarios/ScenarioList.py +1415 -45
- edsl/scenarios/ScenarioListExportMixin.py +45 -0
- edsl/scenarios/ScenarioListPdfMixin.py +239 -0
- edsl/scenarios/__init__.py +2 -0
- edsl/scenarios/directory_scanner.py +96 -0
- edsl/scenarios/file_methods.py +85 -0
- edsl/scenarios/handlers/__init__.py +13 -0
- edsl/scenarios/handlers/csv.py +49 -0
- edsl/scenarios/handlers/docx.py +76 -0
- edsl/scenarios/handlers/html.py +37 -0
- edsl/scenarios/handlers/json.py +111 -0
- edsl/scenarios/handlers/latex.py +5 -0
- edsl/scenarios/handlers/md.py +51 -0
- edsl/scenarios/handlers/pdf.py +68 -0
- edsl/scenarios/handlers/png.py +39 -0
- edsl/scenarios/handlers/pptx.py +105 -0
- edsl/scenarios/handlers/py.py +294 -0
- edsl/scenarios/handlers/sql.py +313 -0
- edsl/scenarios/handlers/sqlite.py +149 -0
- edsl/scenarios/handlers/txt.py +33 -0
- edsl/scenarios/scenario_join.py +131 -0
- edsl/scenarios/scenario_selector.py +156 -0
- edsl/shared.py +1 -0
- edsl/study/ObjectEntry.py +173 -0
- edsl/study/ProofOfWork.py +113 -0
- edsl/study/SnapShot.py +80 -0
- edsl/study/Study.py +521 -0
- edsl/study/__init__.py +4 -0
- edsl/surveys/ConstructDAG.py +92 -0
- edsl/surveys/DAG.py +92 -11
- edsl/surveys/EditSurvey.py +221 -0
- edsl/surveys/InstructionHandler.py +100 -0
- edsl/surveys/Memory.py +9 -4
- edsl/surveys/MemoryManagement.py +72 -0
- edsl/surveys/MemoryPlan.py +156 -35
- edsl/surveys/Rule.py +221 -74
- edsl/surveys/RuleCollection.py +241 -61
- edsl/surveys/RuleManager.py +172 -0
- edsl/surveys/Simulator.py +75 -0
- edsl/surveys/Survey.py +1079 -339
- edsl/surveys/SurveyCSS.py +273 -0
- edsl/surveys/SurveyExportMixin.py +235 -40
- edsl/surveys/SurveyFlowVisualization.py +181 -0
- edsl/surveys/SurveyQualtricsImport.py +284 -0
- edsl/surveys/SurveyToApp.py +141 -0
- edsl/surveys/__init__.py +4 -2
- edsl/surveys/base.py +19 -3
- edsl/surveys/descriptors.py +17 -6
- edsl/surveys/instructions/ChangeInstruction.py +48 -0
- edsl/surveys/instructions/Instruction.py +56 -0
- edsl/surveys/instructions/InstructionCollection.py +82 -0
- edsl/surveys/instructions/__init__.py +0 -0
- edsl/templates/error_reporting/base.html +24 -0
- edsl/templates/error_reporting/exceptions_by_model.html +35 -0
- edsl/templates/error_reporting/exceptions_by_question_name.html +17 -0
- edsl/templates/error_reporting/exceptions_by_type.html +17 -0
- edsl/templates/error_reporting/interview_details.html +116 -0
- edsl/templates/error_reporting/interviews.html +19 -0
- edsl/templates/error_reporting/overview.html +5 -0
- edsl/templates/error_reporting/performance_plot.html +2 -0
- edsl/templates/error_reporting/report.css +74 -0
- edsl/templates/error_reporting/report.html +118 -0
- edsl/templates/error_reporting/report.js +25 -0
- edsl/tools/__init__.py +1 -0
- edsl/tools/clusters.py +192 -0
- edsl/tools/embeddings.py +27 -0
- edsl/tools/embeddings_plotting.py +118 -0
- edsl/tools/plotting.py +112 -0
- edsl/tools/summarize.py +18 -0
- edsl/utilities/PrettyList.py +56 -0
- edsl/utilities/SystemInfo.py +5 -0
- edsl/utilities/__init__.py +21 -20
- edsl/utilities/ast_utilities.py +3 -0
- edsl/utilities/data/Registry.py +2 -0
- edsl/utilities/decorators.py +41 -0
- edsl/utilities/gcp_bucket/__init__.py +0 -0
- edsl/utilities/gcp_bucket/cloud_storage.py +96 -0
- edsl/utilities/interface.py +310 -60
- edsl/utilities/is_notebook.py +18 -0
- edsl/utilities/is_valid_variable_name.py +11 -0
- edsl/utilities/naming_utilities.py +263 -0
- edsl/utilities/remove_edsl_version.py +24 -0
- edsl/utilities/repair_functions.py +28 -0
- edsl/utilities/restricted_python.py +70 -0
- edsl/utilities/utilities.py +203 -13
- edsl-0.1.40.dist-info/METADATA +111 -0
- edsl-0.1.40.dist-info/RECORD +362 -0
- {edsl-0.1.15.dist-info → edsl-0.1.40.dist-info}/WHEEL +1 -1
- edsl/agents/AgentListExportMixin.py +0 -24
- edsl/coop/old.py +0 -31
- edsl/data/Database.py +0 -141
- edsl/data/crud.py +0 -121
- edsl/jobs/Interview.py +0 -435
- edsl/jobs/JobsRunner.py +0 -63
- edsl/jobs/JobsRunnerStatusMixin.py +0 -115
- edsl/jobs/base.py +0 -47
- edsl/jobs/buckets.py +0 -178
- edsl/jobs/runners/JobsRunnerDryRun.py +0 -19
- edsl/jobs/runners/JobsRunnerStreaming.py +0 -54
- edsl/jobs/task_management.py +0 -215
- edsl/jobs/token_tracking.py +0 -78
- edsl/language_models/DeepInfra.py +0 -69
- edsl/language_models/OpenAI.py +0 -98
- edsl/language_models/model_interfaces/GeminiPro.py +0 -66
- edsl/language_models/model_interfaces/LanguageModelOpenAIFour.py +0 -8
- edsl/language_models/model_interfaces/LanguageModelOpenAIThreeFiveTurbo.py +0 -8
- edsl/language_models/model_interfaces/LlamaTwo13B.py +0 -21
- edsl/language_models/model_interfaces/LlamaTwo70B.py +0 -21
- edsl/language_models/model_interfaces/Mixtral8x7B.py +0 -24
- edsl/language_models/registry.py +0 -81
- edsl/language_models/schemas.py +0 -15
- edsl/language_models/unused/ReplicateBase.py +0 -83
- edsl/prompts/QuestionInstructionsBase.py +0 -6
- edsl/prompts/library/agent_instructions.py +0 -29
- edsl/prompts/library/agent_persona.py +0 -17
- edsl/prompts/library/question_budget.py +0 -26
- edsl/prompts/library/question_checkbox.py +0 -32
- edsl/prompts/library/question_extract.py +0 -19
- edsl/prompts/library/question_freetext.py +0 -14
- edsl/prompts/library/question_linear_scale.py +0 -20
- edsl/prompts/library/question_list.py +0 -22
- edsl/prompts/library/question_multiple_choice.py +0 -44
- edsl/prompts/library/question_numerical.py +0 -31
- edsl/prompts/library/question_rank.py +0 -21
- edsl/prompts/prompt_config.py +0 -33
- edsl/prompts/registry.py +0 -185
- edsl/questions/Question.py +0 -240
- edsl/report/InputOutputDataTypes.py +0 -134
- edsl/report/RegressionMixin.py +0 -28
- edsl/report/ReportOutputs.py +0 -1228
- edsl/report/ResultsFetchMixin.py +0 -106
- edsl/report/ResultsOutputMixin.py +0 -14
- edsl/report/demo.ipynb +0 -645
- edsl/results/ResultsDBMixin.py +0 -184
- edsl/surveys/SurveyFlowVisualizationMixin.py +0 -92
- edsl/trackers/Tracker.py +0 -91
- edsl/trackers/TrackerAPI.py +0 -196
- edsl/trackers/TrackerTasks.py +0 -70
- edsl/utilities/pastebin.py +0 -141
- edsl-0.1.15.dist-info/METADATA +0 -69
- edsl-0.1.15.dist-info/RECORD +0 -142
- /edsl/{language_models/model_interfaces → inference_services}/__init__.py +0 -0
- /edsl/{report/__init__.py → jobs/runners/JobsRunnerStatusData.py} +0 -0
- /edsl/{trackers/__init__.py → language_models/ServiceDataSources.py} +0 -0
- {edsl-0.1.15.dist-info → edsl-0.1.40.dist-info}/LICENSE +0 -0
edsl/results/Result.py
CHANGED
@@ -1,73 +1,84 @@
|
|
1
|
+
# """This module contains the Result class, which captures the result of one interview."""
|
1
2
|
from __future__ import annotations
|
3
|
+
import inspect
|
2
4
|
from collections import UserDict
|
3
|
-
from typing import Any, Type
|
4
|
-
|
5
|
-
from rich.table import Table
|
6
|
-
|
7
|
-
from IPython.display import display
|
8
|
-
|
9
|
-
from edsl.agents import Agent
|
10
|
-
from edsl.language_models import LanguageModel
|
11
|
-
from edsl.scenarios import Scenario
|
12
|
-
|
13
|
-
from edsl.utilities import is_notebook
|
14
|
-
|
5
|
+
from typing import Any, Type, Callable, Optional, TYPE_CHECKING, Union
|
15
6
|
from edsl.Base import Base
|
7
|
+
from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
|
16
8
|
|
17
|
-
|
9
|
+
if TYPE_CHECKING:
|
10
|
+
from edsl.agents.Agent import Agent
|
11
|
+
from edsl.scenarios.Scenario import Scenario
|
12
|
+
from edsl.language_models.LanguageModel import LanguageModel
|
13
|
+
from edsl.prompts.Prompt import Prompt
|
14
|
+
from edsl.surveys.Survey import Survey
|
18
15
|
|
19
16
|
|
20
|
-
|
21
|
-
|
22
|
-
"""Displays an object as a table."""
|
23
|
-
table = Table(title="")
|
24
|
-
table.add_column("Attribute", style="bold")
|
25
|
-
table.add_column("Value")
|
17
|
+
QuestionName = str
|
18
|
+
AnswerValue = Any
|
26
19
|
|
27
|
-
to_display = self
|
28
|
-
for attr_name, attr_value in to_display.items():
|
29
|
-
table.add_row(attr_name, repr(attr_value))
|
30
20
|
|
31
|
-
|
21
|
+
class AgentNamer:
|
22
|
+
"""Maintains a registry of agent names to ensure unique naming."""
|
32
23
|
|
24
|
+
def __init__(self):
|
25
|
+
self._registry = {}
|
33
26
|
|
34
|
-
def
|
35
|
-
|
36
|
-
|
27
|
+
def get_name(self, agent: "Agent") -> str:
|
28
|
+
"""Get or create a unique name for an agent."""
|
29
|
+
agent_id = id(agent)
|
30
|
+
if agent_id not in self._registry:
|
31
|
+
self._registry[agent_id] = f"Agent_{len(self._registry)}"
|
32
|
+
return self._registry[agent_id]
|
37
33
|
|
38
|
-
def agent_namer(agent):
|
39
|
-
nonlocal agent_dict
|
40
|
-
agent_count = len(agent_dict)
|
41
|
-
if id(agent) in agent_dict:
|
42
|
-
return agent_dict[id(agent)]
|
43
|
-
else:
|
44
|
-
agent_dict[id(agent)] = f"Agent_{agent_count}"
|
45
|
-
return agent_dict[id(agent)]
|
46
34
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
agent_namer = agent_namer_closure()
|
35
|
+
# Global instance for agent naming
|
36
|
+
agent_namer = AgentNamer().get_name
|
51
37
|
|
52
38
|
|
53
39
|
class Result(Base, UserDict):
|
54
40
|
"""
|
55
41
|
This class captures the result of one interview.
|
56
|
-
- Its main data is an Agent, a Scenario, a Model, an Iteration, and an Answer.
|
57
|
-
- These are stored both in the UserDict and as attributes.
|
58
42
|
"""
|
59
43
|
|
60
44
|
def __init__(
|
61
45
|
self,
|
62
|
-
agent: Agent,
|
63
|
-
scenario: Scenario,
|
64
|
-
model:
|
46
|
+
agent: "Agent",
|
47
|
+
scenario: "Scenario",
|
48
|
+
model: "LanguageModel",
|
65
49
|
iteration: int,
|
66
|
-
answer:
|
67
|
-
prompt: dict[
|
68
|
-
raw_model_response=None,
|
50
|
+
answer: dict[QuestionName, AnswerValue],
|
51
|
+
prompt: dict[QuestionName, str] = None,
|
52
|
+
raw_model_response: Optional[dict] = None,
|
53
|
+
survey: Optional["Survey"] = None,
|
54
|
+
question_to_attributes: Optional[dict[QuestionName, Any]] = None,
|
55
|
+
generated_tokens: Optional[dict] = None,
|
56
|
+
comments_dict: Optional[dict] = None,
|
57
|
+
cache_used_dict: Optional[dict[QuestionName, bool]] = None,
|
58
|
+
indices: Optional[dict] = None,
|
59
|
+
cache_keys: Optional[dict[QuestionName, str]] = None,
|
69
60
|
):
|
70
|
-
|
61
|
+
"""Initialize a Result object.
|
62
|
+
|
63
|
+
:param agent: The Agent object.
|
64
|
+
:param scenario: The Scenario object.
|
65
|
+
:param model: The LanguageModel object.
|
66
|
+
:param iteration: The iteration number.
|
67
|
+
:param answer: A dictionary mapping question names to answer values.
|
68
|
+
:param prompt: A dictionary of prompts.
|
69
|
+
:param raw_model_response: The raw model response.
|
70
|
+
:param survey: The Survey object.
|
71
|
+
:param question_to_attributes: A dictionary of question attributes.
|
72
|
+
:param generated_tokens: A dictionary of generated tokens.
|
73
|
+
:param comments_dict: A dictionary of comments.
|
74
|
+
:param cache_used_dict: A dictionary of cache usage.
|
75
|
+
:param indices: A dictionary of indices.
|
76
|
+
|
77
|
+
"""
|
78
|
+
self.question_to_attributes = (
|
79
|
+
question_to_attributes or self._create_question_to_attributes(survey)
|
80
|
+
)
|
81
|
+
|
71
82
|
data = {
|
72
83
|
"agent": agent,
|
73
84
|
"scenario": scenario,
|
@@ -76,194 +87,487 @@ class Result(Base, UserDict):
|
|
76
87
|
"answer": answer,
|
77
88
|
"prompt": prompt or {},
|
78
89
|
"raw_model_response": raw_model_response or {},
|
90
|
+
"question_to_attributes": question_to_attributes,
|
91
|
+
"generated_tokens": generated_tokens or {},
|
92
|
+
"comments_dict": comments_dict or {},
|
93
|
+
"cache_used_dict": cache_used_dict or {},
|
94
|
+
"cache_keys": cache_keys or {},
|
79
95
|
}
|
80
96
|
super().__init__(**data)
|
81
|
-
|
82
|
-
self.
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
97
|
+
self.indices = indices
|
98
|
+
self._sub_dicts = self._construct_sub_dicts()
|
99
|
+
(
|
100
|
+
self._combined_dict,
|
101
|
+
self._problem_keys,
|
102
|
+
) = self._compute_combined_dict_and_problem_keys()
|
103
|
+
|
104
|
+
@staticmethod
|
105
|
+
def _create_question_to_attributes(survey):
|
106
|
+
"""Create a dictionary of question attributes."""
|
107
|
+
if survey is None:
|
108
|
+
return {}
|
109
|
+
return {
|
110
|
+
q.question_name: {
|
111
|
+
"question_text": q.question_text,
|
112
|
+
"question_type": q.question_type,
|
113
|
+
"question_options": (
|
114
|
+
None if not hasattr(q, "question_options") else q.question_options
|
115
|
+
),
|
116
|
+
}
|
117
|
+
for q in survey.questions
|
118
|
+
}
|
119
|
+
|
93
120
|
@property
|
94
|
-
def
|
95
|
-
"""
|
121
|
+
def agent(self) -> "Agent":
|
122
|
+
"""Return the Agent object."""
|
123
|
+
return self.data["agent"]
|
124
|
+
|
125
|
+
@property
|
126
|
+
def scenario(self) -> "Scenario":
|
127
|
+
"""Return the Scenario object."""
|
128
|
+
return self.data["scenario"]
|
129
|
+
|
130
|
+
@property
|
131
|
+
def model(self) -> "LanguageModel":
|
132
|
+
"""Return the LanguageModel object."""
|
133
|
+
return self.data["model"]
|
96
134
|
|
97
|
-
|
98
|
-
|
135
|
+
@property
|
136
|
+
def answer(self) -> dict[QuestionName, AnswerValue]:
|
137
|
+
"""Return the answers."""
|
138
|
+
return self.data["answer"]
|
139
|
+
|
140
|
+
@staticmethod
|
141
|
+
def _create_agent_sub_dict(agent) -> dict:
|
142
|
+
"""Create a dictionary of agent details"""
|
143
|
+
if agent.name is None:
|
144
|
+
agent_name = agent_namer(agent)
|
99
145
|
else:
|
100
|
-
agent_name =
|
146
|
+
agent_name = agent.name
|
147
|
+
|
148
|
+
return {
|
149
|
+
"agent": agent.traits
|
150
|
+
| {"agent_name": agent_name}
|
151
|
+
| {"agent_instruction": agent.instruction},
|
152
|
+
}
|
153
|
+
|
154
|
+
@staticmethod
|
155
|
+
def _create_model_sub_dict(model) -> dict:
|
156
|
+
return {
|
157
|
+
"model": model.parameters | {"model": model.model},
|
158
|
+
}
|
101
159
|
|
160
|
+
@staticmethod
|
161
|
+
def _iteration_sub_dict(iteration) -> dict:
|
102
162
|
return {
|
103
|
-
"
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
163
|
+
"iteration": {"iteration": iteration},
|
164
|
+
}
|
165
|
+
|
166
|
+
def _construct_sub_dicts(self) -> dict[str, dict]:
|
167
|
+
"""Construct a dictionary of sub-dictionaries for the Result object."""
|
168
|
+
|
169
|
+
sub_dicts_needing_new_keys = {
|
170
|
+
"question_text": {},
|
171
|
+
"question_options": {},
|
172
|
+
"question_type": {},
|
109
173
|
}
|
110
174
|
|
175
|
+
for question_name in self.data["answer"]:
|
176
|
+
if question_name in self.question_to_attributes:
|
177
|
+
for dictionary_name in sub_dicts_needing_new_keys:
|
178
|
+
new_key = question_name + "_" + dictionary_name
|
179
|
+
sub_dicts_needing_new_keys[dictionary_name][new_key] = (
|
180
|
+
self.question_to_attributes[question_name][dictionary_name]
|
181
|
+
)
|
182
|
+
|
183
|
+
new_cache_dict = {
|
184
|
+
f"{k}_cache_used": v for k, v in self.data["cache_used_dict"].items()
|
185
|
+
}
|
186
|
+
|
187
|
+
cache_keys = {f"{k}_cache_key": v for k, v in self.data["cache_keys"].items()}
|
188
|
+
|
189
|
+
d = {
|
190
|
+
**self._create_agent_sub_dict(self.data["agent"]),
|
191
|
+
**self._create_model_sub_dict(self.data["model"]),
|
192
|
+
**self._iteration_sub_dict(self.data["iteration"]),
|
193
|
+
"scenario": self.data["scenario"],
|
194
|
+
"answer": self.data["answer"],
|
195
|
+
"prompt": self.data["prompt"],
|
196
|
+
"comment": self.data["comments_dict"],
|
197
|
+
"generated_tokens": self.data["generated_tokens"],
|
198
|
+
"raw_model_response": self.data["raw_model_response"],
|
199
|
+
"question_text": sub_dicts_needing_new_keys["question_text"],
|
200
|
+
"question_options": sub_dicts_needing_new_keys["question_options"],
|
201
|
+
"question_type": sub_dicts_needing_new_keys["question_type"],
|
202
|
+
"cache_used": new_cache_dict,
|
203
|
+
"cache_keys": cache_keys,
|
204
|
+
}
|
205
|
+
if hasattr(self, "indices") and self.indices is not None:
|
206
|
+
d["agent"].update({"agent_index": self.indices["agent"]})
|
207
|
+
d["scenario"].update({"scenario_index": self.indices["scenario"]})
|
208
|
+
d["model"].update({"model_index": self.indices["model"]})
|
209
|
+
|
210
|
+
return d
|
211
|
+
|
212
|
+
@property
|
213
|
+
def sub_dicts(self) -> dict[str, dict]:
|
214
|
+
"""Return a dictionary where keys are strings for each of the main class attributes/objects."""
|
215
|
+
if self._sub_dicts is None:
|
216
|
+
self._sub_dicts = self._construct_sub_dicts()
|
217
|
+
return self._sub_dicts
|
218
|
+
|
219
|
+
def check_expression(self, expression: str) -> None:
|
220
|
+
for key in self.problem_keys:
|
221
|
+
if key in expression and not key + "." in expression:
|
222
|
+
raise ValueError(
|
223
|
+
f"Key by iself {key} is problematic. Use the full key {key + '.' + key} name instead."
|
224
|
+
)
|
225
|
+
return None
|
226
|
+
|
111
227
|
def code(self):
|
228
|
+
"""Return a string of code that can be used to recreate the Result object."""
|
112
229
|
raise NotImplementedError
|
113
230
|
|
114
231
|
@property
|
115
|
-
def
|
116
|
-
"""
|
232
|
+
def problem_keys(self) -> list[str]:
|
233
|
+
"""Return a list of keys that are problematic."""
|
234
|
+
return self._problem_keys
|
235
|
+
|
236
|
+
def _compute_combined_dict_and_problem_keys(
|
237
|
+
self,
|
238
|
+
) -> tuple[dict[str, Any], list[str]]:
|
117
239
|
combined = {}
|
240
|
+
problem_keys = []
|
118
241
|
for key, sub_dict in self.sub_dicts.items():
|
119
242
|
combined.update(sub_dict)
|
243
|
+
# in some cases, the sub_dict might have keys that conflict with the main dict
|
244
|
+
if key in combined:
|
245
|
+
# The key is already in the combined dict
|
246
|
+
problem_keys = problem_keys + [key]
|
247
|
+
|
120
248
|
combined.update({key: sub_dict})
|
121
|
-
|
249
|
+
# I *think* this allows us to do things like "answer.how_feeling", i.e., that the evaluator can use
|
250
|
+
# dot notation to access the subdicts.
|
251
|
+
return combined, problem_keys
|
252
|
+
|
253
|
+
@property
|
254
|
+
def combined_dict(self) -> dict[str, Any]:
|
255
|
+
"""Return a dictionary that includes all sub_dicts, but also puts the key-value pairs in each sub_dict as a key_value pair in the combined dictionary.
|
256
|
+
|
257
|
+
>>> r = Result.example()
|
258
|
+
>>> r.combined_dict['how_feeling']
|
259
|
+
'OK'
|
260
|
+
"""
|
261
|
+
if self._combined_dict is None or self._problem_keys is None:
|
262
|
+
(
|
263
|
+
self._combined_dict,
|
264
|
+
self._problem_keys,
|
265
|
+
) = self._compute_combined_dict_and_problem_keys()
|
266
|
+
return self._combined_dict
|
267
|
+
|
268
|
+
@property
|
269
|
+
def problem_keys(self) -> list[str]:
|
270
|
+
"""Return a list of keys that are problematic."""
|
271
|
+
if self._combined_dict is None or self._problem_keys is None:
|
272
|
+
self._compute_combined_dict_and_problem_keys()
|
273
|
+
return self._problem_keys
|
122
274
|
|
123
275
|
def get_value(self, data_type: str, key: str) -> Any:
|
124
|
-
"""
|
276
|
+
"""Return the value for a given data type and key.
|
277
|
+
|
278
|
+
>>> r = Result.example()
|
279
|
+
>>> r.get_value("answer", "how_feeling")
|
280
|
+
'OK'
|
281
|
+
|
125
282
|
- data types can be "agent", "scenario", "model", or "answer"
|
126
283
|
- keys are relevant attributes of the Objects the data types represent
|
127
|
-
results.get_value("answer", "how_feeling") will return "Good" or "Bad" or whatnot
|
128
284
|
"""
|
129
285
|
return self.sub_dicts[data_type][key]
|
130
286
|
|
131
287
|
@property
def key_to_data_type(self) -> dict[str, str]:
    """Map every attribute key to the data type (sub-dict name) it belongs to.

    Data types are visited in sorted order. When a key has already been claimed
    by an earlier data type, a warning is emitted, the key is recorded as
    ``"<key>_<data_type>"`` instead, and - as a side effect - the same rename is
    applied to the entry inside ``self.sub_dicts[data_type]``.

    >>> r = Result.example()
    >>> r.key_to_data_type["how_feeling"]
    'answer'

    """
    import warnings

    owner_of = {}
    clashes = []
    for data_type in sorted(self.sub_dicts.keys()):
        for key in self.sub_dicts[data_type]:
            if key not in owner_of:
                owner_of[key] = data_type
                continue
            warnings.warn(
                f"Key '{key}' of data type '{data_type}' is already in use. Renaming to {key}_{data_type}"
            )
            clashes.append((key, data_type))
            owner_of[f"{key}_{data_type}"] = data_type

    # Renames are applied only after the scan so that no sub-dict is modified
    # while it is being iterated.
    for key, data_type in clashes:
        renamed_value = self.sub_dicts[data_type].pop(key)
        self.sub_dicts[data_type][f"{key}_{data_type}"] = renamed_value
    return owner_of
|
151
316
|
|
152
|
-
###############
|
153
|
-
# Useful
|
154
|
-
###############
|
155
317
|
def copy(self) -> Result:
    """Return a copy of the Result object.

    The copy is made by serialising this object with ``to_dict`` and rebuilding
    a new ``Result`` from that dictionary with ``Result.from_dict``.

    >>> r = Result.example()
    >>> r2 = r.copy()
    >>> r == r2
    True
    >>> id(r) == id(r2)
    False
    """
    serialized = self.to_dict()
    return Result.from_dict(serialized)
|
158
328
|
|
159
|
-
def __eq__(self, other) -> bool:
    """Return True if the Result object is equal to another Result object.

    Equality is defined through hashes: the two operands are equal when
    ``hash(self) == hash(other)``. An operand that cannot be hashed at all
    (for example a list or a dict) is simply reported as not equal instead of
    letting the ``TypeError`` from ``hash`` escape from an ``==`` comparison.

    >>> r = Result.example()
    >>> r == r
    True

    """
    own_hash = hash(self)
    try:
        other_hash = hash(other)
    except TypeError:
        # Unhashable objects have no hash to compare against, so they can
        # never be equal to a Result.
        return False
    return own_hash == other_hash
|
338
|
+
|
339
|
+
def to_dict(
    self, add_edsl_version: bool = True, include_cache_info: bool = False
) -> dict[str, Any]:
    """Serialise the Result into a plain dictionary.

    Every stored value that offers a ``to_dict`` method is serialised through
    it (receiving ``add_edsl_version``); other values are copied as they are.
    The ``"prompt"`` entry is handled per prompt: each prompt object with a
    ``to_dict`` method is serialised by calling it without arguments.

    With ``add_edsl_version`` the keys ``"edsl_version"`` and
    ``"edsl_class_name"`` are added. ``"cache_used_dict"`` is taken from
    ``self.data`` when ``include_cache_info`` is true and dropped otherwise.

    >>> r = Result.example()
    >>> r.to_dict()['scenario']
    {'period': 'morning', 'scenario_index': 0, 'edsl_version': '...', 'edsl_class_name': 'Scenario'}
    """
    serialized = {}
    for field_name, field_value in self.items():
        if hasattr(field_value, "to_dict"):
            serialized[field_name] = field_value.to_dict(
                add_edsl_version=add_edsl_version
            )
        else:
            serialized[field_name] = field_value

        if field_name != "prompt":
            continue
        # Prompts are stored as a mapping of name -> prompt object, so each
        # one is serialised individually and the entry is replaced.
        serialized[field_name] = {
            prompt_name: (
                prompt_obj.to_dict() if hasattr(prompt_obj, "to_dict") else prompt_obj
            )
            for prompt_name, prompt_obj in field_value.items()
        }

    if add_edsl_version:
        from edsl import __version__

        serialized["edsl_version"] = __version__
        serialized["edsl_class_name"] = "Result"

    if include_cache_info:
        serialized["cache_used_dict"] = self.data["cache_used_dict"]
    else:
        serialized.pop("cache_used_dict", None)

    return serialized
|
380
|
+
|
381
|
+
def __hash__(self):
    """Return a hash of the Result object.

    The hash is ``dict_hash`` applied to the dictionary form of the object,
    produced without the edsl version fields and without the cache info.
    """
    from edsl.utilities.utilities import dict_hash

    hashed_payload = self.to_dict(add_edsl_version=False, include_cache_info=False)
    return dict_hash(hashed_payload)
|
170
386
|
|
171
387
|
@classmethod
@remove_edsl_version
def from_dict(self, json_dict: dict) -> Result:
    """Rebuild a Result object from its dictionary representation.

    ``agent``, ``scenario`` and ``model`` are restored through the
    ``from_dict`` constructors of their classes, and every entry of the
    optional ``"prompt"`` mapping is restored with ``Prompt.from_dict``.
    ``"iteration"`` and ``"answer"`` are required keys; the remaining keys are
    optional and fall back to the defaults given below.

    Because of ``@classmethod`` the first parameter receives the class, even
    though it is named ``self``.
    """

    from edsl.agents.Agent import Agent
    from edsl.scenarios.Scenario import Scenario
    from edsl.language_models.LanguageModel import LanguageModel
    from edsl.prompts.Prompt import Prompt

    prompts = {
        prompt_name: Prompt.from_dict(prompt_payload)
        for prompt_name, prompt_payload in json_dict.get("prompt", {}).items()
    }
    missing_raw_response = {"raw_model_response": "No raw model response"}

    return Result(
        agent=Agent.from_dict(json_dict["agent"]),
        scenario=Scenario.from_dict(json_dict["scenario"]),
        model=LanguageModel.from_dict(json_dict["model"]),
        iteration=json_dict["iteration"],
        answer=json_dict["answer"],
        prompt=prompts,
        raw_model_response=json_dict.get("raw_model_response", missing_raw_response),
        question_to_attributes=json_dict.get("question_to_attributes", None),
        generated_tokens=json_dict.get("generated_tokens", {}),
        comments_dict=json_dict.get("comments_dict", {}),
        cache_used_dict=json_dict.get("cache_used_dict", {}),
        cache_keys=json_dict.get("cache_keys", {}),
    )
|
186
419
|
|
187
|
-
def rich_print(self):
|
188
|
-
"""Displays an object as a table."""
|
189
|
-
# from edsl.utilities import print_dict_with_rich
|
190
|
-
from rich import print
|
191
|
-
|
192
|
-
table = Table(title="Result")
|
193
|
-
table.add_column("Attribute", style="bold")
|
194
|
-
table.add_column("Value")
|
195
|
-
|
196
|
-
to_display = self.__dict__.copy()
|
197
|
-
data = to_display.pop("data", None)
|
198
|
-
for attr_name, attr_value in to_display.items():
|
199
|
-
if hasattr(attr_value, "rich_print"):
|
200
|
-
table.add_row(attr_name, attr_value.rich_print())
|
201
|
-
elif isinstance(attr_value, dict):
|
202
|
-
a = PromptDict(attr_value)
|
203
|
-
table.add_row(attr_name, a.rich_print())
|
204
|
-
else:
|
205
|
-
table.add_row(attr_name, repr(attr_value))
|
206
|
-
return table
|
207
|
-
|
208
420
|
def __repr__(self):
    """Return a string representation of the Result object.

    The result has the form ``ClassName(key=value, ...)`` with one
    ``key=repr(value)`` pair per entry of ``self.data``.
    """
    rendered_fields = [f"{key}={repr(value)}" for key, value in self.data.items()]
    class_name = self.__class__.__name__
    return f"{class_name}(" + ", ".join(rendered_fields) + ")"
|
210
424
|
|
211
425
|
@classmethod
def example(cls):
    """Return an example Result object.

    The example is the first element of ``Results.example()``.

    >>> Result.example()
    Result(...)

    """
    from edsl.results.Results import Results

    example_results = Results.example()
    return example_results[0]
|
253
436
|
|
254
|
-
|
255
|
-
|
437
|
+
def score(self, scoring_function: Callable) -> Union[int, float]:
    """Score the result using a passed-in scoring function.

    Every parameter of ``scoring_function`` is filled by name from
    ``self.combined_dict``. A parameter with no matching key falls back to its
    own default value. Catch-all parameters (``*args`` / ``**kwargs``) with no
    matching key are left empty rather than treated as missing.

    Raises ``ValueError`` when a regular parameter has neither a matching key
    nor a default.

    >>> def f(status): return 1 if status == 'Joyful' else 0
    >>> Result.example().score(f)
    1
    """
    available = self.combined_dict
    variadic_kinds = (
        inspect.Parameter.VAR_POSITIONAL,
        inspect.Parameter.VAR_KEYWORD,
    )
    params = {}
    for name, parameter in inspect.signature(scoring_function).parameters.items():
        if name in available:
            params[name] = available[name]
        elif parameter.default is not parameter.empty:
            params[name] = parameter.default
        elif parameter.kind in variadic_kinds:
            # *args / **kwargs never carry a default and need no value.
            continue
        else:
            raise ValueError(f"Parameter {name} not found in Result object")
    return scoring_function(**params)
|
259
454
|
|
260
|
-
|
261
|
-
|
455
|
+
@classmethod
def from_interview(
    cls, interview, extracted_answers, model_response_objects
) -> Result:
    """Return a Result object built from an interview and its model responses.

    ``model_response_objects`` is an iterable of per-question response objects.
    Each one is read for ``question_name``, ``generated_tokens``, ``comment``,
    ``cache_key``, ``prompts`` (with ``"user_prompt"`` and ``"system_prompt"``
    entries), ``raw_model_response``, ``cost`` and ``cache_used``. When several
    responses share a question name, the last one is used.

    ``extracted_answers`` must contain an entry for every question name found
    in the responses (otherwise ``KeyError`` is raised); only those entries
    are copied into the answer dictionary.

    ``agent``, ``scenario``, ``model``, ``iteration``, ``survey`` and
    ``indices`` are taken from ``interview``, and ``interview.initial_hash`` is
    stored on the new object as ``interview_hash``.
    """
    # Index the responses by question name up front. The iterable is traversed
    # exactly once, so one-shot iterables such as generators work as well.
    question_results = {
        response.question_name: response for response in model_response_objects
    }

    answer_dict = {}
    generated_tokens_dict = {}
    comments_dict = {}
    cache_keys = {}
    prompt_dictionary = {}
    raw_model_results_dictionary = {}
    cache_used_dictionary = {}

    for question_name, response in question_results.items():
        answer_dict[question_name] = extracted_answers[question_name]
        generated_tokens_dict[question_name + "_generated_tokens"] = (
            response.generated_tokens
        )
        comments_dict[question_name + "_comment"] = response.comment
        cache_keys[question_name] = response.cache_key

        prompt_dictionary[question_name + "_user_prompt"] = response.prompts[
            "user_prompt"
        ]
        prompt_dictionary[question_name + "_system_prompt"] = response.prompts[
            "system_prompt"
        ]

        cost = response.cost
        # "NA" marks costs that cannot be inverted: text, zero, or missing.
        if isinstance(cost, str) or cost == 0 or cost is None:
            one_usd_buys = "NA"
        else:
            one_usd_buys = 1.0 / cost
        raw_model_results_dictionary[question_name + "_raw_model_response"] = (
            response.raw_model_response
        )
        raw_model_results_dictionary[question_name + "_cost"] = cost
        raw_model_results_dictionary[question_name + "_one_usd_buys"] = one_usd_buys

        cache_used_dictionary[question_name] = response.cache_used

    result = cls(
        agent=interview.agent,
        scenario=interview.scenario,
        model=interview.model,
        iteration=interview.iteration,
        # Computed objects
        answer=answer_dict,
        prompt=prompt_dictionary,
        raw_model_response=raw_model_results_dictionary,
        survey=interview.survey,
        generated_tokens=generated_tokens_dict,
        comments_dict=comments_dict,
        cache_used_dict=cache_used_dictionary,
        indices=interview.indices,
        cache_keys=cache_keys,
    )
    result.interview_hash = interview.initial_hash
    return result
|
266
568
|
|
267
569
|
|
268
570
|
if __name__ == "__main__":
    # Run this module's doctests when it is executed as a script; ELLIPSIS lets
    # "..." in expected output match any text.
    import doctest

    doctest_flags = doctest.ELLIPSIS
    doctest.testmod(optionflags=doctest_flags)
|