edsl 0.1.46__py3-none-any.whl → 0.1.48__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__init__.py +44 -39
- edsl/__version__.py +1 -1
- edsl/agents/__init__.py +4 -2
- edsl/agents/{Agent.py → agent.py} +442 -152
- edsl/agents/{AgentList.py → agent_list.py} +220 -162
- edsl/agents/descriptors.py +46 -7
- edsl/{exceptions/agents.py → agents/exceptions.py} +3 -12
- edsl/base/__init__.py +75 -0
- edsl/base/base_class.py +1303 -0
- edsl/base/data_transfer_models.py +114 -0
- edsl/base/enums.py +215 -0
- edsl/base.py +8 -0
- edsl/buckets/__init__.py +25 -0
- edsl/buckets/bucket_collection.py +324 -0
- edsl/buckets/model_buckets.py +206 -0
- edsl/buckets/token_bucket.py +502 -0
- edsl/{jobs/buckets/TokenBucketAPI.py → buckets/token_bucket_api.py} +1 -1
- edsl/buckets/token_bucket_client.py +509 -0
- edsl/caching/__init__.py +20 -0
- edsl/caching/cache.py +814 -0
- edsl/caching/cache_entry.py +427 -0
- edsl/{data/CacheHandler.py → caching/cache_handler.py} +14 -15
- edsl/caching/exceptions.py +24 -0
- edsl/caching/orm.py +30 -0
- edsl/{data/RemoteCacheSync.py → caching/remote_cache_sync.py} +3 -3
- edsl/caching/sql_dict.py +441 -0
- edsl/config/__init__.py +8 -0
- edsl/config/config_class.py +177 -0
- edsl/config.py +4 -176
- edsl/conversation/Conversation.py +7 -7
- edsl/conversation/car_buying.py +4 -4
- edsl/conversation/chips.py +6 -6
- edsl/coop/__init__.py +25 -2
- edsl/coop/coop.py +430 -113
- edsl/coop/{ExpectedParrotKeyHandler.py → ep_key_handling.py} +86 -10
- edsl/coop/exceptions.py +62 -0
- edsl/coop/price_fetcher.py +126 -0
- edsl/coop/utils.py +89 -24
- edsl/data_transfer_models.py +5 -72
- edsl/dataset/__init__.py +10 -0
- edsl/{results/Dataset.py → dataset/dataset.py} +116 -36
- edsl/dataset/dataset_operations_mixin.py +1492 -0
- edsl/{results/DatasetTree.py → dataset/dataset_tree.py} +156 -75
- edsl/{results/TableDisplay.py → dataset/display/table_display.py} +18 -7
- edsl/{results → dataset/display}/table_renderers.py +58 -2
- edsl/{results → dataset}/file_exports.py +4 -5
- edsl/{results → dataset}/smart_objects.py +2 -2
- edsl/enums.py +5 -205
- edsl/inference_services/__init__.py +5 -0
- edsl/inference_services/{AvailableModelCacheHandler.py → available_model_cache_handler.py} +2 -3
- edsl/inference_services/{AvailableModelFetcher.py → available_model_fetcher.py} +8 -14
- edsl/inference_services/data_structures.py +3 -2
- edsl/{exceptions/inference_services.py → inference_services/exceptions.py} +1 -1
- edsl/inference_services/{InferenceServiceABC.py → inference_service_abc.py} +1 -1
- edsl/inference_services/{InferenceServicesCollection.py → inference_services_collection.py} +8 -7
- edsl/inference_services/registry.py +4 -41
- edsl/inference_services/{ServiceAvailability.py → service_availability.py} +5 -25
- edsl/inference_services/services/__init__.py +31 -0
- edsl/inference_services/{AnthropicService.py → services/anthropic_service.py} +3 -3
- edsl/inference_services/{AwsBedrock.py → services/aws_bedrock.py} +2 -2
- edsl/inference_services/{AzureAI.py → services/azure_ai.py} +2 -2
- edsl/inference_services/{DeepInfraService.py → services/deep_infra_service.py} +1 -3
- edsl/inference_services/{DeepSeekService.py → services/deep_seek_service.py} +2 -4
- edsl/inference_services/{GoogleService.py → services/google_service.py} +5 -4
- edsl/inference_services/{GroqService.py → services/groq_service.py} +1 -1
- edsl/inference_services/{MistralAIService.py → services/mistral_ai_service.py} +3 -3
- edsl/inference_services/{OllamaService.py → services/ollama_service.py} +1 -7
- edsl/inference_services/{OpenAIService.py → services/open_ai_service.py} +5 -6
- edsl/inference_services/{PerplexityService.py → services/perplexity_service.py} +12 -12
- edsl/inference_services/{TestService.py → services/test_service.py} +7 -6
- edsl/inference_services/{TogetherAIService.py → services/together_ai_service.py} +2 -6
- edsl/inference_services/{XAIService.py → services/xai_service.py} +1 -1
- edsl/inference_services/write_available.py +1 -2
- edsl/instructions/__init__.py +6 -0
- edsl/{surveys/instructions/Instruction.py → instructions/instruction.py} +11 -6
- edsl/{surveys/instructions/InstructionCollection.py → instructions/instruction_collection.py} +10 -5
- edsl/{surveys/InstructionHandler.py → instructions/instruction_handler.py} +3 -3
- edsl/{jobs/interviews → interviews}/ReportErrors.py +2 -2
- edsl/interviews/__init__.py +4 -0
- edsl/{jobs/AnswerQuestionFunctionConstructor.py → interviews/answering_function.py} +45 -18
- edsl/{jobs/interviews/InterviewExceptionEntry.py → interviews/exception_tracking.py} +107 -22
- edsl/interviews/interview.py +638 -0
- edsl/{jobs/interviews/InterviewStatusDictionary.py → interviews/interview_status_dictionary.py} +21 -12
- edsl/{jobs/interviews/InterviewStatusLog.py → interviews/interview_status_log.py} +16 -7
- edsl/{jobs/InterviewTaskManager.py → interviews/interview_task_manager.py} +12 -7
- edsl/{jobs/RequestTokenEstimator.py → interviews/request_token_estimator.py} +8 -3
- edsl/{jobs/interviews/InterviewStatistic.py → interviews/statistics.py} +36 -10
- edsl/invigilators/__init__.py +38 -0
- edsl/invigilators/invigilator_base.py +477 -0
- edsl/{agents/Invigilator.py → invigilators/invigilators.py} +263 -10
- edsl/invigilators/prompt_constructor.py +476 -0
- edsl/{agents → invigilators}/prompt_helpers.py +2 -1
- edsl/{agents/QuestionInstructionPromptBuilder.py → invigilators/question_instructions_prompt_builder.py} +18 -13
- edsl/{agents → invigilators}/question_option_processor.py +96 -21
- edsl/{agents/QuestionTemplateReplacementsBuilder.py → invigilators/question_template_replacements_builder.py} +64 -12
- edsl/jobs/__init__.py +7 -1
- edsl/jobs/async_interview_runner.py +99 -35
- edsl/jobs/check_survey_scenario_compatibility.py +7 -5
- edsl/jobs/data_structures.py +153 -22
- edsl/{exceptions/jobs.py → jobs/exceptions.py} +2 -1
- edsl/jobs/{FetchInvigilator.py → fetch_invigilator.py} +4 -4
- edsl/jobs/{loggers/HTMLTableJobLogger.py → html_table_job_logger.py} +6 -2
- edsl/jobs/{Jobs.py → jobs.py} +321 -155
- edsl/jobs/{JobsChecks.py → jobs_checks.py} +15 -7
- edsl/jobs/{JobsComponentConstructor.py → jobs_component_constructor.py} +20 -17
- edsl/jobs/{InterviewsConstructor.py → jobs_interview_constructor.py} +10 -5
- edsl/jobs/jobs_pricing_estimation.py +347 -0
- edsl/jobs/{JobsRemoteInferenceLogger.py → jobs_remote_inference_logger.py} +4 -3
- edsl/jobs/jobs_runner_asyncio.py +282 -0
- edsl/jobs/{JobsRemoteInferenceHandler.py → remote_inference.py} +19 -22
- edsl/jobs/results_exceptions_handler.py +2 -2
- edsl/key_management/__init__.py +28 -0
- edsl/key_management/key_lookup.py +161 -0
- edsl/{language_models/key_management/KeyLookupBuilder.py → key_management/key_lookup_builder.py} +118 -47
- edsl/key_management/key_lookup_collection.py +82 -0
- edsl/key_management/models.py +218 -0
- edsl/language_models/__init__.py +7 -2
- edsl/language_models/{ComputeCost.py → compute_cost.py} +18 -3
- edsl/{exceptions/language_models.py → language_models/exceptions.py} +2 -1
- edsl/language_models/language_model.py +1080 -0
- edsl/language_models/model.py +10 -25
- edsl/language_models/{ModelList.py → model_list.py} +9 -14
- edsl/language_models/{RawResponseHandler.py → raw_response_handler.py} +1 -1
- edsl/language_models/{RegisterLanguageModelsMeta.py → registry.py} +1 -1
- edsl/language_models/repair.py +4 -4
- edsl/language_models/utilities.py +4 -4
- edsl/notebooks/__init__.py +3 -1
- edsl/notebooks/{Notebook.py → notebook.py} +7 -8
- edsl/prompts/__init__.py +1 -1
- edsl/{exceptions/prompts.py → prompts/exceptions.py} +3 -1
- edsl/prompts/{Prompt.py → prompt.py} +101 -95
- edsl/questions/HTMLQuestion.py +1 -1
- edsl/questions/__init__.py +154 -25
- edsl/questions/answer_validator_mixin.py +1 -1
- edsl/questions/compose_questions.py +4 -3
- edsl/questions/derived/question_likert_five.py +166 -0
- edsl/questions/derived/{QuestionLinearScale.py → question_linear_scale.py} +4 -4
- edsl/questions/derived/{QuestionTopK.py → question_top_k.py} +4 -4
- edsl/questions/derived/{QuestionYesNo.py → question_yes_no.py} +4 -5
- edsl/questions/descriptors.py +24 -30
- edsl/questions/loop_processor.py +65 -19
- edsl/questions/question_base.py +881 -0
- edsl/questions/question_base_gen_mixin.py +15 -16
- edsl/questions/{QuestionBasePromptsMixin.py → question_base_prompts_mixin.py} +2 -2
- edsl/questions/{QuestionBudget.py → question_budget.py} +3 -4
- edsl/questions/{QuestionCheckBox.py → question_check_box.py} +16 -16
- edsl/questions/{QuestionDict.py → question_dict.py} +39 -5
- edsl/questions/{QuestionExtract.py → question_extract.py} +9 -9
- edsl/questions/question_free_text.py +282 -0
- edsl/questions/{QuestionFunctional.py → question_functional.py} +6 -5
- edsl/questions/{QuestionList.py → question_list.py} +6 -7
- edsl/questions/{QuestionMatrix.py → question_matrix.py} +6 -5
- edsl/questions/{QuestionMultipleChoice.py → question_multiple_choice.py} +126 -21
- edsl/questions/{QuestionNumerical.py → question_numerical.py} +5 -5
- edsl/questions/{QuestionRank.py → question_rank.py} +6 -6
- edsl/questions/question_registry.py +10 -16
- edsl/questions/register_questions_meta.py +8 -4
- edsl/questions/response_validator_abc.py +17 -16
- edsl/results/__init__.py +4 -1
- edsl/{exceptions/results.py → results/exceptions.py} +1 -1
- edsl/results/report.py +197 -0
- edsl/results/{Result.py → result.py} +131 -45
- edsl/results/{Results.py → results.py} +420 -216
- edsl/results/results_selector.py +344 -25
- edsl/scenarios/__init__.py +30 -3
- edsl/scenarios/{ConstructDownloadLink.py → construct_download_link.py} +7 -0
- edsl/scenarios/directory_scanner.py +156 -13
- edsl/scenarios/document_chunker.py +186 -0
- edsl/scenarios/exceptions.py +101 -0
- edsl/scenarios/file_methods.py +2 -3
- edsl/scenarios/file_store.py +755 -0
- edsl/scenarios/handlers/__init__.py +14 -14
- edsl/scenarios/handlers/{csv.py → csv_file_store.py} +1 -2
- edsl/scenarios/handlers/{docx.py → docx_file_store.py} +8 -7
- edsl/scenarios/handlers/{html.py → html_file_store.py} +1 -2
- edsl/scenarios/handlers/{jpeg.py → jpeg_file_store.py} +1 -1
- edsl/scenarios/handlers/{json.py → json_file_store.py} +1 -1
- edsl/scenarios/handlers/latex_file_store.py +5 -0
- edsl/scenarios/handlers/{md.py → md_file_store.py} +1 -1
- edsl/scenarios/handlers/{pdf.py → pdf_file_store.py} +2 -2
- edsl/scenarios/handlers/{png.py → png_file_store.py} +1 -1
- edsl/scenarios/handlers/{pptx.py → pptx_file_store.py} +8 -7
- edsl/scenarios/handlers/{py.py → py_file_store.py} +1 -3
- edsl/scenarios/handlers/{sql.py → sql_file_store.py} +2 -1
- edsl/scenarios/handlers/{sqlite.py → sqlite_file_store.py} +2 -3
- edsl/scenarios/handlers/{txt.py → txt_file_store.py} +1 -1
- edsl/scenarios/scenario.py +928 -0
- edsl/scenarios/scenario_join.py +18 -5
- edsl/scenarios/{ScenarioList.py → scenario_list.py} +424 -106
- edsl/scenarios/{ScenarioListPdfMixin.py → scenario_list_pdf_tools.py} +16 -15
- edsl/scenarios/scenario_selector.py +5 -1
- edsl/study/ObjectEntry.py +2 -2
- edsl/study/SnapShot.py +5 -5
- edsl/study/Study.py +20 -21
- edsl/study/__init__.py +6 -4
- edsl/surveys/__init__.py +7 -4
- edsl/surveys/dag/__init__.py +2 -0
- edsl/surveys/{ConstructDAG.py → dag/construct_dag.py} +3 -3
- edsl/surveys/{DAG.py → dag/dag.py} +13 -10
- edsl/surveys/descriptors.py +1 -1
- edsl/surveys/{EditSurvey.py → edit_survey.py} +9 -9
- edsl/{exceptions/surveys.py → surveys/exceptions.py} +1 -2
- edsl/surveys/memory/__init__.py +3 -0
- edsl/surveys/{MemoryPlan.py → memory/memory_plan.py} +10 -9
- edsl/surveys/rules/__init__.py +3 -0
- edsl/surveys/{Rule.py → rules/rule.py} +103 -43
- edsl/surveys/{RuleCollection.py → rules/rule_collection.py} +21 -30
- edsl/surveys/{RuleManager.py → rules/rule_manager.py} +19 -13
- edsl/surveys/survey.py +1743 -0
- edsl/surveys/{SurveyExportMixin.py → survey_export.py} +22 -27
- edsl/surveys/{SurveyFlowVisualization.py → survey_flow_visualization.py} +11 -2
- edsl/surveys/{Simulator.py → survey_simulator.py} +10 -3
- edsl/tasks/__init__.py +32 -0
- edsl/{jobs/tasks/QuestionTaskCreator.py → tasks/question_task_creator.py} +115 -57
- edsl/tasks/task_creators.py +135 -0
- edsl/{jobs/tasks/TaskHistory.py → tasks/task_history.py} +86 -47
- edsl/{jobs/tasks → tasks}/task_status_enum.py +91 -7
- edsl/tasks/task_status_log.py +85 -0
- edsl/tokens/__init__.py +2 -0
- edsl/tokens/interview_token_usage.py +53 -0
- edsl/utilities/PrettyList.py +1 -1
- edsl/utilities/SystemInfo.py +25 -22
- edsl/utilities/__init__.py +29 -21
- edsl/utilities/gcp_bucket/__init__.py +2 -0
- edsl/utilities/gcp_bucket/cloud_storage.py +99 -96
- edsl/utilities/interface.py +44 -536
- edsl/{results/MarkdownToPDF.py → utilities/markdown_to_pdf.py} +13 -5
- edsl/utilities/repair_functions.py +1 -1
- {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/METADATA +3 -2
- edsl-0.1.48.dist-info/RECORD +347 -0
- edsl/Base.py +0 -426
- edsl/BaseDiff.py +0 -260
- edsl/agents/InvigilatorBase.py +0 -260
- edsl/agents/PromptConstructor.py +0 -318
- edsl/auto/AutoStudy.py +0 -130
- edsl/auto/StageBase.py +0 -243
- edsl/auto/StageGenerateSurvey.py +0 -178
- edsl/auto/StageLabelQuestions.py +0 -125
- edsl/auto/StagePersona.py +0 -61
- edsl/auto/StagePersonaDimensionValueRanges.py +0 -88
- edsl/auto/StagePersonaDimensionValues.py +0 -74
- edsl/auto/StagePersonaDimensions.py +0 -69
- edsl/auto/StageQuestions.py +0 -74
- edsl/auto/SurveyCreatorPipeline.py +0 -21
- edsl/auto/utilities.py +0 -218
- edsl/base/Base.py +0 -279
- edsl/coop/PriceFetcher.py +0 -54
- edsl/data/Cache.py +0 -580
- edsl/data/CacheEntry.py +0 -230
- edsl/data/SQLiteDict.py +0 -292
- edsl/data/__init__.py +0 -5
- edsl/data/orm.py +0 -10
- edsl/exceptions/cache.py +0 -5
- edsl/exceptions/coop.py +0 -14
- edsl/exceptions/data.py +0 -14
- edsl/exceptions/scenarios.py +0 -29
- edsl/jobs/Answers.py +0 -43
- edsl/jobs/JobsPrompts.py +0 -354
- edsl/jobs/buckets/BucketCollection.py +0 -134
- edsl/jobs/buckets/ModelBuckets.py +0 -65
- edsl/jobs/buckets/TokenBucket.py +0 -283
- edsl/jobs/buckets/TokenBucketClient.py +0 -191
- edsl/jobs/interviews/Interview.py +0 -395
- edsl/jobs/interviews/InterviewExceptionCollection.py +0 -99
- edsl/jobs/interviews/InterviewStatisticsCollection.py +0 -25
- edsl/jobs/runners/JobsRunnerAsyncio.py +0 -163
- edsl/jobs/runners/JobsRunnerStatusData.py +0 -0
- edsl/jobs/tasks/TaskCreators.py +0 -64
- edsl/jobs/tasks/TaskStatusLog.py +0 -23
- edsl/jobs/tokens/InterviewTokenUsage.py +0 -27
- edsl/language_models/LanguageModel.py +0 -635
- edsl/language_models/ServiceDataSources.py +0 -0
- edsl/language_models/key_management/KeyLookup.py +0 -63
- edsl/language_models/key_management/KeyLookupCollection.py +0 -38
- edsl/language_models/key_management/models.py +0 -137
- edsl/questions/QuestionBase.py +0 -539
- edsl/questions/QuestionFreeText.py +0 -130
- edsl/questions/derived/QuestionLikertFive.py +0 -76
- edsl/results/DatasetExportMixin.py +0 -911
- edsl/results/ResultsExportMixin.py +0 -45
- edsl/results/TextEditor.py +0 -50
- edsl/results/results_fetch_mixin.py +0 -33
- edsl/results/results_tools_mixin.py +0 -98
- edsl/scenarios/DocumentChunker.py +0 -104
- edsl/scenarios/FileStore.py +0 -564
- edsl/scenarios/Scenario.py +0 -548
- edsl/scenarios/ScenarioHtmlMixin.py +0 -65
- edsl/scenarios/ScenarioListExportMixin.py +0 -45
- edsl/scenarios/handlers/latex.py +0 -5
- edsl/shared.py +0 -1
- edsl/surveys/Survey.py +0 -1306
- edsl/surveys/SurveyQualtricsImport.py +0 -284
- edsl/surveys/SurveyToApp.py +0 -141
- edsl/surveys/instructions/__init__.py +0 -0
- edsl/tools/__init__.py +0 -1
- edsl/tools/clusters.py +0 -192
- edsl/tools/embeddings.py +0 -27
- edsl/tools/embeddings_plotting.py +0 -118
- edsl/tools/plotting.py +0 -112
- edsl/tools/summarize.py +0 -18
- edsl/utilities/data/Registry.py +0 -6
- edsl/utilities/data/__init__.py +0 -1
- edsl/utilities/data/scooter_results.json +0 -1
- edsl-0.1.46.dist-info/RECORD +0 -366
- /edsl/coop/{CoopFunctionsMixin.py → coop_functions.py} +0 -0
- /edsl/{results → dataset/display}/CSSParameterizer.py +0 -0
- /edsl/{language_models/key_management → dataset/display}/__init__.py +0 -0
- /edsl/{results → dataset/display}/table_data_class.py +0 -0
- /edsl/{results → dataset/display}/table_display.css +0 -0
- /edsl/{results/ResultsGGMixin.py → dataset/r/ggplot.py} +0 -0
- /edsl/{results → dataset}/tree_explore.py +0 -0
- /edsl/{surveys/instructions/ChangeInstruction.py → instructions/change_instruction.py} +0 -0
- /edsl/{jobs/interviews → interviews}/interview_status_enum.py +0 -0
- /edsl/jobs/{runners/JobsRunnerStatus.py → jobs_runner_status.py} +0 -0
- /edsl/language_models/{PriceManager.py → price_manager.py} +0 -0
- /edsl/language_models/{fake_openai_call.py → unused/fake_openai_call.py} +0 -0
- /edsl/language_models/{fake_openai_service.py → unused/fake_openai_service.py} +0 -0
- /edsl/notebooks/{NotebookToLaTeX.py → notebook_to_latex.py} +0 -0
- /edsl/{exceptions/questions.py → questions/exceptions.py} +0 -0
- /edsl/questions/{SimpleAskMixin.py → simple_ask_mixin.py} +0 -0
- /edsl/surveys/{Memory.py → memory/memory.py} +0 -0
- /edsl/surveys/{MemoryManagement.py → memory/memory_management.py} +0 -0
- /edsl/surveys/{SurveyCSS.py → survey_css.py} +0 -0
- /edsl/{jobs/tokens/TokenUsage.py → tokens/token_usage.py} +0 -0
- /edsl/{results/MarkdownToDocx.py → utilities/markdown_to_docx.py} +0 -0
- /edsl/{TemplateLoader.py → utilities/template_loader.py} +0 -0
- {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/LICENSE +0 -0
- {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/WHEEL +0 -0
@@ -1,23 +1,46 @@
|
|
1
|
-
|
1
|
+
"""
|
2
|
+
This module contains the Result class, which captures the result of one interview.
|
3
|
+
|
4
|
+
The Result class is a fundamental building block in EDSL that stores all the data
|
5
|
+
associated with a single agent interview. Each Result object contains:
|
6
|
+
|
7
|
+
1. The agent that was interviewed
|
8
|
+
2. The scenario that was presented to the agent
|
9
|
+
3. The language model that was used to generate the agent's responses
|
10
|
+
4. The answers provided to the questions
|
11
|
+
5. The prompts used to generate those answers
|
12
|
+
6. Raw model responses and token usage statistics
|
13
|
+
7. Metadata about the questions and caching behavior
|
14
|
+
|
15
|
+
Results are typically created automatically when running interviews through the
|
16
|
+
Jobs system, and multiple Result objects are collected into a Results collection
|
17
|
+
for analysis.
|
18
|
+
|
19
|
+
The Result class inherits from both Base (for serialization) and UserDict (for
|
20
|
+
dictionary-like behavior), allowing it to be accessed like a dictionary while
|
21
|
+
maintaining a rich object model.
|
22
|
+
"""
|
2
23
|
from __future__ import annotations
|
3
24
|
import inspect
|
4
25
|
from collections import UserDict
|
5
26
|
from typing import Any, Type, Callable, Optional, TYPE_CHECKING, Union
|
6
|
-
from edsl.Base import Base
|
7
|
-
from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
|
8
27
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
from edsl.surveys.Survey import Survey
|
28
|
+
from ..base import Base
|
29
|
+
from ..utilities import remove_edsl_version
|
30
|
+
from ..agents import Agent
|
31
|
+
from ..scenarios import Scenario
|
32
|
+
from ..surveys import Survey
|
15
33
|
|
34
|
+
if TYPE_CHECKING:
|
35
|
+
from ..agents import Agent
|
36
|
+
from ..scenarios import Scenario
|
37
|
+
from ..language_models import LanguageModel
|
38
|
+
from ..prompts import Prompt
|
39
|
+
from ..surveys import Survey
|
16
40
|
|
17
41
|
QuestionName = str
|
18
42
|
AnswerValue = Any
|
19
43
|
|
20
|
-
|
21
44
|
class AgentNamer:
|
22
45
|
"""Maintains a registry of agent names to ensure unique naming."""
|
23
46
|
|
@@ -38,7 +61,24 @@ agent_namer = AgentNamer().get_name
|
|
38
61
|
|
39
62
|
class Result(Base, UserDict):
|
40
63
|
"""
|
41
|
-
|
64
|
+
The Result class captures the complete data from one agent interview.
|
65
|
+
|
66
|
+
A Result object stores the agent, scenario, language model, and all answers
|
67
|
+
provided during an interview, along with metadata such as token usage,
|
68
|
+
caching information, and raw model responses. It provides a rich interface
|
69
|
+
for accessing this data and supports serialization for storage and retrieval.
|
70
|
+
|
71
|
+
Key features:
|
72
|
+
|
73
|
+
- Dictionary-like access to all data through the UserDict interface
|
74
|
+
- Properties for convenient access to common attributes (agent, scenario, model, answer)
|
75
|
+
- Rich data structure with sub-dictionaries for organization
|
76
|
+
- Support for scoring results against reference answers
|
77
|
+
- Serialization to/from dictionaries for storage
|
78
|
+
|
79
|
+
Results are typically created by the Jobs system when running interviews and
|
80
|
+
collected into a Results collection for analysis. You rarely need to create
|
81
|
+
Result objects manually.
|
42
82
|
"""
|
43
83
|
|
44
84
|
def __init__(
|
@@ -275,24 +315,52 @@ class Result(Base, UserDict):
|
|
275
315
|
|
276
316
|
def get_value(self, data_type: str, key: str) -> Any:
|
277
317
|
"""Return the value for a given data type and key.
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
318
|
+
|
319
|
+
This method provides a consistent way to access values across different
|
320
|
+
sub-dictionaries in the Result object. It's particularly useful when you
|
321
|
+
need to programmatically access values without knowing which data type
|
322
|
+
a particular key belongs to.
|
323
|
+
|
324
|
+
Parameters:
|
325
|
+
data_type: The category of data to retrieve from, one of:
|
326
|
+
"agent", "scenario", "model", "answer", "prompt", "comment",
|
327
|
+
"generated_tokens", "raw_model_response", "question_text",
|
328
|
+
"question_options", "question_type", "cache_used", "cache_keys"
|
329
|
+
key: The specific attribute name within that data type
|
330
|
+
|
331
|
+
Returns:
|
332
|
+
The value associated with the key in the specified data type
|
333
|
+
|
334
|
+
Examples:
|
335
|
+
>>> r = Result.example()
|
336
|
+
>>> r.get_value("answer", "how_feeling")
|
337
|
+
'OK'
|
338
|
+
>>> r.get_value("scenario", "period")
|
339
|
+
'morning'
|
285
340
|
"""
|
286
341
|
return self.sub_dicts[data_type][key]
|
287
342
|
|
288
343
|
@property
|
289
344
|
def key_to_data_type(self) -> dict[str, str]:
|
290
|
-
"""
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
345
|
+
"""A mapping of attribute names to their container data types.
|
346
|
+
|
347
|
+
This property returns a dictionary that maps each attribute name (like 'how_feeling')
|
348
|
+
to its containing data type or category (like 'answer'). This is useful for
|
349
|
+
determining which part of the Result object a particular attribute belongs to,
|
350
|
+
especially when working with data programmatically.
|
351
|
+
|
352
|
+
If a key name appears in multiple data types, the property will automatically
|
353
|
+
rename the conflicting keys by appending the data type name to avoid ambiguity.
|
354
|
+
|
355
|
+
Returns:
|
356
|
+
A dictionary mapping attribute names to their data types
|
357
|
+
|
358
|
+
Examples:
|
359
|
+
>>> r = Result.example()
|
360
|
+
>>> r.key_to_data_type["how_feeling"]
|
361
|
+
'answer'
|
362
|
+
>>> r.key_to_data_type["model"]
|
363
|
+
'model'
|
296
364
|
"""
|
297
365
|
d = {}
|
298
366
|
problem_keys = []
|
@@ -371,7 +439,7 @@ class Result(Base, UserDict):
|
|
371
439
|
d["indices"] = self.indices
|
372
440
|
|
373
441
|
if add_edsl_version:
|
374
|
-
from
|
442
|
+
from .. import __version__
|
375
443
|
|
376
444
|
d["edsl_version"] = __version__
|
377
445
|
d["edsl_class_name"] = "Result"
|
@@ -385,7 +453,7 @@ class Result(Base, UserDict):
|
|
385
453
|
|
386
454
|
def __hash__(self):
|
387
455
|
"""Return a hash of the Result object."""
|
388
|
-
from
|
456
|
+
from ..utilities.utilities import dict_hash
|
389
457
|
|
390
458
|
return dict_hash(self.to_dict(add_edsl_version=False, include_cache_info=False))
|
391
459
|
|
@@ -394,10 +462,10 @@ class Result(Base, UserDict):
|
|
394
462
|
def from_dict(self, json_dict: dict) -> Result:
|
395
463
|
"""Return a Result object from a dictionary representation."""
|
396
464
|
|
397
|
-
from
|
398
|
-
from
|
399
|
-
from
|
400
|
-
from
|
465
|
+
from ..agents import Agent
|
466
|
+
from ..scenarios import Scenario
|
467
|
+
from ..language_models import LanguageModel
|
468
|
+
from ..prompts import Prompt
|
401
469
|
|
402
470
|
prompt_data = json_dict.get("prompt", {})
|
403
471
|
prompt_d = {}
|
@@ -436,24 +504,42 @@ class Result(Base, UserDict):
|
|
436
504
|
Result(...)
|
437
505
|
|
438
506
|
"""
|
439
|
-
from
|
507
|
+
from .results import Results
|
440
508
|
|
441
509
|
return Results.example()[0]
|
442
510
|
|
443
|
-
def score_with_answer_key(self, answer_key: dict) ->
|
444
|
-
"""Score the result
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
511
|
+
def score_with_answer_key(self, answer_key: dict) -> dict[str, int]:
|
512
|
+
"""Score the result against a reference answer key.
|
513
|
+
|
514
|
+
This method evaluates the correctness of answers by comparing them to a
|
515
|
+
provided answer key. It returns a dictionary with counts of correct,
|
516
|
+
incorrect, and missing answers.
|
517
|
+
|
518
|
+
The answer key can contain either single values or lists of acceptable values.
|
519
|
+
If a list is provided, the answer is considered correct if it matches any
|
520
|
+
value in the list.
|
521
|
+
|
522
|
+
Parameters:
|
523
|
+
answer_key: A dictionary mapping question names to expected answers.
|
524
|
+
Values can be single items or lists of acceptable answers.
|
525
|
+
|
526
|
+
Returns:
|
527
|
+
A dictionary with keys 'correct', 'incorrect', and 'missing', indicating
|
528
|
+
the counts of each answer type.
|
529
|
+
|
530
|
+
Examples:
|
531
|
+
>>> Result.example()['answer']
|
532
|
+
{'how_feeling': 'OK', 'how_feeling_yesterday': 'Great'}
|
533
|
+
|
534
|
+
>>> # Using exact match answer key
|
535
|
+
>>> answer_key = {'how_feeling': 'OK', 'how_feeling_yesterday': 'Great'}
|
536
|
+
>>> Result.example().score_with_answer_key(answer_key)
|
537
|
+
{'correct': 2, 'incorrect': 0, 'missing': 0}
|
538
|
+
|
539
|
+
>>> # Using answer key with multiple acceptable answers
|
540
|
+
>>> answer_key = {'how_feeling': 'OK', 'how_feeling_yesterday': ['Great', 'Good']}
|
541
|
+
>>> Result.example().score_with_answer_key(answer_key)
|
542
|
+
{'correct': 2, 'incorrect': 0, 'missing': 0}
|
457
543
|
"""
|
458
544
|
final_scores = {'correct': 0, 'incorrect': 0, 'missing': 0}
|
459
545
|
for question_name, answer in self.answer.items():
|