edsl 0.1.39__py3-none-any.whl → 0.1.39.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +116 -197
- edsl/__init__.py +7 -15
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +147 -351
- edsl/agents/AgentList.py +73 -211
- edsl/agents/Invigilator.py +50 -101
- edsl/agents/InvigilatorBase.py +70 -62
- edsl/agents/PromptConstructor.py +225 -143
- edsl/agents/__init__.py +1 -0
- edsl/agents/prompt_helpers.py +3 -3
- edsl/auto/AutoStudy.py +5 -18
- edsl/auto/StageBase.py +40 -53
- edsl/auto/StageQuestions.py +1 -2
- edsl/auto/utilities.py +6 -0
- edsl/config.py +2 -22
- edsl/conversation/car_buying.py +1 -2
- edsl/coop/PriceFetcher.py +1 -1
- edsl/coop/coop.py +47 -125
- edsl/coop/utils.py +14 -14
- edsl/data/Cache.py +27 -45
- edsl/data/CacheEntry.py +15 -12
- edsl/data/CacheHandler.py +12 -31
- edsl/data/RemoteCacheSync.py +46 -154
- edsl/data/__init__.py +3 -4
- edsl/data_transfer_models.py +1 -2
- edsl/enums.py +0 -27
- edsl/exceptions/__init__.py +50 -50
- edsl/exceptions/agents.py +0 -12
- edsl/exceptions/questions.py +6 -24
- edsl/exceptions/scenarios.py +0 -7
- edsl/inference_services/AnthropicService.py +19 -38
- edsl/inference_services/AwsBedrock.py +2 -0
- edsl/inference_services/AzureAI.py +2 -0
- edsl/inference_services/GoogleService.py +12 -7
- edsl/inference_services/InferenceServiceABC.py +85 -18
- edsl/inference_services/InferenceServicesCollection.py +79 -120
- edsl/inference_services/MistralAIService.py +3 -0
- edsl/inference_services/OpenAIService.py +35 -47
- edsl/inference_services/PerplexityService.py +3 -0
- edsl/inference_services/TestService.py +10 -11
- edsl/inference_services/TogetherAIService.py +3 -5
- edsl/jobs/Answers.py +14 -1
- edsl/jobs/Jobs.py +431 -356
- edsl/jobs/JobsChecks.py +10 -35
- edsl/jobs/JobsPrompts.py +4 -6
- edsl/jobs/JobsRemoteInferenceHandler.py +133 -205
- edsl/jobs/buckets/BucketCollection.py +3 -44
- edsl/jobs/buckets/TokenBucket.py +21 -53
- edsl/jobs/interviews/Interview.py +408 -143
- edsl/jobs/runners/JobsRunnerAsyncio.py +403 -88
- edsl/jobs/runners/JobsRunnerStatus.py +165 -133
- edsl/jobs/tasks/QuestionTaskCreator.py +19 -21
- edsl/jobs/tasks/TaskHistory.py +18 -38
- edsl/jobs/tasks/task_status_enum.py +2 -0
- edsl/language_models/KeyLookup.py +30 -0
- edsl/language_models/LanguageModel.py +236 -194
- edsl/language_models/ModelList.py +19 -28
- edsl/language_models/__init__.py +2 -1
- edsl/language_models/registry.py +190 -0
- edsl/language_models/repair.py +2 -2
- edsl/language_models/unused/ReplicateBase.py +83 -0
- edsl/language_models/utilities.py +4 -5
- edsl/notebooks/Notebook.py +14 -19
- edsl/prompts/Prompt.py +39 -29
- edsl/questions/{answer_validator_mixin.py → AnswerValidatorMixin.py} +2 -47
- edsl/questions/QuestionBase.py +214 -68
- edsl/questions/{question_base_gen_mixin.py → QuestionBaseGenMixin.py} +50 -57
- edsl/questions/QuestionBasePromptsMixin.py +3 -7
- edsl/questions/QuestionBudget.py +1 -1
- edsl/questions/QuestionCheckBox.py +3 -3
- edsl/questions/QuestionExtract.py +7 -5
- edsl/questions/QuestionFreeText.py +3 -2
- edsl/questions/QuestionList.py +18 -10
- edsl/questions/QuestionMultipleChoice.py +23 -67
- edsl/questions/QuestionNumerical.py +4 -2
- edsl/questions/QuestionRank.py +17 -7
- edsl/questions/{response_validator_abc.py → ResponseValidatorABC.py} +26 -40
- edsl/questions/SimpleAskMixin.py +3 -4
- edsl/questions/__init__.py +1 -2
- edsl/questions/derived/QuestionLinearScale.py +3 -6
- edsl/questions/derived/QuestionTopK.py +1 -1
- edsl/questions/descriptors.py +3 -17
- edsl/questions/question_registry.py +1 -1
- edsl/results/CSSParameterizer.py +1 -1
- edsl/results/Dataset.py +7 -170
- edsl/results/DatasetExportMixin.py +305 -168
- edsl/results/DatasetTree.py +8 -28
- edsl/results/Result.py +206 -298
- edsl/results/Results.py +131 -149
- edsl/results/ResultsDBMixin.py +238 -0
- edsl/results/ResultsExportMixin.py +0 -2
- edsl/results/{results_selector.py → Selector.py} +13 -23
- edsl/results/TableDisplay.py +171 -98
- edsl/results/__init__.py +1 -1
- edsl/scenarios/FileStore.py +239 -150
- edsl/scenarios/Scenario.py +193 -90
- edsl/scenarios/ScenarioHtmlMixin.py +3 -4
- edsl/scenarios/{scenario_join.py → ScenarioJoin.py} +6 -10
- edsl/scenarios/ScenarioList.py +244 -415
- edsl/scenarios/ScenarioListExportMixin.py +7 -0
- edsl/scenarios/ScenarioListPdfMixin.py +37 -15
- edsl/scenarios/__init__.py +2 -1
- edsl/study/ObjectEntry.py +1 -1
- edsl/study/SnapShot.py +1 -1
- edsl/study/Study.py +12 -5
- edsl/surveys/Rule.py +4 -5
- edsl/surveys/RuleCollection.py +27 -25
- edsl/surveys/Survey.py +791 -270
- edsl/surveys/SurveyCSS.py +8 -20
- edsl/surveys/{SurveyFlowVisualization.py → SurveyFlowVisualizationMixin.py} +9 -11
- edsl/surveys/__init__.py +2 -4
- edsl/surveys/descriptors.py +2 -6
- edsl/surveys/instructions/ChangeInstruction.py +2 -1
- edsl/surveys/instructions/Instruction.py +13 -4
- edsl/surveys/instructions/InstructionCollection.py +6 -11
- edsl/templates/error_reporting/interview_details.html +1 -1
- edsl/templates/error_reporting/report.html +1 -1
- edsl/tools/plotting.py +1 -1
- edsl/utilities/utilities.py +23 -35
- {edsl-0.1.39.dist-info → edsl-0.1.39.dev1.dist-info}/METADATA +10 -12
- edsl-0.1.39.dev1.dist-info/RECORD +277 -0
- {edsl-0.1.39.dist-info → edsl-0.1.39.dev1.dist-info}/WHEEL +1 -1
- edsl/agents/QuestionInstructionPromptBuilder.py +0 -128
- edsl/agents/QuestionTemplateReplacementsBuilder.py +0 -137
- edsl/agents/question_option_processor.py +0 -172
- edsl/coop/CoopFunctionsMixin.py +0 -15
- edsl/coop/ExpectedParrotKeyHandler.py +0 -125
- edsl/exceptions/inference_services.py +0 -5
- edsl/inference_services/AvailableModelCacheHandler.py +0 -184
- edsl/inference_services/AvailableModelFetcher.py +0 -215
- edsl/inference_services/ServiceAvailability.py +0 -135
- edsl/inference_services/data_structures.py +0 -134
- edsl/jobs/AnswerQuestionFunctionConstructor.py +0 -223
- edsl/jobs/FetchInvigilator.py +0 -47
- edsl/jobs/InterviewTaskManager.py +0 -98
- edsl/jobs/InterviewsConstructor.py +0 -50
- edsl/jobs/JobsComponentConstructor.py +0 -189
- edsl/jobs/JobsRemoteInferenceLogger.py +0 -239
- edsl/jobs/RequestTokenEstimator.py +0 -30
- edsl/jobs/async_interview_runner.py +0 -138
- edsl/jobs/buckets/TokenBucketAPI.py +0 -211
- edsl/jobs/buckets/TokenBucketClient.py +0 -191
- edsl/jobs/check_survey_scenario_compatibility.py +0 -85
- edsl/jobs/data_structures.py +0 -120
- edsl/jobs/decorators.py +0 -35
- edsl/jobs/jobs_status_enums.py +0 -9
- edsl/jobs/loggers/HTMLTableJobLogger.py +0 -304
- edsl/jobs/results_exceptions_handler.py +0 -98
- edsl/language_models/ComputeCost.py +0 -63
- edsl/language_models/PriceManager.py +0 -127
- edsl/language_models/RawResponseHandler.py +0 -106
- edsl/language_models/ServiceDataSources.py +0 -0
- edsl/language_models/key_management/KeyLookup.py +0 -63
- edsl/language_models/key_management/KeyLookupBuilder.py +0 -273
- edsl/language_models/key_management/KeyLookupCollection.py +0 -38
- edsl/language_models/key_management/__init__.py +0 -0
- edsl/language_models/key_management/models.py +0 -131
- edsl/language_models/model.py +0 -256
- edsl/notebooks/NotebookToLaTeX.py +0 -142
- edsl/questions/ExceptionExplainer.py +0 -77
- edsl/questions/HTMLQuestion.py +0 -103
- edsl/questions/QuestionMatrix.py +0 -265
- edsl/questions/data_structures.py +0 -20
- edsl/questions/loop_processor.py +0 -149
- edsl/questions/response_validator_factory.py +0 -34
- edsl/questions/templates/matrix/__init__.py +0 -1
- edsl/questions/templates/matrix/answering_instructions.jinja +0 -5
- edsl/questions/templates/matrix/question_presentation.jinja +0 -20
- edsl/results/MarkdownToDocx.py +0 -122
- edsl/results/MarkdownToPDF.py +0 -111
- edsl/results/TextEditor.py +0 -50
- edsl/results/file_exports.py +0 -252
- edsl/results/smart_objects.py +0 -96
- edsl/results/table_data_class.py +0 -12
- edsl/results/table_renderers.py +0 -118
- edsl/scenarios/ConstructDownloadLink.py +0 -109
- edsl/scenarios/DocumentChunker.py +0 -102
- edsl/scenarios/DocxScenario.py +0 -16
- edsl/scenarios/PdfExtractor.py +0 -40
- edsl/scenarios/directory_scanner.py +0 -96
- edsl/scenarios/file_methods.py +0 -85
- edsl/scenarios/handlers/__init__.py +0 -13
- edsl/scenarios/handlers/csv.py +0 -49
- edsl/scenarios/handlers/docx.py +0 -76
- edsl/scenarios/handlers/html.py +0 -37
- edsl/scenarios/handlers/json.py +0 -111
- edsl/scenarios/handlers/latex.py +0 -5
- edsl/scenarios/handlers/md.py +0 -51
- edsl/scenarios/handlers/pdf.py +0 -68
- edsl/scenarios/handlers/png.py +0 -39
- edsl/scenarios/handlers/pptx.py +0 -105
- edsl/scenarios/handlers/py.py +0 -294
- edsl/scenarios/handlers/sql.py +0 -313
- edsl/scenarios/handlers/sqlite.py +0 -149
- edsl/scenarios/handlers/txt.py +0 -33
- edsl/scenarios/scenario_selector.py +0 -156
- edsl/surveys/ConstructDAG.py +0 -92
- edsl/surveys/EditSurvey.py +0 -221
- edsl/surveys/InstructionHandler.py +0 -100
- edsl/surveys/MemoryManagement.py +0 -72
- edsl/surveys/RuleManager.py +0 -172
- edsl/surveys/Simulator.py +0 -75
- edsl/surveys/SurveyToApp.py +0 -141
- edsl/utilities/PrettyList.py +0 -56
- edsl/utilities/is_notebook.py +0 -18
- edsl/utilities/is_valid_variable_name.py +0 -11
- edsl/utilities/remove_edsl_version.py +0 -24
- edsl-0.1.39.dist-info/RECORD +0 -358
- /edsl/questions/{register_questions_meta.py → RegisterQuestionsMeta.py} +0 -0
- /edsl/results/{results_fetch_mixin.py → ResultsFetchMixin.py} +0 -0
- /edsl/results/{results_tools_mixin.py → ResultsToolsMixin.py} +0 -0
- {edsl-0.1.39.dist-info → edsl-0.1.39.dev1.dist-info}/LICENSE +0 -0
edsl/results/Result.py
CHANGED
@@ -1,61 +1,81 @@
|
|
1
1
|
# """This module contains the Result class, which captures the result of one interview."""
|
2
2
|
from __future__ import annotations
|
3
|
-
import inspect
|
4
3
|
from collections import UserDict
|
5
|
-
from typing import Any, Type, Callable, Optional
|
4
|
+
from typing import Any, Type, Callable, Optional
|
5
|
+
from collections import UserDict
|
6
6
|
from edsl.Base import Base
|
7
7
|
from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
|
8
8
|
|
9
|
-
if TYPE_CHECKING:
|
10
|
-
from edsl.agents.Agent import Agent
|
11
|
-
from edsl.scenarios.Scenario import Scenario
|
12
|
-
from edsl.language_models.LanguageModel import LanguageModel
|
13
|
-
from edsl.prompts.Prompt import Prompt
|
14
|
-
from edsl.surveys.Survey import Survey
|
15
9
|
|
10
|
+
class PromptDict(UserDict):
|
11
|
+
"""A dictionary that is used to store the prompt for a given result."""
|
12
|
+
|
13
|
+
def rich_print(self):
|
14
|
+
"""Display an object as a table."""
|
15
|
+
from rich.table import Table
|
16
|
+
|
17
|
+
table = Table(title="")
|
18
|
+
table.add_column("Attribute", style="bold")
|
19
|
+
table.add_column("Value")
|
16
20
|
|
17
|
-
|
18
|
-
|
21
|
+
to_display = self
|
22
|
+
for attr_name, attr_value in to_display.items():
|
23
|
+
table.add_row(attr_name, repr(attr_value))
|
19
24
|
|
25
|
+
return table
|
20
26
|
|
21
|
-
class AgentNamer:
|
22
|
-
"""Maintains a registry of agent names to ensure unique naming."""
|
23
27
|
|
24
|
-
|
25
|
-
|
28
|
+
def agent_namer_closure():
|
29
|
+
"""Return a function that can be used to name an agent."""
|
30
|
+
agent_dict = {}
|
26
31
|
|
27
|
-
def
|
28
|
-
"""
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
32
|
+
def agent_namer(agent):
|
33
|
+
"""Return a name for an agent. If the agent has been named before, return the same name. Otherwise, return a new name."""
|
34
|
+
nonlocal agent_dict
|
35
|
+
agent_count = len(agent_dict)
|
36
|
+
if id(agent) in agent_dict:
|
37
|
+
return agent_dict[id(agent)]
|
38
|
+
else:
|
39
|
+
agent_dict[id(agent)] = f"Agent_{agent_count}"
|
40
|
+
return agent_dict[id(agent)]
|
41
|
+
|
42
|
+
return agent_namer
|
33
43
|
|
34
44
|
|
35
|
-
|
36
|
-
agent_namer = AgentNamer().get_name
|
45
|
+
agent_namer = agent_namer_closure()
|
37
46
|
|
38
47
|
|
39
48
|
class Result(Base, UserDict):
|
40
49
|
"""
|
41
50
|
This class captures the result of one interview.
|
51
|
+
|
52
|
+
The answer dictionary has the structure:
|
53
|
+
|
54
|
+
>>> import warnings
|
55
|
+
>>> warnings.simplefilter("ignore", UserWarning)
|
56
|
+
>>> Result.example().answer == {'how_feeling_yesterday': 'Great', 'how_feeling': 'OK'}
|
57
|
+
True
|
58
|
+
|
59
|
+
Its main data is an Agent, a Scenario, a Model, an Iteration, and an Answer.
|
60
|
+
These are stored both in the UserDict and as attributes.
|
61
|
+
|
62
|
+
|
42
63
|
"""
|
43
64
|
|
44
65
|
def __init__(
|
45
66
|
self,
|
46
67
|
agent: "Agent",
|
47
68
|
scenario: "Scenario",
|
48
|
-
model: "LanguageModel",
|
69
|
+
model: Type["LanguageModel"],
|
49
70
|
iteration: int,
|
50
|
-
answer:
|
51
|
-
prompt: dict[
|
52
|
-
raw_model_response
|
71
|
+
answer: str,
|
72
|
+
prompt: dict[str, str] = None,
|
73
|
+
raw_model_response=None,
|
53
74
|
survey: Optional["Survey"] = None,
|
54
|
-
question_to_attributes: Optional[dict
|
75
|
+
question_to_attributes: Optional[dict] = None,
|
55
76
|
generated_tokens: Optional[dict] = None,
|
56
77
|
comments_dict: Optional[dict] = None,
|
57
|
-
cache_used_dict: Optional[dict
|
58
|
-
indices: Optional[dict] = None,
|
78
|
+
cache_used_dict: Optional[dict] = None,
|
59
79
|
):
|
60
80
|
"""Initialize a Result object.
|
61
81
|
|
@@ -66,17 +86,26 @@ class Result(Base, UserDict):
|
|
66
86
|
:param answer: The answer string.
|
67
87
|
:param prompt: A dictionary of prompts.
|
68
88
|
:param raw_model_response: The raw model response.
|
69
|
-
:param survey: The Survey object.
|
70
|
-
:param question_to_attributes: A dictionary of question attributes.
|
71
|
-
:param generated_tokens: A dictionary of generated tokens.
|
72
|
-
:param comments_dict: A dictionary of comments.
|
73
|
-
:param cache_used_dict: A dictionary of cache usage.
|
74
|
-
:param indices: A dictionary of indices.
|
75
89
|
|
76
90
|
"""
|
77
|
-
|
78
|
-
question_to_attributes
|
79
|
-
|
91
|
+
if question_to_attributes is not None:
|
92
|
+
question_to_attributes = question_to_attributes
|
93
|
+
else:
|
94
|
+
question_to_attributes = {}
|
95
|
+
|
96
|
+
if survey is not None:
|
97
|
+
question_to_attributes = {
|
98
|
+
q.question_name: {
|
99
|
+
"question_text": q.question_text,
|
100
|
+
"question_type": q.question_type,
|
101
|
+
"question_options": (
|
102
|
+
None
|
103
|
+
if not hasattr(q, "question_options")
|
104
|
+
else q.question_options
|
105
|
+
),
|
106
|
+
}
|
107
|
+
for q in survey.questions
|
108
|
+
}
|
80
109
|
|
81
110
|
data = {
|
82
111
|
"agent": agent,
|
@@ -89,127 +118,81 @@ class Result(Base, UserDict):
|
|
89
118
|
"question_to_attributes": question_to_attributes,
|
90
119
|
"generated_tokens": generated_tokens or {},
|
91
120
|
"comments_dict": comments_dict or {},
|
92
|
-
"cache_used_dict": cache_used_dict or {},
|
93
121
|
}
|
94
122
|
super().__init__(**data)
|
95
|
-
|
96
|
-
self.
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
123
|
+
# but also store the data as attributes
|
124
|
+
self.agent = agent
|
125
|
+
self.scenario = scenario
|
126
|
+
self.model = model
|
127
|
+
self.iteration = iteration
|
128
|
+
self.answer = answer
|
129
|
+
self.prompt = prompt or {}
|
130
|
+
self.raw_model_response = raw_model_response or {}
|
131
|
+
self.survey = survey
|
132
|
+
self.question_to_attributes = question_to_attributes
|
133
|
+
self.generated_tokens = generated_tokens
|
134
|
+
self.comments_dict = comments_dict or {}
|
135
|
+
self.cache_used_dict = cache_used_dict or {}
|
136
|
+
|
137
|
+
self._combined_dict = None
|
138
|
+
self._problem_keys = None
|
139
|
+
|
140
|
+
def _repr_html_(self):
|
141
|
+
# d = self.to_dict(add_edsl_version=False)
|
142
|
+
d = self.to_dict(add_edsl_version=False)
|
143
|
+
data = [[k, v] for k, v in d.items()]
|
144
|
+
from tabulate import tabulate
|
145
|
+
|
146
|
+
table = str(tabulate(data, headers=["keys", "values"], tablefmt="html"))
|
147
|
+
return f"<pre>{table}</pre>"
|
148
|
+
|
149
|
+
###############
|
150
|
+
# Used in Results
|
151
|
+
###############
|
118
152
|
@property
|
119
|
-
def
|
120
|
-
"""Return the
|
121
|
-
|
122
|
-
|
123
|
-
@property
|
124
|
-
def scenario(self) -> "Scenario":
|
125
|
-
"""Return the Scenario object."""
|
126
|
-
return self.data["scenario"]
|
127
|
-
|
128
|
-
@property
|
129
|
-
def model(self) -> "LanguageModel":
|
130
|
-
"""Return the LanguageModel object."""
|
131
|
-
return self.data["model"]
|
132
|
-
|
133
|
-
@property
|
134
|
-
def answer(self) -> dict[QuestionName, AnswerValue]:
|
135
|
-
"""Return the answers."""
|
136
|
-
return self.data["answer"]
|
137
|
-
|
138
|
-
@staticmethod
|
139
|
-
def _create_agent_sub_dict(agent) -> dict:
|
140
|
-
"""Create a dictionary of agent details"""
|
141
|
-
if agent.name is None:
|
142
|
-
agent_name = agent_namer(agent)
|
153
|
+
def sub_dicts(self) -> dict[str, dict]:
|
154
|
+
"""Return a dictionary where keys are strings for each of the main class attributes/objects."""
|
155
|
+
if self.agent.name is None:
|
156
|
+
agent_name = agent_namer(self.agent)
|
143
157
|
else:
|
144
|
-
agent_name = agent.name
|
158
|
+
agent_name = self.agent.name
|
159
|
+
|
160
|
+
# comments_dict = {k: v for k, v in self.answer.items() if k.endswith("_comment")}
|
161
|
+
question_text_dict = {}
|
162
|
+
question_options_dict = {}
|
163
|
+
question_type_dict = {}
|
164
|
+
for key, _ in self.answer.items():
|
165
|
+
if key in self.question_to_attributes:
|
166
|
+
# You might be tempted to just use the naked key
|
167
|
+
# but this is a bad idea because it pollutes the namespace
|
168
|
+
question_text_dict[
|
169
|
+
key + "_question_text"
|
170
|
+
] = self.question_to_attributes[key]["question_text"]
|
171
|
+
question_options_dict[
|
172
|
+
key + "_question_options"
|
173
|
+
] = self.question_to_attributes[key]["question_options"]
|
174
|
+
question_type_dict[
|
175
|
+
key + "_question_type"
|
176
|
+
] = self.question_to_attributes[key]["question_type"]
|
145
177
|
|
146
178
|
return {
|
147
|
-
"agent": agent.traits
|
179
|
+
"agent": self.agent.traits
|
148
180
|
| {"agent_name": agent_name}
|
149
|
-
| {"agent_instruction": agent.instruction},
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
"
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
"iteration": {"iteration": iteration},
|
162
|
-
}
|
163
|
-
|
164
|
-
def _construct_sub_dicts(self) -> dict[str, dict]:
|
165
|
-
"""Construct a dictionary of sub-dictionaries for the Result object."""
|
166
|
-
sub_dicts_needing_new_keys = {
|
167
|
-
"question_text": {},
|
168
|
-
"question_options": {},
|
169
|
-
"question_type": {},
|
170
|
-
}
|
171
|
-
|
172
|
-
for question_name in self.data["answer"]:
|
173
|
-
if question_name in self.question_to_attributes:
|
174
|
-
for dictionary_name in sub_dicts_needing_new_keys:
|
175
|
-
new_key = question_name + "_" + dictionary_name
|
176
|
-
sub_dicts_needing_new_keys[dictionary_name][new_key] = (
|
177
|
-
self.question_to_attributes[question_name][dictionary_name]
|
178
|
-
)
|
179
|
-
|
180
|
-
new_cache_dict = {
|
181
|
-
f"{k}_cache_used": v for k, v in self.data["cache_used_dict"].items()
|
181
|
+
| {"agent_instruction": self.agent.instruction},
|
182
|
+
"scenario": self.scenario,
|
183
|
+
"model": self.model.parameters | {"model": self.model.model},
|
184
|
+
"answer": self.answer,
|
185
|
+
"prompt": self.prompt,
|
186
|
+
"raw_model_response": self.raw_model_response,
|
187
|
+
"iteration": {"iteration": self.iteration},
|
188
|
+
"question_text": question_text_dict,
|
189
|
+
"question_options": question_options_dict,
|
190
|
+
"question_type": question_type_dict,
|
191
|
+
"comment": self.comments_dict,
|
192
|
+
"generated_tokens": self.generated_tokens,
|
182
193
|
}
|
183
194
|
|
184
|
-
|
185
|
-
**self._create_agent_sub_dict(self.data["agent"]),
|
186
|
-
**self._create_model_sub_dict(self.data["model"]),
|
187
|
-
**self._iteration_sub_dict(self.data["iteration"]),
|
188
|
-
"scenario": self.data["scenario"],
|
189
|
-
"answer": self.data["answer"],
|
190
|
-
"prompt": self.data["prompt"],
|
191
|
-
"comment": self.data["comments_dict"],
|
192
|
-
"generated_tokens": self.data["generated_tokens"],
|
193
|
-
"raw_model_response": self.data["raw_model_response"],
|
194
|
-
"question_text": sub_dicts_needing_new_keys["question_text"],
|
195
|
-
"question_options": sub_dicts_needing_new_keys["question_options"],
|
196
|
-
"question_type": sub_dicts_needing_new_keys["question_type"],
|
197
|
-
"cache_used": new_cache_dict,
|
198
|
-
}
|
199
|
-
if hasattr(self, "indices") and self.indices is not None:
|
200
|
-
d["agent"].update({"agent_index": self.indices["agent"]})
|
201
|
-
d["scenario"].update({"scenario_index": self.indices["scenario"]})
|
202
|
-
d["model"].update({"model_index": self.indices["model"]})
|
203
|
-
return d
|
204
|
-
|
205
|
-
@property
|
206
|
-
def sub_dicts(self) -> dict[str, dict]:
|
207
|
-
"""Return a dictionary where keys are strings for each of the main class attributes/objects."""
|
208
|
-
if self._sub_dicts is None:
|
209
|
-
self._sub_dicts = self._construct_sub_dicts()
|
210
|
-
return self._sub_dicts
|
211
|
-
|
212
|
-
def check_expression(self, expression: str) -> None:
|
195
|
+
def check_expression(self, expression) -> None:
|
213
196
|
for key in self.problem_keys:
|
214
197
|
if key in expression and not key + "." in expression:
|
215
198
|
raise ValueError(
|
@@ -222,13 +205,11 @@ class Result(Base, UserDict):
|
|
222
205
|
raise NotImplementedError
|
223
206
|
|
224
207
|
@property
|
225
|
-
def problem_keys(self)
|
208
|
+
def problem_keys(self):
|
226
209
|
"""Return a list of keys that are problematic."""
|
227
210
|
return self._problem_keys
|
228
211
|
|
229
|
-
def _compute_combined_dict_and_problem_keys(
|
230
|
-
self,
|
231
|
-
) -> tuple[dict[str, Any], list[str]]:
|
212
|
+
def _compute_combined_dict_and_problem_keys(self) -> None:
|
232
213
|
combined = {}
|
233
214
|
problem_keys = []
|
234
215
|
for key, sub_dict in self.sub_dicts.items():
|
@@ -241,7 +222,8 @@ class Result(Base, UserDict):
|
|
241
222
|
combined.update({key: sub_dict})
|
242
223
|
# I *think* this allows us to do things like "answer.how_feeling", i.e., that the evaluator can use
|
243
224
|
# dot notation to access the subdicts.
|
244
|
-
|
225
|
+
self._combined_dict = combined
|
226
|
+
self._problem_keys = problem_keys
|
245
227
|
|
246
228
|
@property
|
247
229
|
def combined_dict(self) -> dict[str, Any]:
|
@@ -252,14 +234,11 @@ class Result(Base, UserDict):
|
|
252
234
|
'OK'
|
253
235
|
"""
|
254
236
|
if self._combined_dict is None or self._problem_keys is None:
|
255
|
-
(
|
256
|
-
self._combined_dict,
|
257
|
-
self._problem_keys,
|
258
|
-
) = self._compute_combined_dict_and_problem_keys()
|
237
|
+
self._compute_combined_dict_and_problem_keys()
|
259
238
|
return self._combined_dict
|
260
239
|
|
261
240
|
@property
|
262
|
-
def problem_keys(self)
|
241
|
+
def problem_keys(self):
|
263
242
|
"""Return a list of keys that are problematic."""
|
264
243
|
if self._combined_dict is None or self._problem_keys is None:
|
265
244
|
self._compute_combined_dict_and_problem_keys()
|
@@ -299,6 +278,7 @@ class Result(Base, UserDict):
|
|
299
278
|
)
|
300
279
|
problem_keys.append((key, data_type))
|
301
280
|
key = f"{key}_{data_type}"
|
281
|
+
# raise ValueError(f"Key '{key}' is already in the dictionary")
|
302
282
|
d[key] = data_type
|
303
283
|
|
304
284
|
for key, data_type in problem_keys:
|
@@ -307,16 +287,37 @@ class Result(Base, UserDict):
|
|
307
287
|
].pop(key)
|
308
288
|
return d
|
309
289
|
|
310
|
-
def
|
311
|
-
"""Return a
|
290
|
+
def rows(self, index) -> tuple[int, str, str, str]:
|
291
|
+
"""Return a generator of rows for the Result object."""
|
292
|
+
for data_type, subdict in self.sub_dicts.items():
|
293
|
+
for key, value in subdict.items():
|
294
|
+
yield (index, data_type, key, str(value))
|
295
|
+
|
296
|
+
def leaves(self):
|
297
|
+
leaves = []
|
298
|
+
for question_name, answer in self.answer.items():
|
299
|
+
if not question_name.endswith("_comment"):
|
300
|
+
leaves.append(
|
301
|
+
{
|
302
|
+
"question": f"({question_name}): "
|
303
|
+
+ str(
|
304
|
+
self.question_to_attributes[question_name]["question_text"]
|
305
|
+
),
|
306
|
+
"answer": answer,
|
307
|
+
"comment": self.answer.get(question_name + "_comment", ""),
|
308
|
+
"scenario": repr(self.scenario),
|
309
|
+
"agent": repr(self.agent),
|
310
|
+
"model": repr(self.model),
|
311
|
+
"iteration": self.iteration,
|
312
|
+
}
|
313
|
+
)
|
314
|
+
return leaves
|
312
315
|
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
False
|
319
|
-
"""
|
316
|
+
###############
|
317
|
+
# Useful
|
318
|
+
###############
|
319
|
+
def copy(self) -> Result:
|
320
|
+
"""Return a copy of the Result object."""
|
320
321
|
return Result.from_dict(self.to_dict())
|
321
322
|
|
322
323
|
def __eq__(self, other) -> bool:
|
@@ -327,16 +328,17 @@ class Result(Base, UserDict):
|
|
327
328
|
True
|
328
329
|
|
329
330
|
"""
|
330
|
-
return
|
331
|
+
return self.to_dict() == other.to_dict()
|
331
332
|
|
332
|
-
|
333
|
-
|
334
|
-
|
333
|
+
###############
|
334
|
+
# Serialization
|
335
|
+
###############
|
336
|
+
def to_dict(self, add_edsl_version=True) -> dict[str, Any]:
|
335
337
|
"""Return a dictionary representation of the Result object.
|
336
338
|
|
337
339
|
>>> r = Result.example()
|
338
340
|
>>> r.to_dict()['scenario']
|
339
|
-
{'period': 'morning', '
|
341
|
+
{'period': 'morning', 'edsl_version': '...', 'edsl_class_name': 'Scenario'}
|
340
342
|
"""
|
341
343
|
|
342
344
|
def convert_value(value, add_edsl_version=True):
|
@@ -364,26 +366,21 @@ class Result(Base, UserDict):
|
|
364
366
|
d["edsl_version"] = __version__
|
365
367
|
d["edsl_class_name"] = "Result"
|
366
368
|
|
367
|
-
if include_cache_info:
|
368
|
-
d["cache_used_dict"] = self.data["cache_used_dict"]
|
369
|
-
else:
|
370
|
-
d.pop("cache_used_dict", None)
|
371
|
-
|
372
369
|
return d
|
373
370
|
|
374
371
|
def __hash__(self):
|
375
372
|
"""Return a hash of the Result object."""
|
376
373
|
from edsl.utilities.utilities import dict_hash
|
377
374
|
|
378
|
-
return dict_hash(self.to_dict(add_edsl_version=False
|
375
|
+
return dict_hash(self.to_dict(add_edsl_version=False))
|
379
376
|
|
380
377
|
@classmethod
|
381
378
|
@remove_edsl_version
|
382
379
|
def from_dict(self, json_dict: dict) -> Result:
|
383
380
|
"""Return a Result object from a dictionary representation."""
|
384
381
|
|
385
|
-
from edsl
|
386
|
-
from edsl
|
382
|
+
from edsl import Agent
|
383
|
+
from edsl import Scenario
|
387
384
|
from edsl.language_models.LanguageModel import LanguageModel
|
388
385
|
from edsl.prompts.Prompt import Prompt
|
389
386
|
|
@@ -405,34 +402,51 @@ class Result(Base, UserDict):
|
|
405
402
|
question_to_attributes=json_dict.get("question_to_attributes", None),
|
406
403
|
generated_tokens=json_dict.get("generated_tokens", {}),
|
407
404
|
comments_dict=json_dict.get("comments_dict", {}),
|
408
|
-
cache_used_dict=json_dict.get("cache_used_dict", {}),
|
409
405
|
)
|
410
406
|
return result
|
411
407
|
|
408
|
+
def rich_print(self) -> None:
|
409
|
+
"""Display an object as a table."""
|
410
|
+
# from edsl.utilities import print_dict_with_rich
|
411
|
+
from rich import print
|
412
|
+
from rich.table import Table
|
413
|
+
|
414
|
+
table = Table(title="Result")
|
415
|
+
table.add_column("Attribute", style="bold")
|
416
|
+
table.add_column("Value")
|
417
|
+
|
418
|
+
to_display = self.__dict__.copy()
|
419
|
+
data = to_display.pop("data", None)
|
420
|
+
for attr_name, attr_value in to_display.items():
|
421
|
+
if hasattr(attr_value, "rich_print"):
|
422
|
+
table.add_row(attr_name, attr_value.rich_print())
|
423
|
+
elif isinstance(attr_value, dict):
|
424
|
+
a = PromptDict(attr_value)
|
425
|
+
table.add_row(attr_name, a.rich_print())
|
426
|
+
else:
|
427
|
+
table.add_row(attr_name, repr(attr_value))
|
428
|
+
return table
|
429
|
+
|
412
430
|
def __repr__(self):
|
413
431
|
"""Return a string representation of the Result object."""
|
414
|
-
|
415
|
-
return f"{self.__class__.__name__}({params})"
|
432
|
+
return f"Result(agent={repr(self.agent)}, scenario={repr(self.scenario)}, model={repr(self.model)}, iteration={self.iteration}, answer={repr(self.answer)}, prompt={repr(self.prompt)})"
|
416
433
|
|
417
434
|
@classmethod
|
418
435
|
def example(cls):
|
419
|
-
"""Return an example Result object.
|
420
|
-
|
421
|
-
>>> Result.example()
|
422
|
-
Result(...)
|
423
|
-
|
424
|
-
"""
|
436
|
+
"""Return an example Result object."""
|
425
437
|
from edsl.results.Results import Results
|
426
438
|
|
427
439
|
return Results.example()[0]
|
428
440
|
|
429
|
-
def score(self, scoring_function: Callable) ->
|
441
|
+
def score(self, scoring_function: Callable) -> Any:
|
430
442
|
"""Score the result using a passed-in scoring function.
|
431
443
|
|
432
444
|
>>> def f(status): return 1 if status == 'Joyful' else 0
|
433
445
|
>>> Result.example().score(f)
|
434
446
|
1
|
435
447
|
"""
|
448
|
+
import inspect
|
449
|
+
|
436
450
|
signature = inspect.signature(scoring_function)
|
437
451
|
params = {}
|
438
452
|
for k, v in signature.parameters.items():
|
@@ -444,112 +458,6 @@ class Result(Base, UserDict):
|
|
444
458
|
raise ValueError(f"Parameter {k} not found in Result object")
|
445
459
|
return scoring_function(**params)
|
446
460
|
|
447
|
-
@classmethod
|
448
|
-
def from_interview(
|
449
|
-
cls, interview, extracted_answers, model_response_objects
|
450
|
-
) -> Result:
|
451
|
-
"""Return a Result object from an interview dictionary."""
|
452
|
-
|
453
|
-
def get_question_results(
|
454
|
-
model_response_objects,
|
455
|
-
) -> dict[str, "EDSLResultObjectInput"]:
|
456
|
-
"""Maps the question name to the EDSLResultObjectInput."""
|
457
|
-
question_results = {}
|
458
|
-
for result in model_response_objects:
|
459
|
-
question_results[result.question_name] = result
|
460
|
-
return question_results
|
461
|
-
|
462
|
-
def get_generated_tokens_dict(answer_key_names) -> dict[str, str]:
|
463
|
-
generated_tokens_dict = {
|
464
|
-
k + "_generated_tokens": question_results[k].generated_tokens
|
465
|
-
for k in answer_key_names
|
466
|
-
}
|
467
|
-
return generated_tokens_dict
|
468
|
-
|
469
|
-
def get_comments_dict(answer_key_names) -> dict[str, str]:
|
470
|
-
comments_dict = {
|
471
|
-
k + "_comment": question_results[k].comment for k in answer_key_names
|
472
|
-
}
|
473
|
-
return comments_dict
|
474
|
-
|
475
|
-
def get_question_name_to_prompts(
|
476
|
-
model_response_objects,
|
477
|
-
) -> dict[str, dict[str, str]]:
|
478
|
-
question_name_to_prompts = dict({})
|
479
|
-
for result in model_response_objects:
|
480
|
-
question_name = result.question_name
|
481
|
-
question_name_to_prompts[question_name] = {
|
482
|
-
"user_prompt": result.prompts["user_prompt"],
|
483
|
-
"system_prompt": result.prompts["system_prompt"],
|
484
|
-
}
|
485
|
-
return question_name_to_prompts
|
486
|
-
|
487
|
-
def get_prompt_dictionary(answer_key_names, question_name_to_prompts):
|
488
|
-
prompt_dictionary = {}
|
489
|
-
for answer_key_name in answer_key_names:
|
490
|
-
prompt_dictionary[answer_key_name + "_user_prompt"] = (
|
491
|
-
question_name_to_prompts[answer_key_name]["user_prompt"]
|
492
|
-
)
|
493
|
-
prompt_dictionary[answer_key_name + "_system_prompt"] = (
|
494
|
-
question_name_to_prompts[answer_key_name]["system_prompt"]
|
495
|
-
)
|
496
|
-
return prompt_dictionary
|
497
|
-
|
498
|
-
def get_raw_model_results_and_cache_used_dictionary(model_response_objects):
|
499
|
-
raw_model_results_dictionary = {}
|
500
|
-
cache_used_dictionary = {}
|
501
|
-
for result in model_response_objects:
|
502
|
-
question_name = result.question_name
|
503
|
-
raw_model_results_dictionary[question_name + "_raw_model_response"] = (
|
504
|
-
result.raw_model_response
|
505
|
-
)
|
506
|
-
raw_model_results_dictionary[question_name + "_cost"] = result.cost
|
507
|
-
one_use_buys = (
|
508
|
-
"NA"
|
509
|
-
if isinstance(result.cost, str)
|
510
|
-
or result.cost == 0
|
511
|
-
or result.cost is None
|
512
|
-
else 1.0 / result.cost
|
513
|
-
)
|
514
|
-
raw_model_results_dictionary[question_name + "_one_usd_buys"] = (
|
515
|
-
one_use_buys
|
516
|
-
)
|
517
|
-
cache_used_dictionary[question_name] = result.cache_used
|
518
|
-
|
519
|
-
return raw_model_results_dictionary, cache_used_dictionary
|
520
|
-
|
521
|
-
question_results = get_question_results(model_response_objects)
|
522
|
-
answer_key_names = list(question_results.keys())
|
523
|
-
generated_tokens_dict = get_generated_tokens_dict(answer_key_names)
|
524
|
-
comments_dict = get_comments_dict(answer_key_names)
|
525
|
-
answer_dict = {k: extracted_answers[k] for k in answer_key_names}
|
526
|
-
|
527
|
-
question_name_to_prompts = get_question_name_to_prompts(model_response_objects)
|
528
|
-
prompt_dictionary = get_prompt_dictionary(
|
529
|
-
answer_key_names, question_name_to_prompts
|
530
|
-
)
|
531
|
-
raw_model_results_dictionary, cache_used_dictionary = (
|
532
|
-
get_raw_model_results_and_cache_used_dictionary(model_response_objects)
|
533
|
-
)
|
534
|
-
|
535
|
-
result = cls(
|
536
|
-
agent=interview.agent,
|
537
|
-
scenario=interview.scenario,
|
538
|
-
model=interview.model,
|
539
|
-
iteration=interview.iteration,
|
540
|
-
# Computed objects
|
541
|
-
answer=answer_dict,
|
542
|
-
prompt=prompt_dictionary,
|
543
|
-
raw_model_response=raw_model_results_dictionary,
|
544
|
-
survey=interview.survey,
|
545
|
-
generated_tokens=generated_tokens_dict,
|
546
|
-
comments_dict=comments_dict,
|
547
|
-
cache_used_dict=cache_used_dictionary,
|
548
|
-
indices=interview.indices,
|
549
|
-
)
|
550
|
-
result.interview_hash = interview.initial_hash
|
551
|
-
return result
|
552
|
-
|
553
461
|
|
554
462
|
if __name__ == "__main__":
|
555
463
|
import doctest
|