edsl 0.1.27.dev2__py3-none-any.whl → 0.1.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +107 -30
- edsl/BaseDiff.py +260 -0
- edsl/__init__.py +25 -21
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +103 -46
- edsl/agents/AgentList.py +97 -13
- edsl/agents/Invigilator.py +23 -10
- edsl/agents/InvigilatorBase.py +19 -14
- edsl/agents/PromptConstructionMixin.py +342 -100
- edsl/agents/descriptors.py +5 -2
- edsl/base/Base.py +289 -0
- edsl/config.py +2 -1
- edsl/conjure/AgentConstructionMixin.py +152 -0
- edsl/conjure/Conjure.py +56 -0
- edsl/conjure/InputData.py +659 -0
- edsl/conjure/InputDataCSV.py +48 -0
- edsl/conjure/InputDataMixinQuestionStats.py +182 -0
- edsl/conjure/InputDataPyRead.py +91 -0
- edsl/conjure/InputDataSPSS.py +8 -0
- edsl/conjure/InputDataStata.py +8 -0
- edsl/conjure/QuestionOptionMixin.py +76 -0
- edsl/conjure/QuestionTypeMixin.py +23 -0
- edsl/conjure/RawQuestion.py +65 -0
- edsl/conjure/SurveyResponses.py +7 -0
- edsl/conjure/__init__.py +9 -4
- edsl/conjure/examples/placeholder.txt +0 -0
- edsl/conjure/naming_utilities.py +263 -0
- edsl/conjure/utilities.py +165 -28
- edsl/conversation/Conversation.py +238 -0
- edsl/conversation/car_buying.py +58 -0
- edsl/conversation/mug_negotiation.py +81 -0
- edsl/conversation/next_speaker_utilities.py +93 -0
- edsl/coop/coop.py +337 -121
- edsl/coop/utils.py +56 -70
- edsl/data/Cache.py +74 -22
- edsl/data/CacheHandler.py +10 -9
- edsl/data/SQLiteDict.py +11 -3
- edsl/inference_services/AnthropicService.py +1 -0
- edsl/inference_services/DeepInfraService.py +20 -13
- edsl/inference_services/GoogleService.py +7 -1
- edsl/inference_services/InferenceServicesCollection.py +33 -7
- edsl/inference_services/OpenAIService.py +17 -10
- edsl/inference_services/models_available_cache.py +69 -0
- edsl/inference_services/rate_limits_cache.py +25 -0
- edsl/inference_services/write_available.py +10 -0
- edsl/jobs/Answers.py +15 -1
- edsl/jobs/Jobs.py +322 -73
- edsl/jobs/buckets/BucketCollection.py +9 -3
- edsl/jobs/buckets/ModelBuckets.py +4 -2
- edsl/jobs/buckets/TokenBucket.py +1 -2
- edsl/jobs/interviews/Interview.py +7 -10
- edsl/jobs/interviews/InterviewStatusMixin.py +3 -3
- edsl/jobs/interviews/InterviewTaskBuildingMixin.py +39 -20
- edsl/jobs/interviews/retry_management.py +4 -4
- edsl/jobs/runners/JobsRunnerAsyncio.py +103 -65
- edsl/jobs/runners/JobsRunnerStatusData.py +3 -3
- edsl/jobs/tasks/QuestionTaskCreator.py +4 -2
- edsl/jobs/tasks/TaskHistory.py +4 -3
- edsl/language_models/LanguageModel.py +42 -55
- edsl/language_models/ModelList.py +96 -0
- edsl/language_models/registry.py +14 -0
- edsl/language_models/repair.py +97 -25
- edsl/notebooks/Notebook.py +157 -32
- edsl/prompts/Prompt.py +31 -19
- edsl/questions/QuestionBase.py +145 -23
- edsl/questions/QuestionBudget.py +5 -6
- edsl/questions/QuestionCheckBox.py +7 -3
- edsl/questions/QuestionExtract.py +5 -3
- edsl/questions/QuestionFreeText.py +3 -3
- edsl/questions/QuestionFunctional.py +0 -3
- edsl/questions/QuestionList.py +3 -4
- edsl/questions/QuestionMultipleChoice.py +16 -8
- edsl/questions/QuestionNumerical.py +4 -3
- edsl/questions/QuestionRank.py +5 -3
- edsl/questions/__init__.py +4 -3
- edsl/questions/descriptors.py +9 -4
- edsl/questions/question_registry.py +27 -31
- edsl/questions/settings.py +1 -1
- edsl/results/Dataset.py +31 -0
- edsl/results/DatasetExportMixin.py +493 -0
- edsl/results/Result.py +42 -82
- edsl/results/Results.py +178 -66
- edsl/results/ResultsDBMixin.py +10 -9
- edsl/results/ResultsExportMixin.py +23 -507
- edsl/results/ResultsGGMixin.py +3 -3
- edsl/results/ResultsToolsMixin.py +9 -9
- edsl/scenarios/FileStore.py +140 -0
- edsl/scenarios/Scenario.py +59 -6
- edsl/scenarios/ScenarioList.py +138 -52
- edsl/scenarios/ScenarioListExportMixin.py +32 -0
- edsl/scenarios/ScenarioListPdfMixin.py +2 -1
- edsl/scenarios/__init__.py +1 -0
- edsl/study/ObjectEntry.py +173 -0
- edsl/study/ProofOfWork.py +113 -0
- edsl/study/SnapShot.py +73 -0
- edsl/study/Study.py +498 -0
- edsl/study/__init__.py +4 -0
- edsl/surveys/MemoryPlan.py +11 -4
- edsl/surveys/Survey.py +124 -37
- edsl/surveys/SurveyExportMixin.py +25 -5
- edsl/surveys/SurveyFlowVisualizationMixin.py +6 -4
- edsl/tools/plotting.py +4 -2
- edsl/utilities/__init__.py +21 -20
- edsl/utilities/gcp_bucket/__init__.py +0 -0
- edsl/utilities/gcp_bucket/cloud_storage.py +96 -0
- edsl/utilities/gcp_bucket/simple_example.py +9 -0
- edsl/utilities/interface.py +90 -73
- edsl/utilities/repair_functions.py +28 -0
- edsl/utilities/utilities.py +59 -6
- {edsl-0.1.27.dev2.dist-info → edsl-0.1.29.dist-info}/METADATA +42 -15
- edsl-0.1.29.dist-info/RECORD +203 -0
- edsl/conjure/RawResponseColumn.py +0 -327
- edsl/conjure/SurveyBuilder.py +0 -308
- edsl/conjure/SurveyBuilderCSV.py +0 -78
- edsl/conjure/SurveyBuilderSPSS.py +0 -118
- edsl/data/RemoteDict.py +0 -103
- edsl-0.1.27.dev2.dist-info/RECORD +0 -172
- {edsl-0.1.27.dev2.dist-info → edsl-0.1.29.dist-info}/LICENSE +0 -0
- {edsl-0.1.27.dev2.dist-info → edsl-0.1.29.dist-info}/WHEEL +0 -0
edsl/results/Result.py
CHANGED
@@ -3,16 +3,7 @@ from __future__ import annotations
|
|
3
3
|
from collections import UserDict
|
4
4
|
from typing import Any, Type, Callable, Optional
|
5
5
|
from collections import UserDict
|
6
|
-
|
7
|
-
from rich.table import Table
|
8
|
-
|
9
|
-
from IPython.display import display
|
10
|
-
|
11
|
-
from edsl.agents import Agent
|
12
|
-
from edsl.language_models import LanguageModel
|
13
|
-
from edsl.scenarios import Scenario
|
14
6
|
from edsl.Base import Base
|
15
|
-
from edsl.prompts import Prompt
|
16
7
|
from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
|
17
8
|
|
18
9
|
|
@@ -21,6 +12,8 @@ class PromptDict(UserDict):
|
|
21
12
|
|
22
13
|
def rich_print(self):
|
23
14
|
"""Display an object as a table."""
|
15
|
+
from rich.table import Table
|
16
|
+
|
24
17
|
table = Table(title="")
|
25
18
|
table.add_column("Attribute", style="bold")
|
26
19
|
table.add_column("Value")
|
@@ -58,6 +51,8 @@ class Result(Base, UserDict):
|
|
58
51
|
|
59
52
|
The answer dictionary has the structure:
|
60
53
|
|
54
|
+
>>> import warnings
|
55
|
+
>>> warnings.simplefilter("ignore", UserWarning)
|
61
56
|
>>> Result.example().answer
|
62
57
|
{'how_feeling': 'OK', 'how_feeling_comment': 'This is a real survey response from a human.', 'how_feeling_yesterday': 'Great', 'how_feeling_yesterday_comment': 'This is a real survey response from a human.'}
|
63
58
|
|
@@ -69,9 +64,9 @@ class Result(Base, UserDict):
|
|
69
64
|
|
70
65
|
def __init__(
|
71
66
|
self,
|
72
|
-
agent: Agent,
|
73
|
-
scenario: Scenario,
|
74
|
-
model: Type[LanguageModel],
|
67
|
+
agent: "Agent",
|
68
|
+
scenario: "Scenario",
|
69
|
+
model: Type["LanguageModel"],
|
75
70
|
iteration: int,
|
76
71
|
answer: str,
|
77
72
|
prompt: dict[str, str] = None,
|
@@ -150,15 +145,15 @@ class Result(Base, UserDict):
|
|
150
145
|
if key in self.question_to_attributes:
|
151
146
|
# You might be tempted to just use the naked key
|
152
147
|
# but this is a bad idea because it pollutes the namespace
|
153
|
-
question_text_dict[
|
154
|
-
key
|
155
|
-
|
156
|
-
question_options_dict[
|
157
|
-
key
|
158
|
-
|
159
|
-
question_type_dict[
|
160
|
-
key
|
161
|
-
|
148
|
+
question_text_dict[key + "_question_text"] = (
|
149
|
+
self.question_to_attributes[key]["question_text"]
|
150
|
+
)
|
151
|
+
question_options_dict[key + "_question_options"] = (
|
152
|
+
self.question_to_attributes[key]["question_options"]
|
153
|
+
)
|
154
|
+
question_type_dict[key + "_question_type"] = (
|
155
|
+
self.question_to_attributes[key]["question_type"]
|
156
|
+
)
|
162
157
|
|
163
158
|
return {
|
164
159
|
"agent": self.agent.traits
|
@@ -237,14 +232,8 @@ class Result(Base, UserDict):
|
|
237
232
|
###############
|
238
233
|
# Serialization
|
239
234
|
###############
|
240
|
-
|
241
|
-
|
242
|
-
"""Return a dictionary representation of the Result object.
|
243
|
-
|
244
|
-
>>> r = Result.example()
|
245
|
-
>>> r.to_dict()['scenario']
|
246
|
-
{'period': 'morning', 'edsl_version': '...', 'edsl_class_name': 'Scenario'}
|
247
|
-
"""
|
235
|
+
def _to_dict(self) -> dict[str, Any]:
|
236
|
+
"""Return a dictionary representation of the Result object."""
|
248
237
|
d = {}
|
249
238
|
for key, value in self.items():
|
250
239
|
if hasattr(value, "to_dict"):
|
@@ -262,10 +251,32 @@ class Result(Base, UserDict):
|
|
262
251
|
d[key] = new_prompt_dict
|
263
252
|
return d
|
264
253
|
|
254
|
+
@add_edsl_version
|
255
|
+
def to_dict(self) -> dict[str, Any]:
|
256
|
+
"""Return a dictionary representation of the Result object.
|
257
|
+
|
258
|
+
>>> r = Result.example()
|
259
|
+
>>> r.to_dict()['scenario']
|
260
|
+
{'period': 'morning', 'edsl_version': '...', 'edsl_class_name': 'Scenario'}
|
261
|
+
"""
|
262
|
+
return self._to_dict()
|
263
|
+
|
264
|
+
def __hash__(self):
|
265
|
+
"""Return a hash of the Result object."""
|
266
|
+
from edsl.utilities.utilities import dict_hash
|
267
|
+
|
268
|
+
return dict_hash(self._to_dict())
|
269
|
+
|
265
270
|
@classmethod
|
266
271
|
@remove_edsl_version
|
267
272
|
def from_dict(self, json_dict: dict) -> Result:
|
268
273
|
"""Return a Result object from a dictionary representation."""
|
274
|
+
|
275
|
+
from edsl import Agent
|
276
|
+
from edsl import Scenario
|
277
|
+
from edsl.language_models.LanguageModel import LanguageModel
|
278
|
+
from edsl.prompts.Prompt import Prompt
|
279
|
+
|
269
280
|
prompt_data = json_dict.get("prompt", {})
|
270
281
|
prompt_d = {}
|
271
282
|
for prompt_name, prompt_obj in prompt_data.items():
|
@@ -289,6 +300,7 @@ class Result(Base, UserDict):
|
|
289
300
|
"""Display an object as a table."""
|
290
301
|
# from edsl.utilities import print_dict_with_rich
|
291
302
|
from rich import print
|
303
|
+
from rich.table import Table
|
292
304
|
|
293
305
|
table = Table(title="Result")
|
294
306
|
table.add_column("Attribute", style="bold")
|
@@ -313,7 +325,7 @@ class Result(Base, UserDict):
|
|
313
325
|
@classmethod
|
314
326
|
def example(cls):
|
315
327
|
"""Return an example Result object."""
|
316
|
-
from edsl.results import Results
|
328
|
+
from edsl.results.Results import Results
|
317
329
|
|
318
330
|
return Results.example()[0]
|
319
331
|
|
@@ -338,59 +350,7 @@ class Result(Base, UserDict):
|
|
338
350
|
return scoring_function(**params)
|
339
351
|
|
340
352
|
|
341
|
-
def main():
|
342
|
-
"""Run the main function."""
|
343
|
-
from edsl.results.Result import Result
|
344
|
-
import json
|
345
|
-
|
346
|
-
print("Being imported")
|
347
|
-
json_string = """
|
348
|
-
{
|
349
|
-
"agent": {
|
350
|
-
"traits": {
|
351
|
-
"status": "Unhappy"
|
352
|
-
}
|
353
|
-
},
|
354
|
-
"scenario": {
|
355
|
-
"period": "morning"
|
356
|
-
},
|
357
|
-
"model": {
|
358
|
-
"model": "gpt-3.5-turbo",
|
359
|
-
"parameters": {
|
360
|
-
"temperature": 0.5,
|
361
|
-
"max_tokens": 1000,
|
362
|
-
"top_p": 1,
|
363
|
-
"frequency_penalty": 0,
|
364
|
-
"presence_penalty": 0,
|
365
|
-
"use_cache": true
|
366
|
-
}
|
367
|
-
},
|
368
|
-
"iteration": 0,
|
369
|
-
"answer": {
|
370
|
-
"how_feeling": "Bad"
|
371
|
-
},
|
372
|
-
"prompt": {"how_feeling_user_prompt": "How are you feeling today?", "how_feeling_system_prompt": "Answer the question"}
|
373
|
-
}
|
374
|
-
"""
|
375
|
-
|
376
|
-
result = Result.from_dict(json.loads(json_string))
|
377
|
-
|
378
|
-
result.sub_dicts
|
379
|
-
assert result.combined_dict["how_feeling"] == "Bad"
|
380
|
-
|
381
|
-
result.combined_dict
|
382
|
-
assert result.get_value("answer", "how_feeling") == "Bad"
|
383
|
-
|
384
|
-
result.key_to_data_type
|
385
|
-
print(result)
|
386
|
-
|
387
|
-
assert result == result.copy()
|
388
|
-
|
389
|
-
result.to_dict()
|
390
|
-
|
391
|
-
|
392
353
|
if __name__ == "__main__":
|
393
|
-
# print(Result.example())
|
394
354
|
import doctest
|
395
355
|
|
396
356
|
doctest.testmod(optionflags=doctest.ELLIPSIS)
|
edsl/results/Results.py
CHANGED
@@ -7,12 +7,7 @@ from __future__ import annotations
|
|
7
7
|
import json
|
8
8
|
import random
|
9
9
|
from collections import UserList, defaultdict
|
10
|
-
from typing import Optional, Callable, Any, Type, Union
|
11
|
-
|
12
|
-
from pygments import highlight
|
13
|
-
from pygments.lexers import JsonLexer
|
14
|
-
from pygments.formatters import HtmlFormatter
|
15
|
-
from IPython.display import HTML
|
10
|
+
from typing import Optional, Callable, Any, Type, Union, List
|
16
11
|
|
17
12
|
from simpleeval import EvalWithCompoundTypes
|
18
13
|
|
@@ -23,28 +18,17 @@ from edsl.exceptions.results import (
|
|
23
18
|
ResultsMutateError,
|
24
19
|
ResultsFilterError,
|
25
20
|
)
|
26
|
-
from edsl.agents import Agent, AgentList
|
27
|
-
from edsl.language_models.LanguageModel import LanguageModel
|
28
|
-
from edsl.results.Dataset import Dataset
|
29
|
-
from edsl.results.Result import Result
|
30
|
-
from edsl.results.ResultsExportMixin import ResultsExportMixin
|
31
|
-
from edsl.scenarios import Scenario
|
32
|
-
from edsl.scenarios.ScenarioList import ScenarioList
|
33
|
-
from edsl.surveys import Survey
|
34
|
-
from edsl.data.Cache import Cache
|
35
|
-
from edsl.utilities import (
|
36
|
-
is_valid_variable_name,
|
37
|
-
shorten_string,
|
38
|
-
)
|
39
|
-
from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
|
40
21
|
|
22
|
+
from edsl.results.ResultsExportMixin import ResultsExportMixin
|
41
23
|
from edsl.results.ResultsToolsMixin import ResultsToolsMixin
|
42
|
-
|
43
24
|
from edsl.results.ResultsDBMixin import ResultsDBMixin
|
44
25
|
from edsl.results.ResultsGGMixin import ResultsGGMixin
|
26
|
+
from edsl.results.ResultsFetchMixin import ResultsFetchMixin
|
27
|
+
|
28
|
+
from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
|
29
|
+
from edsl.utilities.utilities import dict_hash
|
45
30
|
|
46
31
|
from edsl.Base import Base
|
47
|
-
from edsl.results.ResultsFetchMixin import ResultsFetchMixin
|
48
32
|
|
49
33
|
|
50
34
|
class Mixins(
|
@@ -54,7 +38,22 @@ class Mixins(
|
|
54
38
|
ResultsGGMixin,
|
55
39
|
ResultsToolsMixin,
|
56
40
|
):
|
57
|
-
|
41
|
+
def print_long(self, max_rows=None) -> None:
|
42
|
+
"""Print the results in long format.
|
43
|
+
|
44
|
+
>>> from edsl.results import Results
|
45
|
+
>>> r = Results.example()
|
46
|
+
>>> r.select('how_feeling').print_long(max_rows = 2)
|
47
|
+
┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━┓
|
48
|
+
┃ Result index ┃ Key ┃ Value ┃
|
49
|
+
┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━┩
|
50
|
+
│ 0 │ how_feeling │ OK │
|
51
|
+
│ 1 │ how_feeling │ Great │
|
52
|
+
└──────────────┴─────────────┴───────┘
|
53
|
+
"""
|
54
|
+
from edsl.utilities.interface import print_results_long
|
55
|
+
|
56
|
+
print_results_long(self, max_rows=max_rows)
|
58
57
|
|
59
58
|
|
60
59
|
class Results(UserList, Mixins, Base):
|
@@ -82,10 +81,10 @@ class Results(UserList, Mixins, Base):
|
|
82
81
|
|
83
82
|
def __init__(
|
84
83
|
self,
|
85
|
-
survey: Optional[Survey] = None,
|
86
|
-
data: Optional[list[Result]] = None,
|
84
|
+
survey: Optional["Survey"] = None,
|
85
|
+
data: Optional[list["Result"]] = None,
|
87
86
|
created_columns: Optional[list[str]] = None,
|
88
|
-
cache: Optional[Cache] = None,
|
87
|
+
cache: Optional["Cache"] = None,
|
89
88
|
job_uuid: Optional[str] = None,
|
90
89
|
total_results: Optional[int] = None,
|
91
90
|
):
|
@@ -98,6 +97,8 @@ class Results(UserList, Mixins, Base):
|
|
98
97
|
:param total_results: An integer representing the total number of results.
|
99
98
|
"""
|
100
99
|
super().__init__(data)
|
100
|
+
from edsl.data.Cache import Cache
|
101
|
+
|
101
102
|
self.survey = survey
|
102
103
|
self.created_columns = created_columns or []
|
103
104
|
self._job_uuid = job_uuid
|
@@ -123,6 +124,10 @@ class Results(UserList, Mixins, Base):
|
|
123
124
|
raise TypeError("Invalid argument type")
|
124
125
|
|
125
126
|
def _update_results(self) -> None:
|
127
|
+
from edsl import Agent, Scenario
|
128
|
+
from edsl.language_models import LanguageModel
|
129
|
+
from edsl.results import Result
|
130
|
+
|
126
131
|
if self._job_uuid and len(self.data) < self._total_results:
|
127
132
|
results = [
|
128
133
|
Result(
|
@@ -166,7 +171,13 @@ class Results(UserList, Mixins, Base):
|
|
166
171
|
return f"Results(data = {self.data}, survey = {repr(self.survey)}, created_columns = {self.created_columns})"
|
167
172
|
|
168
173
|
def _repr_html_(self) -> str:
|
174
|
+
from IPython.display import HTML
|
175
|
+
|
169
176
|
json_str = json.dumps(self.to_dict()["data"], indent=4)
|
177
|
+
from pygments import highlight
|
178
|
+
from pygments.lexers import JsonLexer
|
179
|
+
from pygments.formatters import HtmlFormatter
|
180
|
+
|
170
181
|
formatted_json = highlight(
|
171
182
|
json_str,
|
172
183
|
JsonLexer(),
|
@@ -174,6 +185,37 @@ class Results(UserList, Mixins, Base):
|
|
174
185
|
)
|
175
186
|
return HTML(formatted_json).data
|
176
187
|
|
188
|
+
def _to_dict(self, sort=False):
|
189
|
+
from edsl.data.Cache import Cache
|
190
|
+
|
191
|
+
if sort:
|
192
|
+
data = sorted([result for result in self.data], key=lambda x: hash(x))
|
193
|
+
else:
|
194
|
+
data = [result for result in self.data]
|
195
|
+
return {
|
196
|
+
"data": [result.to_dict() for result in data],
|
197
|
+
"survey": self.survey.to_dict(),
|
198
|
+
"created_columns": self.created_columns,
|
199
|
+
"cache": Cache() if not hasattr(self, "cache") else self.cache.to_dict(),
|
200
|
+
}
|
201
|
+
|
202
|
+
def compare(self, other_results):
|
203
|
+
"""
|
204
|
+
Compare two Results objects and return the differences.
|
205
|
+
"""
|
206
|
+
hashes_0 = [hash(result) for result in self]
|
207
|
+
hashes_1 = [hash(result) for result in other_results]
|
208
|
+
|
209
|
+
in_self_but_not_other = set(hashes_0).difference(set(hashes_1))
|
210
|
+
in_other_but_not_self = set(hashes_1).difference(set(hashes_0))
|
211
|
+
|
212
|
+
indicies_self = [hashes_0.index(h) for h in in_self_but_not_other]
|
213
|
+
indices_other = [hashes_1.index(h) for h in in_other_but_not_self]
|
214
|
+
return {
|
215
|
+
"a_not_b": [self[i] for i in indicies_self],
|
216
|
+
"b_not_a": [other_results[i] for i in indices_other],
|
217
|
+
}
|
218
|
+
|
177
219
|
@add_edsl_version
|
178
220
|
def to_dict(self) -> dict[str, Any]:
|
179
221
|
"""Convert the Results object to a dictionary.
|
@@ -186,12 +228,39 @@ class Results(UserList, Mixins, Base):
|
|
186
228
|
>>> r.to_dict().keys()
|
187
229
|
dict_keys(['data', 'survey', 'created_columns', 'cache', 'edsl_version', 'edsl_class_name'])
|
188
230
|
"""
|
189
|
-
return
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
231
|
+
return self._to_dict()
|
232
|
+
|
233
|
+
def __hash__(self) -> int:
|
234
|
+
return dict_hash(self._to_dict(sort=True))
|
235
|
+
|
236
|
+
@property
|
237
|
+
def hashes(self) -> set:
|
238
|
+
return set(hash(result) for result in self.data)
|
239
|
+
|
240
|
+
def sample(self, n: int) -> "Results":
|
241
|
+
"""Return a random sample of the results.
|
242
|
+
|
243
|
+
:param n: The number of samples to return.
|
244
|
+
|
245
|
+
>>> from edsl.results import Results
|
246
|
+
>>> r = Results.example()
|
247
|
+
>>> len(r.sample(2))
|
248
|
+
2
|
249
|
+
"""
|
250
|
+
indices = None
|
251
|
+
|
252
|
+
for entry in self:
|
253
|
+
key, values = list(entry.items())[0]
|
254
|
+
if indices is None: # gets the indices for the first time
|
255
|
+
indices = list(range(len(values)))
|
256
|
+
sampled_indices = random.sample(indices, n)
|
257
|
+
if n > len(indices):
|
258
|
+
raise ValueError(
|
259
|
+
f"Cannot sample {n} items from a list of length {len(indices)}."
|
260
|
+
)
|
261
|
+
entry[key] = [values[i] for i in sampled_indices]
|
262
|
+
|
263
|
+
return self
|
195
264
|
|
196
265
|
@classmethod
|
197
266
|
@remove_edsl_version
|
@@ -208,12 +277,20 @@ class Results(UserList, Mixins, Base):
|
|
208
277
|
>>> r == r2
|
209
278
|
True
|
210
279
|
"""
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
280
|
+
from edsl import Survey, Cache
|
281
|
+
from edsl.results.Result import Result
|
282
|
+
|
283
|
+
try:
|
284
|
+
results = cls(
|
285
|
+
survey=Survey.from_dict(data["survey"]),
|
286
|
+
data=[Result.from_dict(r) for r in data["data"]],
|
287
|
+
created_columns=data.get("created_columns", None),
|
288
|
+
cache=(
|
289
|
+
Cache.from_dict(data.get("cache")) if "cache" in data else Cache()
|
290
|
+
),
|
291
|
+
)
|
292
|
+
except Exception as e:
|
293
|
+
breakpoint()
|
217
294
|
return results
|
218
295
|
|
219
296
|
######################
|
@@ -280,6 +357,8 @@ class Results(UserList, Mixins, Base):
|
|
280
357
|
>>> r.answer_keys
|
281
358
|
{'how_feeling': 'How are you this {{ period }}?', 'how_feeling_yesterday': 'How were you feeling yesterday {{ period }}?'}
|
282
359
|
"""
|
360
|
+
from edsl.utilities.utilities import shorten_string
|
361
|
+
|
283
362
|
if not self.survey:
|
284
363
|
raise Exception("Survey is not defined so no answer keys are available.")
|
285
364
|
|
@@ -294,7 +373,7 @@ class Results(UserList, Mixins, Base):
|
|
294
373
|
return sorted_dict
|
295
374
|
|
296
375
|
@property
|
297
|
-
def agents(self) -> AgentList:
|
376
|
+
def agents(self) -> "AgentList":
|
298
377
|
"""Return a list of all of the agents in the Results.
|
299
378
|
|
300
379
|
Example:
|
@@ -303,10 +382,12 @@ class Results(UserList, Mixins, Base):
|
|
303
382
|
>>> r.agents
|
304
383
|
AgentList([Agent(traits = {'status': 'Joyful'}), Agent(traits = {'status': 'Joyful'}), Agent(traits = {'status': 'Sad'}), Agent(traits = {'status': 'Sad'})])
|
305
384
|
"""
|
385
|
+
from edsl import AgentList
|
386
|
+
|
306
387
|
return AgentList([r.agent for r in self.data])
|
307
388
|
|
308
389
|
@property
|
309
|
-
def models(self) -> list[Type[LanguageModel]]:
|
390
|
+
def models(self) -> list[Type["LanguageModel"]]:
|
310
391
|
"""Return a list of all of the models in the Results.
|
311
392
|
|
312
393
|
Example:
|
@@ -318,7 +399,7 @@ class Results(UserList, Mixins, Base):
|
|
318
399
|
return [r.model for r in self.data]
|
319
400
|
|
320
401
|
@property
|
321
|
-
def scenarios(self) -> ScenarioList:
|
402
|
+
def scenarios(self) -> "ScenarioList":
|
322
403
|
"""Return a list of all of the scenarios in the Results.
|
323
404
|
|
324
405
|
Example:
|
@@ -327,6 +408,8 @@ class Results(UserList, Mixins, Base):
|
|
327
408
|
>>> r.scenarios
|
328
409
|
ScenarioList([Scenario({'period': 'morning'}), Scenario({'period': 'afternoon'}), Scenario({'period': 'morning'}), Scenario({'period': 'afternoon'})])
|
329
410
|
"""
|
411
|
+
from edsl import ScenarioList
|
412
|
+
|
330
413
|
return ScenarioList([r.scenario for r in self.data])
|
331
414
|
|
332
415
|
@property
|
@@ -426,7 +509,7 @@ class Results(UserList, Mixins, Base):
|
|
426
509
|
)
|
427
510
|
return data_type, key
|
428
511
|
|
429
|
-
def first(self) -> Result:
|
512
|
+
def first(self) -> "Result":
|
430
513
|
"""Return the first observation in the results.
|
431
514
|
|
432
515
|
Example:
|
@@ -487,6 +570,39 @@ class Results(UserList, Mixins, Base):
|
|
487
570
|
created_columns=self.created_columns + [new_var_name],
|
488
571
|
)
|
489
572
|
|
573
|
+
def add_column(self, column_name: str, values: list) -> Results:
|
574
|
+
"""Adds columns to Results
|
575
|
+
|
576
|
+
>>> r = Results.example()
|
577
|
+
>>> r.add_column('a', [1,2,3, 4]).select('a')
|
578
|
+
Dataset([{'answer.a': [1, 2, 3, 4]}])
|
579
|
+
"""
|
580
|
+
|
581
|
+
assert len(values) == len(
|
582
|
+
self.data
|
583
|
+
), "The number of values must match the number of results."
|
584
|
+
new_results = self.data.copy()
|
585
|
+
for i, result in enumerate(new_results):
|
586
|
+
result["answer"][column_name] = values[i]
|
587
|
+
return Results(
|
588
|
+
survey=self.survey,
|
589
|
+
data=new_results,
|
590
|
+
created_columns=self.created_columns + [column_name],
|
591
|
+
)
|
592
|
+
|
593
|
+
def add_columns_from_dict(self, columns: List[dict]) -> Results:
|
594
|
+
"""Adds columns to Results from a list of dictionaries.
|
595
|
+
|
596
|
+
>>> r = Results.example()
|
597
|
+
>>> r.add_columns_from_dict([{'a': 1, 'b': 2}, {'a': 3, 'b': 4}, {'a':3, 'b':2}, {'a':3, 'b':2}]).select('a', 'b')
|
598
|
+
Dataset([{'answer.a': [1, 3, 3, 3]}, {'answer.b': [2, 4, 2, 2]}])
|
599
|
+
"""
|
600
|
+
keys = list(columns[0].keys())
|
601
|
+
for key in keys:
|
602
|
+
values = [d[key] for d in columns]
|
603
|
+
self = self.add_column(key, values)
|
604
|
+
return self
|
605
|
+
|
490
606
|
def mutate(
|
491
607
|
self, new_var_string: str, functions_dict: Optional[dict] = None
|
492
608
|
) -> Results:
|
@@ -511,6 +627,8 @@ class Results(UserList, Mixins, Base):
|
|
511
627
|
)
|
512
628
|
raw_var_name, expression = new_var_string.split("=", 1)
|
513
629
|
var_name = raw_var_name.strip()
|
630
|
+
from edsl.utilities.utilities import is_valid_variable_name
|
631
|
+
|
514
632
|
if not is_valid_variable_name(var_name):
|
515
633
|
raise ResultsInvalidNameError(f"{var_name} is not a valid variable name.")
|
516
634
|
|
@@ -522,7 +640,7 @@ class Results(UserList, Mixins, Base):
|
|
522
640
|
names=result.combined_dict, functions=functions_dict
|
523
641
|
)
|
524
642
|
|
525
|
-
def new_result(old_result: Result, var_name: str) -> Result:
|
643
|
+
def new_result(old_result: "Result", var_name: str) -> "Result":
|
526
644
|
evaluator = create_evaluator(old_result)
|
527
645
|
value = evaluator.eval(expression)
|
528
646
|
new_result = old_result.copy()
|
@@ -612,7 +730,7 @@ class Results(UserList, Mixins, Base):
|
|
612
730
|
|
613
731
|
return Results(survey=self.survey, data=new_data, created_columns=None)
|
614
732
|
|
615
|
-
def select(self, *columns: Union[str, list[str]]) -> Dataset:
|
733
|
+
def select(self, *columns: Union[str, list[str]]) -> "Dataset":
|
616
734
|
"""
|
617
735
|
Select data from the results and format it.
|
618
736
|
|
@@ -624,6 +742,7 @@ class Results(UserList, Mixins, Base):
|
|
624
742
|
>>> results.select('how_feeling')
|
625
743
|
Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}])
|
626
744
|
"""
|
745
|
+
|
627
746
|
if len(self) == 0:
|
628
747
|
raise Exception("No data to select from---the Results object is empty.")
|
629
748
|
|
@@ -680,13 +799,22 @@ class Results(UserList, Mixins, Base):
|
|
680
799
|
return items_in_order.index(single_key)
|
681
800
|
|
682
801
|
sorted(new_data, key=sort_by_key_order)
|
802
|
+
from edsl.results.Dataset import Dataset
|
683
803
|
|
684
804
|
return Dataset(new_data)
|
685
805
|
|
686
|
-
def sort_by(self, columns, reverse: bool = False) -> Results:
|
806
|
+
def sort_by(self, *columns: str, reverse: bool = False) -> Results:
|
807
|
+
import warnings
|
808
|
+
|
809
|
+
warnings.warn(
|
810
|
+
"sort_by is deprecated. Use order_by instead.", DeprecationWarning
|
811
|
+
)
|
812
|
+
return self.order_by(*columns, reverse=reverse)
|
813
|
+
|
814
|
+
def order_by(self, *columns: str, reverse: bool = False) -> Results:
|
687
815
|
"""Sort the results by one or more columns.
|
688
816
|
|
689
|
-
:param columns:
|
817
|
+
:param columns: One or more column names as strings.
|
690
818
|
:param reverse: A boolean that determines whether to sort in reverse order.
|
691
819
|
|
692
820
|
Each column name can be a single key, e.g. "how_feeling", or a dot-separated string, e.g. "answer.how_feeling".
|
@@ -694,7 +822,7 @@ class Results(UserList, Mixins, Base):
|
|
694
822
|
Example:
|
695
823
|
|
696
824
|
>>> r = Results.example()
|
697
|
-
>>> r.sort_by(
|
825
|
+
>>> r.sort_by('how_feeling', reverse=False).select('how_feeling').print()
|
698
826
|
┏━━━━━━━━━━━━━━┓
|
699
827
|
┃ answer ┃
|
700
828
|
┃ .how_feeling ┃
|
@@ -707,7 +835,7 @@ class Results(UserList, Mixins, Base):
|
|
707
835
|
├──────────────┤
|
708
836
|
│ Terrible │
|
709
837
|
└──────────────┘
|
710
|
-
>>> r.sort_by(
|
838
|
+
>>> r.sort_by('how_feeling', reverse=True).select('how_feeling').print()
|
711
839
|
┏━━━━━━━━━━━━━━┓
|
712
840
|
┃ answer ┃
|
713
841
|
┃ .how_feeling ┃
|
@@ -721,8 +849,6 @@ class Results(UserList, Mixins, Base):
|
|
721
849
|
│ Great │
|
722
850
|
└──────────────┘
|
723
851
|
"""
|
724
|
-
if isinstance(columns, str):
|
725
|
-
columns = [columns]
|
726
852
|
|
727
853
|
def to_numeric_if_possible(v):
|
728
854
|
try:
|
@@ -731,28 +857,14 @@ class Results(UserList, Mixins, Base):
|
|
731
857
|
return v
|
732
858
|
|
733
859
|
def sort_key(item):
|
734
|
-
# Create an empty list to store the key components for sorting
|
735
860
|
key_components = []
|
736
|
-
|
737
|
-
# Loop through each column specified in the sort
|
738
861
|
for col in columns:
|
739
|
-
# Parse the column into its data type and key
|
740
862
|
data_type, key = self._parse_column(col)
|
741
|
-
|
742
|
-
# Retrieve the value from the item based on the parsed data type and key
|
743
863
|
value = item.get_value(data_type, key)
|
744
|
-
|
745
|
-
# Convert the value to numeric if possible, and append it to the key components
|
746
864
|
key_components.append(to_numeric_if_possible(value))
|
747
|
-
|
748
|
-
# Convert the list of key components into a tuple to serve as the sorting key
|
749
865
|
return tuple(key_components)
|
750
866
|
|
751
|
-
new_data = sorted(
|
752
|
-
self.data,
|
753
|
-
key=sort_key,
|
754
|
-
reverse=reverse,
|
755
|
-
)
|
867
|
+
new_data = sorted(self.data, key=sort_key, reverse=reverse)
|
756
868
|
return Results(survey=self.survey, data=new_data, created_columns=None)
|
757
869
|
|
758
870
|
def filter(self, expression: str) -> Results:
|
@@ -846,7 +958,7 @@ class Results(UserList, Mixins, Base):
|
|
846
958
|
|
847
959
|
:param debug: if False, uses actual API calls
|
848
960
|
"""
|
849
|
-
from edsl.jobs import Jobs
|
961
|
+
from edsl.jobs.Jobs import Jobs
|
850
962
|
from edsl.data.Cache import Cache
|
851
963
|
|
852
964
|
c = Cache()
|