edsl 0.1.27.dev2__py3-none-any.whl → 0.1.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +107 -30
- edsl/BaseDiff.py +260 -0
- edsl/__init__.py +25 -21
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +103 -46
- edsl/agents/AgentList.py +97 -13
- edsl/agents/Invigilator.py +23 -10
- edsl/agents/InvigilatorBase.py +19 -14
- edsl/agents/PromptConstructionMixin.py +342 -100
- edsl/agents/descriptors.py +5 -2
- edsl/base/Base.py +289 -0
- edsl/config.py +2 -1
- edsl/conjure/AgentConstructionMixin.py +152 -0
- edsl/conjure/Conjure.py +56 -0
- edsl/conjure/InputData.py +659 -0
- edsl/conjure/InputDataCSV.py +48 -0
- edsl/conjure/InputDataMixinQuestionStats.py +182 -0
- edsl/conjure/InputDataPyRead.py +91 -0
- edsl/conjure/InputDataSPSS.py +8 -0
- edsl/conjure/InputDataStata.py +8 -0
- edsl/conjure/QuestionOptionMixin.py +76 -0
- edsl/conjure/QuestionTypeMixin.py +23 -0
- edsl/conjure/RawQuestion.py +65 -0
- edsl/conjure/SurveyResponses.py +7 -0
- edsl/conjure/__init__.py +9 -4
- edsl/conjure/examples/placeholder.txt +0 -0
- edsl/conjure/naming_utilities.py +263 -0
- edsl/conjure/utilities.py +165 -28
- edsl/conversation/Conversation.py +238 -0
- edsl/conversation/car_buying.py +58 -0
- edsl/conversation/mug_negotiation.py +81 -0
- edsl/conversation/next_speaker_utilities.py +93 -0
- edsl/coop/coop.py +337 -121
- edsl/coop/utils.py +56 -70
- edsl/data/Cache.py +74 -22
- edsl/data/CacheHandler.py +10 -9
- edsl/data/SQLiteDict.py +11 -3
- edsl/inference_services/AnthropicService.py +1 -0
- edsl/inference_services/DeepInfraService.py +20 -13
- edsl/inference_services/GoogleService.py +7 -1
- edsl/inference_services/InferenceServicesCollection.py +33 -7
- edsl/inference_services/OpenAIService.py +17 -10
- edsl/inference_services/models_available_cache.py +69 -0
- edsl/inference_services/rate_limits_cache.py +25 -0
- edsl/inference_services/write_available.py +10 -0
- edsl/jobs/Answers.py +15 -1
- edsl/jobs/Jobs.py +322 -73
- edsl/jobs/buckets/BucketCollection.py +9 -3
- edsl/jobs/buckets/ModelBuckets.py +4 -2
- edsl/jobs/buckets/TokenBucket.py +1 -2
- edsl/jobs/interviews/Interview.py +7 -10
- edsl/jobs/interviews/InterviewStatusMixin.py +3 -3
- edsl/jobs/interviews/InterviewTaskBuildingMixin.py +39 -20
- edsl/jobs/interviews/retry_management.py +4 -4
- edsl/jobs/runners/JobsRunnerAsyncio.py +103 -65
- edsl/jobs/runners/JobsRunnerStatusData.py +3 -3
- edsl/jobs/tasks/QuestionTaskCreator.py +4 -2
- edsl/jobs/tasks/TaskHistory.py +4 -3
- edsl/language_models/LanguageModel.py +42 -55
- edsl/language_models/ModelList.py +96 -0
- edsl/language_models/registry.py +14 -0
- edsl/language_models/repair.py +97 -25
- edsl/notebooks/Notebook.py +157 -32
- edsl/prompts/Prompt.py +31 -19
- edsl/questions/QuestionBase.py +145 -23
- edsl/questions/QuestionBudget.py +5 -6
- edsl/questions/QuestionCheckBox.py +7 -3
- edsl/questions/QuestionExtract.py +5 -3
- edsl/questions/QuestionFreeText.py +3 -3
- edsl/questions/QuestionFunctional.py +0 -3
- edsl/questions/QuestionList.py +3 -4
- edsl/questions/QuestionMultipleChoice.py +16 -8
- edsl/questions/QuestionNumerical.py +4 -3
- edsl/questions/QuestionRank.py +5 -3
- edsl/questions/__init__.py +4 -3
- edsl/questions/descriptors.py +9 -4
- edsl/questions/question_registry.py +27 -31
- edsl/questions/settings.py +1 -1
- edsl/results/Dataset.py +31 -0
- edsl/results/DatasetExportMixin.py +493 -0
- edsl/results/Result.py +42 -82
- edsl/results/Results.py +178 -66
- edsl/results/ResultsDBMixin.py +10 -9
- edsl/results/ResultsExportMixin.py +23 -507
- edsl/results/ResultsGGMixin.py +3 -3
- edsl/results/ResultsToolsMixin.py +9 -9
- edsl/scenarios/FileStore.py +140 -0
- edsl/scenarios/Scenario.py +59 -6
- edsl/scenarios/ScenarioList.py +138 -52
- edsl/scenarios/ScenarioListExportMixin.py +32 -0
- edsl/scenarios/ScenarioListPdfMixin.py +2 -1
- edsl/scenarios/__init__.py +1 -0
- edsl/study/ObjectEntry.py +173 -0
- edsl/study/ProofOfWork.py +113 -0
- edsl/study/SnapShot.py +73 -0
- edsl/study/Study.py +498 -0
- edsl/study/__init__.py +4 -0
- edsl/surveys/MemoryPlan.py +11 -4
- edsl/surveys/Survey.py +124 -37
- edsl/surveys/SurveyExportMixin.py +25 -5
- edsl/surveys/SurveyFlowVisualizationMixin.py +6 -4
- edsl/tools/plotting.py +4 -2
- edsl/utilities/__init__.py +21 -20
- edsl/utilities/gcp_bucket/__init__.py +0 -0
- edsl/utilities/gcp_bucket/cloud_storage.py +96 -0
- edsl/utilities/gcp_bucket/simple_example.py +9 -0
- edsl/utilities/interface.py +90 -73
- edsl/utilities/repair_functions.py +28 -0
- edsl/utilities/utilities.py +59 -6
- {edsl-0.1.27.dev2.dist-info → edsl-0.1.29.dist-info}/METADATA +42 -15
- edsl-0.1.29.dist-info/RECORD +203 -0
- edsl/conjure/RawResponseColumn.py +0 -327
- edsl/conjure/SurveyBuilder.py +0 -308
- edsl/conjure/SurveyBuilderCSV.py +0 -78
- edsl/conjure/SurveyBuilderSPSS.py +0 -118
- edsl/data/RemoteDict.py +0 -103
- edsl-0.1.27.dev2.dist-info/RECORD +0 -172
- {edsl-0.1.27.dev2.dist-info → edsl-0.1.29.dist-info}/LICENSE +0 -0
- {edsl-0.1.27.dev2.dist-info → edsl-0.1.29.dist-info}/WHEEL +0 -0
@@ -0,0 +1,140 @@
|
|
1
|
+
from edsl import Scenario
|
2
|
+
import base64
|
3
|
+
import io
|
4
|
+
import tempfile
|
5
|
+
from typing import Optional
|
6
|
+
|
7
|
+
|
8
|
+
class FileStore(Scenario):
    """Scenario that stores the contents of a file as a base64 string.

    The underlying scenario has four keys: ``filename``, ``base64_string``,
    ``binary`` and ``suffix``. The same values are kept as attributes.
    """

    def __init__(
        self,
        filename: str,
        binary: Optional[bool] = None,
        suffix: Optional[str] = None,
        base64_string: Optional[str] = None,
    ):
        """Build the store from a file on disk or from already-encoded data.

        :param filename: Path of the file. It is only read when
            ``base64_string`` is not supplied.
        :param binary: Whether the content is binary. A falsy value means
            "text unless reading the file shows otherwise".
        :param suffix: Suffix used for temporary copies. Derived from
            ``filename`` when omitted.
        :param base64_string: Pre-encoded content. When falsy, the file at
            ``filename`` is read and encoded.
        """
        self.filename = filename
        self.suffix = suffix or self._suffix_from_filename(filename)
        # Must be assigned before encoding: the encoder flips it to True when
        # the file cannot be read as UTF-8 text.
        self.binary = binary or False
        self.base64_string = base64_string or self.encode_file_to_base64_string(
            filename
        )
        super().__init__(
            {
                "filename": self.filename,
                "base64_string": self.base64_string,
                "binary": self.binary,
                "suffix": self.suffix,
            }
        )

    @staticmethod
    def _suffix_from_filename(filename: str) -> str:
        """Return the final extension of ``filename`` including the dot.

        Only the last path component is inspected, and a name without any dot
        yields an empty suffix instead of "." followed by the whole path.
        """
        import os

        basename = os.path.basename(filename)
        if "." not in basename:
            return ""
        return "." + basename.rsplit(".", 1)[-1]

    @classmethod
    def from_dict(cls, d):
        """Rebuild a store from a dict with the four keys written by ``__init__``."""
        return cls(d["filename"], d["binary"], d["suffix"], d["base64_string"])

    def encode_file_to_base64_string(self, file_path):
        """Read ``file_path`` and return its content as a base64 string.

        The file is first read as UTF-8 text. If decoding fails it is read as
        raw bytes instead and ``self.binary`` is set to True.
        """
        try:
            # Explicit encoding: with the platform default, a single-byte
            # locale codec can decode binary data without error, and the
            # re-encoding below would then corrupt it.
            with open(file_path, "r", encoding="utf-8") as text_file:
                payload = text_file.read().encode("utf-8")
        except UnicodeDecodeError:
            with open(file_path, "rb") as binary_file:
                payload = binary_file.read()
            self.binary = True

        # b64encode returns bytes; the scenario stores a str.
        return base64.b64encode(payload).decode("utf-8")

    def open(self):
        """Return an in-memory file object holding the stored content.

        A ``BytesIO`` is returned for binary content and a ``StringIO`` for
        text content.
        """
        if self.binary:
            return self.base64_to_file(self["base64_string"], is_binary=True)
        return self.base64_to_text_file(self["base64_string"])

    @staticmethod
    def base64_to_text_file(base64_string):
        """Decode ``base64_string`` as UTF-8 text and return it in a ``StringIO``."""
        decoded_text = base64.b64decode(base64_string).decode("utf-8")
        return io.StringIO(decoded_text)

    @staticmethod
    def base64_to_file(base64_string, is_binary=True):
        """Decode ``base64_string`` into an in-memory file object.

        :param base64_string: The base64-encoded content.
        :param is_binary: When True return a ``BytesIO`` of the raw bytes;
            otherwise decode the bytes as UTF-8 and return a ``StringIO``.
        """
        file_data = base64.b64decode(base64_string)
        if is_binary:
            return io.BytesIO(file_data)
        return io.StringIO(file_data.decode("utf-8"))

    def to_tempfile(self, suffix=None):
        """Write the stored content to a new temporary file and return its path.

        :param suffix: Suffix for the temporary file; defaults to
            ``self.suffix``.

        The file is created with ``delete=False``, so it is left on disk after
        this method returns.
        """
        if suffix is None:
            suffix = self.suffix

        # Always write the decoded bytes. NamedTemporaryFile opens in binary
        # mode by default, so writing the str read from a StringIO (the
        # previous text-file path) raised TypeError.
        payload = base64.b64decode(self["base64_string"])
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            temp_file.write(payload)

        return temp_file.name

    def push(self, description=None):
        """Push the store as a plain ``Scenario`` and return what ``push`` returns.

        :param description: Description passed to ``Scenario.push``; defaults
            to ``"File: "`` followed by the stored filename.
        """
        scenario_version = Scenario.from_dict(self.to_dict())
        if description is None:
            description = "File: " + self["filename"]
        info = scenario_version.push(description=description)
        return info

    @classmethod
    def pull(cls, uuid):
        """Pull a ``Scenario`` by ``uuid`` and rebuild a store from its dict."""
        scenario_version = Scenario.pull(uuid)
        return cls.from_dict(scenario_version.to_dict())
|
117
|
+
|
118
|
+
|
119
|
+
class CSVFileStore(FileStore):
    """FileStore whose suffix defaults to ``.csv``."""

    def __init__(
        self,
        filename,
        binary: Optional[bool] = None,
        suffix: Optional[str] = None,
        base64_string: Optional[str] = None,
    ):
        """Build a CSV store.

        The extra parameters mirror ``FileStore.__init__``. They are needed
        because the inherited ``from_dict`` (and therefore ``pull``) calls the
        constructor with all four values; with a filename-only signature that
        call raised TypeError.

        :param filename: Path of the CSV file.
        :param binary: Forwarded to ``FileStore``.
        :param suffix: Suffix override; ``.csv`` when omitted.
        :param base64_string: Forwarded to ``FileStore``.
        """
        super().__init__(
            filename,
            binary=binary,
            suffix=suffix or ".csv",
            base64_string=base64_string,
        )
|
122
|
+
|
123
|
+
|
124
|
+
class PDFFileStore(FileStore):
    """FileStore whose suffix defaults to ``.pdf``."""

    def __init__(
        self,
        filename,
        binary: Optional[bool] = None,
        suffix: Optional[str] = None,
        base64_string: Optional[str] = None,
    ):
        """Build a PDF store.

        The extra parameters mirror ``FileStore.__init__``. They are needed
        because the inherited ``from_dict`` (and therefore ``pull``) calls the
        constructor with all four values; with a filename-only signature that
        call raised TypeError.

        :param filename: Path of the PDF file.
        :param binary: Forwarded to ``FileStore``.
        :param suffix: Suffix override; ``.pdf`` when omitted.
        :param base64_string: Forwarded to ``FileStore``.
        """
        super().__init__(
            filename,
            binary=binary,
            suffix=suffix or ".pdf",
            base64_string=base64_string,
        )
|
127
|
+
|
128
|
+
|
129
|
+
if __name__ == "__main__":
    # Manual check: wrap a local PDF and push it.
    # Earlier experiments, left disabled:
    #   fs = FileStore("../conjure/examples/Ex11-2.sav")
    #   print(fs.push())
    #   from edsl import Conjure
    #   c = Conjure(datafile_name=fs.to_tempfile())
    #   print(f.to_tempfile())
    f = PDFFileStore("paper.pdf")
    f.push()
|
edsl/scenarios/Scenario.py
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
"""A Scenario is a dictionary with a key/value to parameterize a question."""
|
2
2
|
|
3
|
+
import time
|
3
4
|
import copy
|
4
5
|
from collections import UserDict
|
5
6
|
from typing import Union, List, Optional, Generator
|
6
7
|
import base64
|
7
8
|
import hashlib
|
8
|
-
|
9
|
+
import os
|
9
10
|
|
10
11
|
from edsl.Base import Base
|
11
|
-
|
12
12
|
from edsl.scenarios.ScenarioImageMixin import ScenarioImageMixin
|
13
13
|
from edsl.scenarios.ScenarioHtmlMixin import ScenarioHtmlMixin
|
14
14
|
|
@@ -19,7 +19,9 @@ from edsl.utilities.decorators import (
|
|
19
19
|
|
20
20
|
|
21
21
|
class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
|
22
|
-
"""A Scenario is a dictionary of keys/values
|
22
|
+
"""A Scenario is a dictionary of keys/values.
|
23
|
+
|
24
|
+
They can be used parameterize edsl questions."""
|
23
25
|
|
24
26
|
def __init__(self, data: Union[dict, None] = None, name: str = None):
|
25
27
|
"""Initialize a new Scenario.
|
@@ -32,7 +34,7 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
|
|
32
34
|
self.name = name
|
33
35
|
|
34
36
|
def replicate(self, n: int) -> "ScenarioList":
|
35
|
-
"""Replicate a scenario n times.
|
37
|
+
"""Replicate a scenario n times to return a ScenarioList.
|
36
38
|
|
37
39
|
:param n: The number of times to replicate the scenario.
|
38
40
|
|
@@ -58,7 +60,7 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
|
|
58
60
|
self._has_image = value
|
59
61
|
|
60
62
|
def __add__(self, other_scenario: "Scenario") -> "Scenario":
|
61
|
-
"""Combine two scenarios
|
63
|
+
"""Combine two scenarios by taking the union of their keys
|
62
64
|
|
63
65
|
If the other scenario is None, then just return self.
|
64
66
|
|
@@ -102,6 +104,17 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
|
|
102
104
|
new_scenario[key] = value
|
103
105
|
return new_scenario
|
104
106
|
|
107
|
+
def _to_dict(self) -> dict:
    """Return a shallow copy of the scenario's data.

    Unlike ``to_dict``, the result carries no ``edsl_version`` or
    ``edsl_class_name`` entries; it contains only the scenario's own keys.

    Example:

    >>> s = Scenario({"food": "wood chips"})
    >>> s._to_dict()
    {'food': 'wood chips'}
    """
    return self.data.copy()
|
117
|
+
|
105
118
|
@add_edsl_version
|
106
119
|
def to_dict(self) -> dict:
|
107
120
|
"""Convert a scenario to a dictionary.
|
@@ -112,7 +125,21 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
|
|
112
125
|
>>> s.to_dict()
|
113
126
|
{'food': 'wood chips', 'edsl_version': '...', 'edsl_class_name': 'Scenario'}
|
114
127
|
"""
|
115
|
-
return self.
|
128
|
+
return self._to_dict()
|
129
|
+
|
130
|
+
def __hash__(self) -> int:
    """Hash the scenario through ``dict_hash`` applied to its plain data dict.

    Example:

    >>> s = Scenario({"food": "wood chips"})
    >>> hash(s)
    1153210385458344214
    """
    from edsl.utilities.utilities import dict_hash

    plain_data = self._to_dict()
    return dict_hash(plain_data)
|
116
143
|
|
117
144
|
def print(self):
|
118
145
|
from rich import print_json
|
@@ -183,6 +210,30 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
|
|
183
210
|
s.has_image = True
|
184
211
|
return s
|
185
212
|
|
213
|
+
@classmethod
def from_pdf(cls, pdf_path):
    """Create a scenario from the text of a PDF file.

    :param pdf_path: Path to the PDF file.
    :raises FileNotFoundError: If ``pdf_path`` does not exist.

    The returned scenario has two keys: ``filename`` (the base name of the
    path) and ``text`` (the text of all pages, concatenated in page order).
    """
    import fitz  # PyMuPDF

    # Fail early with a clear message instead of a library-specific error.
    if not os.path.exists(pdf_path):
        raise FileNotFoundError(f"The file {pdf_path} does not exist.")

    document = fitz.open(pdf_path)
    try:
        # join() avoids re-copying the accumulated text for every page.
        text = "".join(
            document.load_page(page_num).get_text()
            for page_num in range(len(document))
        )
    finally:
        # Release the file handle even if text extraction fails.
        document.close()

    # A plain Scenario is returned on purpose: subclasses may have
    # constructors that do not accept a data dict.
    page_info = {"filename": os.path.basename(pdf_path), "text": text}
    return Scenario(page_info)
|
236
|
+
|
186
237
|
@classmethod
|
187
238
|
def from_docx(cls, docx_path: str) -> "Scenario":
|
188
239
|
"""Creates a scenario from the text of a docx file.
|
@@ -350,6 +401,8 @@ class Scenario(Base, UserDict, ScenarioImageMixin, ScenarioHtmlMixin):
|
|
350
401
|
|
351
402
|
def rich_print(self) -> "Table":
|
352
403
|
"""Display an object as a rich table."""
|
404
|
+
from rich.table import Table
|
405
|
+
|
353
406
|
table_data, column_names = self._table()
|
354
407
|
table = Table(title=f"{self.__class__.__name__} Attributes")
|
355
408
|
for column in column_names:
|
edsl/scenarios/ScenarioList.py
CHANGED
@@ -2,28 +2,25 @@
|
|
2
2
|
|
3
3
|
from __future__ import annotations
|
4
4
|
import csv
|
5
|
-
|
5
|
+
import random
|
6
|
+
from collections import UserList, Counter
|
6
7
|
from collections.abc import Iterable
|
7
|
-
from collections import Counter
|
8
|
-
|
9
8
|
from typing import Any, Optional, Union, List
|
10
9
|
|
11
|
-
from rich.table import Table
|
12
10
|
from simpleeval import EvalWithCompoundTypes
|
13
11
|
|
14
|
-
from edsl.scenarios.Scenario import Scenario
|
15
12
|
from edsl.Base import Base
|
16
13
|
from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
|
14
|
+
from edsl.scenarios.Scenario import Scenario
|
17
15
|
from edsl.scenarios.ScenarioListPdfMixin import ScenarioListPdfMixin
|
16
|
+
from edsl.scenarios.ScenarioListExportMixin import ScenarioListExportMixin
|
18
17
|
|
19
|
-
import pandas as pd
|
20
|
-
|
21
|
-
from edsl.utilities.interface import print_scenario_list
|
22
18
|
|
23
|
-
|
19
|
+
class ScenarioListMixin(ScenarioListPdfMixin, ScenarioListExportMixin):
    """Bundle of the PDF and export mixins used as a base of ScenarioList."""
|
24
21
|
|
25
22
|
|
26
|
-
class ScenarioList(Base, UserList,
|
23
|
+
class ScenarioList(Base, UserList, ScenarioListMixin):
|
27
24
|
"""Class for creating a list of scenarios to be used in a survey."""
|
28
25
|
|
29
26
|
def __init__(self, data: Optional[list] = None):
|
@@ -33,11 +30,37 @@ class ScenarioList(Base, UserList, ScenarioListPdfMixin):
|
|
33
30
|
else:
|
34
31
|
super().__init__([])
|
35
32
|
|
33
|
+
@property
def parameters(self) -> set:
    """Return every key that appears in at least one scenario of the list.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1}), Scenario({'b': 2})])
    >>> s.parameters == {'a', 'b'}
    True
    """
    collected = set()
    for scenario in self:
        collected.update(scenario.keys())
    return collected
|
47
|
+
|
48
|
+
def __hash__(self) -> int:
    """Hash the list through ``dict_hash`` applied to its sorted dict form.

    >>> s = ScenarioList.example()
    >>> hash(s)
    1262252885757976162
    """
    from edsl.utilities.utilities import dict_hash

    sorted_form = self._to_dict(sort=True)
    return dict_hash(sorted_form)
|
58
|
+
|
36
59
|
def __repr__(self):
|
37
60
|
return f"ScenarioList({self.data})"
|
38
61
|
|
39
62
|
def __mul__(self, other: ScenarioList) -> ScenarioList:
|
40
|
-
"""
|
63
|
+
"""Takes the cross product of two ScenarioLists."""
|
41
64
|
from itertools import product
|
42
65
|
|
43
66
|
new_sl = []
|
@@ -45,6 +68,24 @@ class ScenarioList(Base, UserList, ScenarioListPdfMixin):
|
|
45
68
|
new_sl.append(s1 + s2)
|
46
69
|
return ScenarioList(new_sl)
|
47
70
|
|
71
|
+
def times(self, other: ScenarioList) -> ScenarioList:
    """Named alias for ``self * other`` (the cross product of two lists).

    Example:

    >>> s1 = ScenarioList([Scenario({'a': 1}), Scenario({'a': 2})])
    >>> s2 = ScenarioList([Scenario({'b': 1}), Scenario({'b': 2})])
    >>> s1.times(s2)
    ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2}), Scenario({'a': 2, 'b': 1}), Scenario({'a': 2, 'b': 2})])
    """
    cross_product = self.__mul__(other)
    return cross_product
|
82
|
+
|
83
|
+
def shuffle(self, seed: Optional[str] = "edsl") -> ScenarioList:
    """Shuffle the scenarios in place and return the same list.

    :param seed: Seed for the shuffle. The same seed always yields the same
        order for the same input.

    A private ``random.Random`` instance is used so that calling this method
    no longer reseeds the module-level generator shared by the rest of the
    process. The resulting order is the same one that
    ``random.seed(seed); random.shuffle(...)`` produced.
    """
    rng = random.Random(seed)
    rng.shuffle(self.data)
    return self
|
88
|
+
|
48
89
|
def _repr_html_(self) -> str:
|
49
90
|
from edsl.utilities.utilities import data_to_html
|
50
91
|
|
@@ -69,14 +110,13 @@ class ScenarioList(Base, UserList, ScenarioListPdfMixin):
|
|
69
110
|
|
70
111
|
def sample(self, n: int, seed="edsl") -> ScenarioList:
|
71
112
|
"""Return a random sample from the ScenarioList"""
|
72
|
-
import random
|
73
113
|
|
74
114
|
if seed != "edsl":
|
75
115
|
random.seed(seed)
|
76
116
|
|
77
117
|
return ScenarioList(random.sample(self.data, n))
|
78
118
|
|
79
|
-
def expand(self, expand_field: str) -> ScenarioList:
|
119
|
+
def expand(self, expand_field: str, number_field=False) -> ScenarioList:
|
80
120
|
"""Expand the ScenarioList by a field.
|
81
121
|
|
82
122
|
Example:
|
@@ -90,9 +130,11 @@ class ScenarioList(Base, UserList, ScenarioListPdfMixin):
|
|
90
130
|
values = scenario[expand_field]
|
91
131
|
if not isinstance(values, Iterable) or isinstance(values, str):
|
92
132
|
values = [values]
|
93
|
-
for value in values:
|
133
|
+
for index, value in enumerate(values):
|
94
134
|
new_scenario = scenario.copy()
|
95
135
|
new_scenario[expand_field] = value
|
136
|
+
if number_field:
|
137
|
+
new_scenario[expand_field + "_number"] = index + 1
|
96
138
|
new_scenarios.append(new_scenario)
|
97
139
|
return ScenarioList(new_scenarios)
|
98
140
|
|
@@ -112,6 +154,8 @@ class ScenarioList(Base, UserList, ScenarioListPdfMixin):
|
|
112
154
|
)
|
113
155
|
raw_var_name, expression = new_var_string.split("=", 1)
|
114
156
|
var_name = raw_var_name.strip()
|
157
|
+
from edsl.utilities.utilities import is_valid_variable_name
|
158
|
+
|
115
159
|
if not is_valid_variable_name(var_name):
|
116
160
|
raise Exception(f"{var_name} is not a valid variable name.")
|
117
161
|
|
@@ -135,16 +179,20 @@ class ScenarioList(Base, UserList, ScenarioListPdfMixin):
|
|
135
179
|
|
136
180
|
return ScenarioList(new_data)
|
137
181
|
|
138
|
-
def order_by(self,
|
139
|
-
"""Order the scenarios by
|
182
|
+
def order_by(self, *fields: str, reverse: bool = False) -> ScenarioList:
|
183
|
+
"""Order the scenarios by one or more fields.
|
140
184
|
|
141
185
|
Example:
|
142
186
|
|
143
187
|
>>> s = ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
|
144
|
-
>>> s.order_by('b')
|
188
|
+
>>> s.order_by('b', 'a')
|
145
189
|
ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
|
146
190
|
"""
|
147
|
-
|
191
|
+
|
192
|
+
def get_sort_key(scenario: Any) -> tuple:
|
193
|
+
return tuple(scenario[field] for field in fields)
|
194
|
+
|
195
|
+
return ScenarioList(sorted(self, key=get_sort_key, reverse=reverse))
|
148
196
|
|
149
197
|
def filter(self, expression: str) -> ScenarioList:
|
150
198
|
"""
|
@@ -217,6 +265,13 @@ class ScenarioList(Base, UserList, ScenarioListPdfMixin):
|
|
217
265
|
"""
|
218
266
|
return cls([Scenario({name: value}) for value in values])
|
219
267
|
|
268
|
+
def to_dataset(self) -> "Dataset":
    """Convert the list into a ``Dataset`` holding one column per key.

    The keys are taken from the first scenario, and each column holds that
    key's value from every scenario, in list order.
    """
    from edsl.results.Dataset import Dataset

    columns = {}
    for key in self[0].keys():
        columns[key] = [scenario[key] for scenario in self.data]
    return Dataset([columns])
|
274
|
+
|
220
275
|
def add_list(self, name, values) -> ScenarioList:
|
221
276
|
"""Add a list of values to a ScenarioList.
|
222
277
|
|
@@ -227,7 +282,10 @@ class ScenarioList(Base, UserList, ScenarioListPdfMixin):
|
|
227
282
|
ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 25})])
|
228
283
|
"""
|
229
284
|
for i, value in enumerate(values):
|
230
|
-
|
285
|
+
if i < len(self):
|
286
|
+
self[i][name] = value
|
287
|
+
else:
|
288
|
+
self.append(Scenario({name: value}))
|
231
289
|
return self
|
232
290
|
|
233
291
|
def add_value(self, name, value):
|
@@ -244,6 +302,16 @@ class ScenarioList(Base, UserList, ScenarioListPdfMixin):
|
|
244
302
|
return self
|
245
303
|
|
246
304
|
def rename(self, replacement_dict: dict) -> ScenarioList:
|
305
|
+
"""Rename the fields in the scenarios.
|
306
|
+
|
307
|
+
Example:
|
308
|
+
|
309
|
+
>>> s = ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 25})])
|
310
|
+
>>> s.rename({'name': 'first_name', 'age': 'years'})
|
311
|
+
ScenarioList([Scenario({'first_name': 'Alice', 'years': 30}), Scenario({'first_name': 'Bob', 'years': 25})])
|
312
|
+
|
313
|
+
"""
|
314
|
+
|
247
315
|
new_list = ScenarioList([])
|
248
316
|
for obj in self:
|
249
317
|
new_obj = obj.rename(replacement_dict)
|
@@ -274,6 +342,20 @@ class ScenarioList(Base, UserList, ScenarioListPdfMixin):
|
|
274
342
|
"""
|
275
343
|
return cls([Scenario(row) for row in df.to_dict(orient="records")])
|
276
344
|
|
345
|
+
def to_key_value(self, field, value=None) -> Union[dict, set]:
    """Collect one field across scenarios, optionally paired with another.

    :param field: Key whose values are collected.
    :param value: When given, a second key; the result is then a dict
        mapping each scenario's ``field`` value to its ``value`` value.
        When ``None``, the result is the set of ``field`` values.

    Example:

    >>> s = ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])
    >>> s.to_key_value('name') == {'Alice', 'Bob'}
    True
    """
    if value is not None:
        return {scenario[field]: scenario[value] for scenario in self}
    return {scenario[field] for scenario in self}
|
358
|
+
|
277
359
|
@classmethod
|
278
360
|
def from_csv(cls, filename: str) -> ScenarioList:
|
279
361
|
"""Create a ScenarioList from a CSV file.
|
@@ -293,6 +375,8 @@ class ScenarioList(Base, UserList, ScenarioListPdfMixin):
|
|
293
375
|
>>> scenario_list[1]['age']
|
294
376
|
'25'
|
295
377
|
"""
|
378
|
+
from edsl.scenarios.Scenario import Scenario
|
379
|
+
|
296
380
|
observations = []
|
297
381
|
with open(filename, "r") as f:
|
298
382
|
reader = csv.reader(f)
|
@@ -301,6 +385,13 @@ class ScenarioList(Base, UserList, ScenarioListPdfMixin):
|
|
301
385
|
observations.append(Scenario(dict(zip(header, row))))
|
302
386
|
return cls(observations)
|
303
387
|
|
388
|
+
def _to_dict(self, sort=False) -> dict:
    """Return ``{"scenarios": [...]}`` built from each scenario's ``_to_dict()``.

    :param sort: When True, scenarios are ordered by their hash before being
        converted; otherwise list order is kept.
    """
    ordered = sorted(self, key=hash) if sort else self
    return {"scenarios": [scenario._to_dict() for scenario in ordered]}
|
394
|
+
|
304
395
|
@add_edsl_version
|
305
396
|
def to_dict(self) -> dict[str, Any]:
|
306
397
|
"""Return the `ScenarioList` as a dictionary.
|
@@ -315,13 +406,24 @@ class ScenarioList(Base, UserList, ScenarioListPdfMixin):
|
|
315
406
|
|
316
407
|
@classmethod
|
317
408
|
def gen(cls, scenario_dicts_list: List[dict]) -> ScenarioList:
|
318
|
-
"""Create a `ScenarioList` from a list of dictionaries.
|
409
|
+
"""Create a `ScenarioList` from a list of dictionaries.
|
410
|
+
|
411
|
+
Example:
|
412
|
+
|
413
|
+
>>> ScenarioList.gen([{'name': 'Alice'}, {'name': 'Bob'}])
|
414
|
+
ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])
|
415
|
+
|
416
|
+
"""
|
417
|
+
from edsl.scenarios.Scenario import Scenario
|
418
|
+
|
319
419
|
return cls([Scenario(s) for s in scenario_dicts_list])
|
320
420
|
|
321
421
|
@classmethod
|
322
422
|
@remove_edsl_version
|
323
423
|
def from_dict(cls, data) -> ScenarioList:
|
324
424
|
"""Create a `ScenarioList` from a dictionary."""
|
425
|
+
from edsl.scenarios.Scenario import Scenario
|
426
|
+
|
325
427
|
return cls([Scenario.from_dict(s) for s in data["scenarios"]])
|
326
428
|
|
327
429
|
def code(self) -> str:
|
@@ -346,6 +448,8 @@ class ScenarioList(Base, UserList, ScenarioListPdfMixin):
|
|
346
448
|
|
347
449
|
def rich_print(self) -> None:
|
348
450
|
"""Display an object as a table."""
|
451
|
+
from rich.table import Table
|
452
|
+
|
349
453
|
table = Table(title="ScenarioList")
|
350
454
|
table.add_column("Index", style="bold")
|
351
455
|
table.add_column("Scenario")
|
@@ -360,40 +464,22 @@ class ScenarioList(Base, UserList, ScenarioListPdfMixin):
|
|
360
464
|
pretty_labels: Optional[dict] = None,
|
361
465
|
filename: str = None,
|
362
466
|
):
|
363
|
-
print_scenario_list
|
364
|
-
|
365
|
-
|
366
|
-
# format = "html"
|
367
|
-
# else:
|
368
|
-
# format = "rich"
|
369
|
-
|
370
|
-
# if pretty_labels is None:
|
371
|
-
# pretty_labels = {}
|
372
|
-
|
373
|
-
# if format not in ["rich", "html", "markdown"]:
|
374
|
-
# raise ValueError("format must be one of 'rich', 'html', or 'markdown'.")
|
375
|
-
|
376
|
-
# if max_rows is not None:
|
377
|
-
# new_data = self[:max_rows]
|
378
|
-
# else:
|
379
|
-
# new_data = self
|
380
|
-
|
381
|
-
# if format == "rich":
|
382
|
-
# print_list_of_dicts_with_rich(
|
383
|
-
# new_data, filename=filename, split_at_dot=False
|
384
|
-
# )
|
385
|
-
# elif format == "html":
|
386
|
-
# notebook = is_notebook()
|
387
|
-
# html = print_list_of_dicts_as_html_table(
|
388
|
-
# new_data, filename=None, interactive=False, notebook=notebook
|
389
|
-
# )
|
390
|
-
# # print(html)
|
391
|
-
# display(HTML(html))
|
392
|
-
# elif format == "markdown":
|
393
|
-
# print_list_of_dicts_as_markdown_table(new_data, filename=filename)
|
467
|
+
from edsl.utilities.interface import print_scenario_list
|
468
|
+
|
469
|
+
print_scenario_list(self[:max_rows])
|
394
470
|
|
395
471
|
def __getitem__(self, key: Union[int, slice]) -> Any:
|
396
|
-
"""Return the item at the given index.
|
472
|
+
"""Return the item at the given index.
|
473
|
+
|
474
|
+
Example:
|
475
|
+
>>> s = ScenarioList([Scenario({'age': 22, 'hair': 'brown', 'height': 5.5}), Scenario({'age': 22, 'hair': 'brown', 'height': 5.5})])
|
476
|
+
>>> s[0]
|
477
|
+
Scenario({'age': 22, 'hair': 'brown', 'height': 5.5})
|
478
|
+
|
479
|
+
>>> s[:1]
|
480
|
+
ScenarioList([Scenario({'age': 22, 'hair': 'brown', 'height': 5.5})])
|
481
|
+
|
482
|
+
"""
|
397
483
|
if isinstance(key, slice):
|
398
484
|
return ScenarioList(super().__getitem__(key))
|
399
485
|
elif isinstance(key, int):
|
@@ -0,0 +1,32 @@
|
|
1
|
+
"""Mixin class for exporting results."""
|
2
|
+
|
3
|
+
from functools import wraps
|
4
|
+
from edsl.results.DatasetExportMixin import DatasetExportMixin
|
5
|
+
|
6
|
+
|
7
|
+
def to_dataset(func):
    """Wrap ``func`` so it receives ``self.to_dataset()`` instead of ``self``.

    The wrapper accepts only objects whose class, or one of its base classes,
    is named ``ScenarioList``; anything else raises ``Exception``.
    """

    @wraps(func)
    def wrapper(self, *args, **kwargs):
        """Call ``func`` with the ScenarioList converted to a Dataset."""
        # The match is by class name, as before, but every class in the MRO
        # is checked so that subclasses of ScenarioList are accepted too.
        is_scenario_list = any(
            klass.__name__ == "ScenarioList" for klass in type(self).__mro__
        )
        if not is_scenario_list:
            raise Exception(
                f"Class {self.__class__.__name__} not recognized as a ScenarioList object."
            )
        return func(self.to_dataset(), *args, **kwargs)

    return wrapper
|
21
|
+
|
22
|
+
|
23
|
+
def decorate_all_methods(cls):
    """Class decorator that routes the methods of ``cls`` through ``to_dataset``.

    Methods inherited from base classes are included. Looking only at
    ``cls.__dict__`` wrapped nothing for a class with an empty body, because
    its methods live on its bases. Wrapped versions are set on ``cls`` itself,
    so the base classes are left untouched.

    Skipped: dunder attributes, ``staticmethod``/``classmethod`` objects and
    nested classes, since passing them through ``to_dataset`` would break
    their calling convention.
    """
    wrapped = {}
    # Walk from the most distant base towards cls so that a definition in a
    # more derived class replaces the one collected from its base.
    for klass in reversed(cls.__mro__):
        if klass is object:
            continue
        for attr_name, attr_value in vars(klass).items():
            if attr_name.startswith("__"):
                continue
            is_plain_callable = callable(attr_value) and not isinstance(
                attr_value, (staticmethod, classmethod, type)
            )
            if is_plain_callable:
                wrapped[attr_name] = to_dataset(attr_value)
            else:
                # A non-method override hides any method of the same name.
                wrapped.pop(attr_name, None)

    for attr_name, attr_value in wrapped.items():
        setattr(cls, attr_name, attr_value)
    return cls
|
28
|
+
|
29
|
+
|
30
|
+
@decorate_all_methods
class ScenarioListExportMixin(DatasetExportMixin):
    """Mixin class that exposes the DatasetExportMixin methods on ScenarioList objects."""
|
@@ -2,7 +2,7 @@ import fitz # PyMuPDF
|
|
2
2
|
import os
|
3
3
|
import subprocess
|
4
4
|
|
5
|
-
from edsl import Scenario
|
5
|
+
# from edsl import Scenario
|
6
6
|
|
7
7
|
|
8
8
|
class ScenarioListPdfMixin:
|
@@ -22,6 +22,7 @@ class ScenarioListPdfMixin:
|
|
22
22
|
"""
|
23
23
|
import tempfile
|
24
24
|
from pdf2image import convert_from_path
|
25
|
+
from edsl.scenarios import Scenario
|
25
26
|
|
26
27
|
with tempfile.TemporaryDirectory() as output_folder:
|
27
28
|
# Convert PDF to images
|
edsl/scenarios/__init__.py
CHANGED