edsl 0.1.36.dev7__py3-none-any.whl → 0.1.37.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (257) hide show
  1. edsl/Base.py +303 -303
  2. edsl/BaseDiff.py +260 -260
  3. edsl/TemplateLoader.py +24 -24
  4. edsl/__init__.py +48 -48
  5. edsl/__version__.py +1 -1
  6. edsl/agents/Agent.py +804 -804
  7. edsl/agents/AgentList.py +345 -337
  8. edsl/agents/Invigilator.py +222 -222
  9. edsl/agents/InvigilatorBase.py +305 -298
  10. edsl/agents/PromptConstructor.py +310 -320
  11. edsl/agents/__init__.py +3 -3
  12. edsl/agents/descriptors.py +86 -86
  13. edsl/agents/prompt_helpers.py +129 -129
  14. edsl/auto/AutoStudy.py +117 -117
  15. edsl/auto/StageBase.py +230 -230
  16. edsl/auto/StageGenerateSurvey.py +178 -178
  17. edsl/auto/StageLabelQuestions.py +125 -125
  18. edsl/auto/StagePersona.py +61 -61
  19. edsl/auto/StagePersonaDimensionValueRanges.py +88 -88
  20. edsl/auto/StagePersonaDimensionValues.py +74 -74
  21. edsl/auto/StagePersonaDimensions.py +69 -69
  22. edsl/auto/StageQuestions.py +73 -73
  23. edsl/auto/SurveyCreatorPipeline.py +21 -21
  24. edsl/auto/utilities.py +224 -224
  25. edsl/base/Base.py +289 -289
  26. edsl/config.py +149 -149
  27. edsl/conjure/AgentConstructionMixin.py +152 -152
  28. edsl/conjure/Conjure.py +62 -62
  29. edsl/conjure/InputData.py +659 -659
  30. edsl/conjure/InputDataCSV.py +48 -48
  31. edsl/conjure/InputDataMixinQuestionStats.py +182 -182
  32. edsl/conjure/InputDataPyRead.py +91 -91
  33. edsl/conjure/InputDataSPSS.py +8 -8
  34. edsl/conjure/InputDataStata.py +8 -8
  35. edsl/conjure/QuestionOptionMixin.py +76 -76
  36. edsl/conjure/QuestionTypeMixin.py +23 -23
  37. edsl/conjure/RawQuestion.py +65 -65
  38. edsl/conjure/SurveyResponses.py +7 -7
  39. edsl/conjure/__init__.py +9 -9
  40. edsl/conjure/naming_utilities.py +263 -263
  41. edsl/conjure/utilities.py +201 -201
  42. edsl/conversation/Conversation.py +238 -238
  43. edsl/conversation/car_buying.py +58 -58
  44. edsl/conversation/mug_negotiation.py +81 -81
  45. edsl/conversation/next_speaker_utilities.py +93 -93
  46. edsl/coop/PriceFetcher.py +54 -54
  47. edsl/coop/__init__.py +2 -2
  48. edsl/coop/coop.py +824 -849
  49. edsl/coop/utils.py +131 -131
  50. edsl/data/Cache.py +527 -527
  51. edsl/data/CacheEntry.py +228 -228
  52. edsl/data/CacheHandler.py +149 -149
  53. edsl/data/RemoteCacheSync.py +97 -84
  54. edsl/data/SQLiteDict.py +292 -292
  55. edsl/data/__init__.py +4 -4
  56. edsl/data/orm.py +10 -10
  57. edsl/data_transfer_models.py +73 -73
  58. edsl/enums.py +173 -173
  59. edsl/exceptions/__init__.py +50 -50
  60. edsl/exceptions/agents.py +40 -40
  61. edsl/exceptions/configuration.py +16 -16
  62. edsl/exceptions/coop.py +10 -10
  63. edsl/exceptions/data.py +14 -14
  64. edsl/exceptions/general.py +34 -34
  65. edsl/exceptions/jobs.py +33 -33
  66. edsl/exceptions/language_models.py +63 -63
  67. edsl/exceptions/prompts.py +15 -15
  68. edsl/exceptions/questions.py +91 -91
  69. edsl/exceptions/results.py +26 -26
  70. edsl/exceptions/surveys.py +34 -34
  71. edsl/inference_services/AnthropicService.py +87 -87
  72. edsl/inference_services/AwsBedrock.py +115 -115
  73. edsl/inference_services/AzureAI.py +217 -217
  74. edsl/inference_services/DeepInfraService.py +18 -18
  75. edsl/inference_services/GoogleService.py +156 -156
  76. edsl/inference_services/GroqService.py +20 -20
  77. edsl/inference_services/InferenceServiceABC.py +147 -147
  78. edsl/inference_services/InferenceServicesCollection.py +74 -74
  79. edsl/inference_services/MistralAIService.py +123 -123
  80. edsl/inference_services/OllamaService.py +18 -18
  81. edsl/inference_services/OpenAIService.py +224 -224
  82. edsl/inference_services/TestService.py +89 -89
  83. edsl/inference_services/TogetherAIService.py +170 -170
  84. edsl/inference_services/models_available_cache.py +118 -118
  85. edsl/inference_services/rate_limits_cache.py +25 -25
  86. edsl/inference_services/registry.py +39 -39
  87. edsl/inference_services/write_available.py +10 -10
  88. edsl/jobs/Answers.py +56 -56
  89. edsl/jobs/Jobs.py +1112 -1112
  90. edsl/jobs/__init__.py +1 -1
  91. edsl/jobs/buckets/BucketCollection.py +63 -63
  92. edsl/jobs/buckets/ModelBuckets.py +65 -65
  93. edsl/jobs/buckets/TokenBucket.py +248 -248
  94. edsl/jobs/interviews/Interview.py +661 -661
  95. edsl/jobs/interviews/InterviewExceptionCollection.py +99 -99
  96. edsl/jobs/interviews/InterviewExceptionEntry.py +182 -189
  97. edsl/jobs/interviews/InterviewStatistic.py +63 -63
  98. edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -25
  99. edsl/jobs/interviews/InterviewStatusDictionary.py +78 -78
  100. edsl/jobs/interviews/InterviewStatusLog.py +92 -92
  101. edsl/jobs/interviews/ReportErrors.py +66 -66
  102. edsl/jobs/interviews/interview_status_enum.py +9 -9
  103. edsl/jobs/runners/JobsRunnerAsyncio.py +338 -337
  104. edsl/jobs/runners/JobsRunnerStatus.py +332 -332
  105. edsl/jobs/tasks/QuestionTaskCreator.py +242 -242
  106. edsl/jobs/tasks/TaskCreators.py +64 -64
  107. edsl/jobs/tasks/TaskHistory.py +441 -441
  108. edsl/jobs/tasks/TaskStatusLog.py +23 -23
  109. edsl/jobs/tasks/task_status_enum.py +163 -163
  110. edsl/jobs/tokens/InterviewTokenUsage.py +27 -27
  111. edsl/jobs/tokens/TokenUsage.py +34 -34
  112. edsl/language_models/LanguageModel.py +718 -718
  113. edsl/language_models/ModelList.py +102 -102
  114. edsl/language_models/RegisterLanguageModelsMeta.py +184 -184
  115. edsl/language_models/__init__.py +2 -2
  116. edsl/language_models/fake_openai_call.py +15 -15
  117. edsl/language_models/fake_openai_service.py +61 -61
  118. edsl/language_models/registry.py +137 -137
  119. edsl/language_models/repair.py +156 -156
  120. edsl/language_models/unused/ReplicateBase.py +83 -83
  121. edsl/language_models/utilities.py +64 -64
  122. edsl/notebooks/Notebook.py +259 -259
  123. edsl/notebooks/__init__.py +1 -1
  124. edsl/prompts/Prompt.py +350 -358
  125. edsl/prompts/__init__.py +2 -2
  126. edsl/questions/AnswerValidatorMixin.py +289 -289
  127. edsl/questions/QuestionBase.py +616 -616
  128. edsl/questions/QuestionBaseGenMixin.py +161 -161
  129. edsl/questions/QuestionBasePromptsMixin.py +266 -266
  130. edsl/questions/QuestionBudget.py +227 -227
  131. edsl/questions/QuestionCheckBox.py +359 -359
  132. edsl/questions/QuestionExtract.py +183 -183
  133. edsl/questions/QuestionFreeText.py +113 -113
  134. edsl/questions/QuestionFunctional.py +159 -159
  135. edsl/questions/QuestionList.py +231 -231
  136. edsl/questions/QuestionMultipleChoice.py +286 -286
  137. edsl/questions/QuestionNumerical.py +153 -153
  138. edsl/questions/QuestionRank.py +324 -324
  139. edsl/questions/Quick.py +41 -41
  140. edsl/questions/RegisterQuestionsMeta.py +71 -71
  141. edsl/questions/ResponseValidatorABC.py +174 -174
  142. edsl/questions/SimpleAskMixin.py +73 -73
  143. edsl/questions/__init__.py +26 -26
  144. edsl/questions/compose_questions.py +98 -98
  145. edsl/questions/decorators.py +21 -21
  146. edsl/questions/derived/QuestionLikertFive.py +76 -76
  147. edsl/questions/derived/QuestionLinearScale.py +87 -87
  148. edsl/questions/derived/QuestionTopK.py +91 -91
  149. edsl/questions/derived/QuestionYesNo.py +82 -82
  150. edsl/questions/descriptors.py +418 -418
  151. edsl/questions/prompt_templates/question_budget.jinja +13 -13
  152. edsl/questions/prompt_templates/question_checkbox.jinja +32 -32
  153. edsl/questions/prompt_templates/question_extract.jinja +11 -11
  154. edsl/questions/prompt_templates/question_free_text.jinja +3 -3
  155. edsl/questions/prompt_templates/question_linear_scale.jinja +11 -11
  156. edsl/questions/prompt_templates/question_list.jinja +17 -17
  157. edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -33
  158. edsl/questions/prompt_templates/question_numerical.jinja +36 -36
  159. edsl/questions/question_registry.py +147 -147
  160. edsl/questions/settings.py +12 -12
  161. edsl/questions/templates/budget/answering_instructions.jinja +7 -7
  162. edsl/questions/templates/budget/question_presentation.jinja +7 -7
  163. edsl/questions/templates/checkbox/answering_instructions.jinja +10 -10
  164. edsl/questions/templates/checkbox/question_presentation.jinja +22 -22
  165. edsl/questions/templates/extract/answering_instructions.jinja +7 -7
  166. edsl/questions/templates/likert_five/answering_instructions.jinja +10 -10
  167. edsl/questions/templates/likert_five/question_presentation.jinja +11 -11
  168. edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -5
  169. edsl/questions/templates/linear_scale/question_presentation.jinja +5 -5
  170. edsl/questions/templates/list/answering_instructions.jinja +3 -3
  171. edsl/questions/templates/list/question_presentation.jinja +5 -5
  172. edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -9
  173. edsl/questions/templates/multiple_choice/question_presentation.jinja +11 -11
  174. edsl/questions/templates/numerical/answering_instructions.jinja +6 -6
  175. edsl/questions/templates/numerical/question_presentation.jinja +6 -6
  176. edsl/questions/templates/rank/answering_instructions.jinja +11 -11
  177. edsl/questions/templates/rank/question_presentation.jinja +15 -15
  178. edsl/questions/templates/top_k/answering_instructions.jinja +8 -8
  179. edsl/questions/templates/top_k/question_presentation.jinja +22 -22
  180. edsl/questions/templates/yes_no/answering_instructions.jinja +6 -6
  181. edsl/questions/templates/yes_no/question_presentation.jinja +11 -11
  182. edsl/results/Dataset.py +293 -293
  183. edsl/results/DatasetExportMixin.py +693 -693
  184. edsl/results/DatasetTree.py +145 -145
  185. edsl/results/Result.py +435 -433
  186. edsl/results/Results.py +1160 -1158
  187. edsl/results/ResultsDBMixin.py +238 -238
  188. edsl/results/ResultsExportMixin.py +43 -43
  189. edsl/results/ResultsFetchMixin.py +33 -33
  190. edsl/results/ResultsGGMixin.py +121 -121
  191. edsl/results/ResultsToolsMixin.py +98 -98
  192. edsl/results/Selector.py +118 -118
  193. edsl/results/__init__.py +2 -2
  194. edsl/results/tree_explore.py +115 -115
  195. edsl/scenarios/FileStore.py +458 -458
  196. edsl/scenarios/Scenario.py +510 -510
  197. edsl/scenarios/ScenarioHtmlMixin.py +59 -59
  198. edsl/scenarios/ScenarioList.py +1101 -1101
  199. edsl/scenarios/ScenarioListExportMixin.py +52 -52
  200. edsl/scenarios/ScenarioListPdfMixin.py +261 -261
  201. edsl/scenarios/__init__.py +4 -4
  202. edsl/shared.py +1 -1
  203. edsl/study/ObjectEntry.py +173 -173
  204. edsl/study/ProofOfWork.py +113 -113
  205. edsl/study/SnapShot.py +80 -80
  206. edsl/study/Study.py +528 -528
  207. edsl/study/__init__.py +4 -4
  208. edsl/surveys/DAG.py +148 -148
  209. edsl/surveys/Memory.py +31 -31
  210. edsl/surveys/MemoryPlan.py +244 -244
  211. edsl/surveys/Rule.py +324 -324
  212. edsl/surveys/RuleCollection.py +387 -387
  213. edsl/surveys/Survey.py +1772 -1772
  214. edsl/surveys/SurveyCSS.py +261 -261
  215. edsl/surveys/SurveyExportMixin.py +259 -259
  216. edsl/surveys/SurveyFlowVisualizationMixin.py +121 -121
  217. edsl/surveys/SurveyQualtricsImport.py +284 -284
  218. edsl/surveys/__init__.py +3 -3
  219. edsl/surveys/base.py +53 -53
  220. edsl/surveys/descriptors.py +56 -56
  221. edsl/surveys/instructions/ChangeInstruction.py +47 -47
  222. edsl/surveys/instructions/Instruction.py +51 -51
  223. edsl/surveys/instructions/InstructionCollection.py +77 -77
  224. edsl/templates/error_reporting/base.html +23 -23
  225. edsl/templates/error_reporting/exceptions_by_model.html +34 -34
  226. edsl/templates/error_reporting/exceptions_by_question_name.html +16 -16
  227. edsl/templates/error_reporting/exceptions_by_type.html +16 -16
  228. edsl/templates/error_reporting/interview_details.html +115 -115
  229. edsl/templates/error_reporting/interviews.html +9 -9
  230. edsl/templates/error_reporting/overview.html +4 -4
  231. edsl/templates/error_reporting/performance_plot.html +1 -1
  232. edsl/templates/error_reporting/report.css +73 -73
  233. edsl/templates/error_reporting/report.html +117 -117
  234. edsl/templates/error_reporting/report.js +25 -25
  235. edsl/tools/__init__.py +1 -1
  236. edsl/tools/clusters.py +192 -192
  237. edsl/tools/embeddings.py +27 -27
  238. edsl/tools/embeddings_plotting.py +118 -118
  239. edsl/tools/plotting.py +112 -112
  240. edsl/tools/summarize.py +18 -18
  241. edsl/utilities/SystemInfo.py +28 -28
  242. edsl/utilities/__init__.py +22 -22
  243. edsl/utilities/ast_utilities.py +25 -25
  244. edsl/utilities/data/Registry.py +6 -6
  245. edsl/utilities/data/__init__.py +1 -1
  246. edsl/utilities/data/scooter_results.json +1 -1
  247. edsl/utilities/decorators.py +77 -77
  248. edsl/utilities/gcp_bucket/cloud_storage.py +96 -96
  249. edsl/utilities/interface.py +627 -627
  250. edsl/utilities/repair_functions.py +28 -28
  251. edsl/utilities/restricted_python.py +70 -70
  252. edsl/utilities/utilities.py +391 -391
  253. {edsl-0.1.36.dev7.dist-info → edsl-0.1.37.dev1.dist-info}/LICENSE +21 -21
  254. {edsl-0.1.36.dev7.dist-info → edsl-0.1.37.dev1.dist-info}/METADATA +1 -1
  255. edsl-0.1.37.dev1.dist-info/RECORD +279 -0
  256. edsl-0.1.36.dev7.dist-info/RECORD +0 -279
  257. {edsl-0.1.36.dev7.dist-info → edsl-0.1.37.dev1.dist-info}/WHEEL +0 -0
@@ -1,1101 +1,1101 @@
1
- """A list of Scenarios to be used in a survey."""
2
-
3
- from __future__ import annotations
4
- from typing import Any, Optional, Union, List, Callable
5
- import csv
6
- import random
7
- from collections import UserList, Counter
8
- from collections.abc import Iterable
9
- import urllib.parse
10
- import urllib.request
11
- from io import StringIO
12
- from collections import defaultdict
13
- import inspect
14
-
15
- from simpleeval import EvalWithCompoundTypes
16
-
17
- from edsl.Base import Base
18
- from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
19
- from edsl.scenarios.Scenario import Scenario
20
- from edsl.scenarios.ScenarioListPdfMixin import ScenarioListPdfMixin
21
- from edsl.scenarios.ScenarioListExportMixin import ScenarioListExportMixin
22
-
23
- from edsl.conjure.naming_utilities import sanitize_string
24
- from edsl.utilities.utilities import is_valid_variable_name
25
-
26
-
27
- class ScenarioListMixin(ScenarioListPdfMixin, ScenarioListExportMixin):
28
- pass
29
-
30
-
31
- class ScenarioList(Base, UserList, ScenarioListMixin):
32
- """Class for creating a list of scenarios to be used in a survey."""
33
-
34
- def __init__(self, data: Optional[list] = None, codebook: Optional[dict] = None):
35
- """Initialize the ScenarioList class."""
36
- if data is not None:
37
- super().__init__(data)
38
- else:
39
- super().__init__([])
40
- self.codebook = codebook or {}
41
-
42
- def unique(self) -> ScenarioList:
43
- """Return a list of unique scenarios.
44
-
45
- >>> s = ScenarioList([Scenario({'a': 1}), Scenario({'a': 1}), Scenario({'a': 2})])
46
- >>> s.unique()
47
- ScenarioList([Scenario({'a': 1}), Scenario({'a': 2})])
48
- """
49
- return ScenarioList(list(set(self)))
50
-
51
- @property
52
- def has_jinja_braces(self) -> bool:
53
- """Check if the ScenarioList has Jinja braces."""
54
- return any([scenario.has_jinja_braces for scenario in self])
55
-
56
- def convert_jinja_braces(self) -> ScenarioList:
57
- """Convert Jinja braces to Python braces."""
58
- return ScenarioList([scenario.convert_jinja_braces() for scenario in self])
59
-
60
- def give_valid_names(self) -> ScenarioList:
61
- """Give valid names to the scenario keys.
62
-
63
- >>> s = ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
64
- >>> s.give_valid_names()
65
- ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
66
- >>> s = ScenarioList([Scenario({'are you there John?': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
67
- >>> s.give_valid_names()
68
- ScenarioList([Scenario({'john': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
69
- """
70
- codebook = {}
71
- new_scenaerios = []
72
- for scenario in self:
73
- new_scenario = {}
74
- for key in scenario:
75
- if not is_valid_variable_name(key):
76
- if key in codebook:
77
- new_key = codebook[key]
78
- else:
79
- new_key = sanitize_string(key)
80
- if not is_valid_variable_name(new_key):
81
- new_key = f"var_{len(codebook)}"
82
- codebook[key] = new_key
83
- new_scenario[new_key] = scenario[key]
84
- else:
85
- new_scenario[key] = scenario[key]
86
- new_scenaerios.append(Scenario(new_scenario))
87
- return ScenarioList(new_scenaerios, codebook)
88
-
89
- def unpivot(self, id_vars=None, value_vars=None):
90
- """
91
- Unpivot the ScenarioList, allowing for id variables to be specified.
92
-
93
- Parameters:
94
- id_vars (list): Fields to use as identifier variables (kept in each entry)
95
- value_vars (list): Fields to unpivot. If None, all fields not in id_vars will be used.
96
-
97
- Example:
98
- >>> s = ScenarioList([
99
- ... Scenario({'id': 1, 'year': 2020, 'a': 10, 'b': 20}),
100
- ... Scenario({'id': 2, 'year': 2021, 'a': 15, 'b': 25})
101
- ... ])
102
- >>> s.unpivot(id_vars=['id', 'year'], value_vars=['a', 'b'])
103
- ScenarioList([Scenario({'id': 1, 'year': 2020, 'variable': 'a', 'value': 10}), Scenario({'id': 1, 'year': 2020, 'variable': 'b', 'value': 20}), Scenario({'id': 2, 'year': 2021, 'variable': 'a', 'value': 15}), Scenario({'id': 2, 'year': 2021, 'variable': 'b', 'value': 25})])
104
- """
105
- if id_vars is None:
106
- id_vars = []
107
- if value_vars is None:
108
- value_vars = [field for field in self[0].keys() if field not in id_vars]
109
-
110
- new_scenarios = []
111
- for scenario in self:
112
- for var in value_vars:
113
- new_scenario = {id_var: scenario[id_var] for id_var in id_vars}
114
- new_scenario["variable"] = var
115
- new_scenario["value"] = scenario[var]
116
- new_scenarios.append(Scenario(new_scenario))
117
-
118
- return ScenarioList(new_scenarios)
119
-
120
- def pivot(self, id_vars, var_name="variable", value_name="value"):
121
- """
122
- Pivot the ScenarioList from long to wide format.
123
-
124
- Parameters:
125
- id_vars (list): Fields to use as identifier variables
126
- var_name (str): Name of the variable column (default: 'variable')
127
- value_name (str): Name of the value column (default: 'value')
128
-
129
- Example:
130
- >>> s = ScenarioList([
131
- ... Scenario({'id': 1, 'year': 2020, 'variable': 'a', 'value': 10}),
132
- ... Scenario({'id': 1, 'year': 2020, 'variable': 'b', 'value': 20}),
133
- ... Scenario({'id': 2, 'year': 2021, 'variable': 'a', 'value': 15}),
134
- ... Scenario({'id': 2, 'year': 2021, 'variable': 'b', 'value': 25})
135
- ... ])
136
- >>> s.pivot(id_vars=['id', 'year'])
137
- ScenarioList([Scenario({'id': 1, 'year': 2020, 'a': 10, 'b': 20}), Scenario({'id': 2, 'year': 2021, 'a': 15, 'b': 25})])
138
- """
139
- pivoted_dict = {}
140
-
141
- for scenario in self:
142
- # Create a tuple of id values to use as a key
143
- id_key = tuple(scenario[id_var] for id_var in id_vars)
144
-
145
- # If this combination of id values hasn't been seen before, initialize it
146
- if id_key not in pivoted_dict:
147
- pivoted_dict[id_key] = {id_var: scenario[id_var] for id_var in id_vars}
148
-
149
- # Add the variable-value pair to the dict
150
- variable = scenario[var_name]
151
- value = scenario[value_name]
152
- pivoted_dict[id_key][variable] = value
153
-
154
- # Convert the dict of dicts to a list of Scenarios
155
- pivoted_scenarios = [
156
- Scenario(dict(zip(id_vars, id_key), **values))
157
- for id_key, values in pivoted_dict.items()
158
- ]
159
-
160
- return ScenarioList(pivoted_scenarios)
161
-
162
- def group_by(self, id_vars, variables, func):
163
- """
164
- Group the ScenarioList by id_vars and apply a function to the specified variables.
165
-
166
- Parameters:
167
- id_vars (list): Fields to use as identifier variables for grouping
168
- variables (list): Fields to pass to the aggregation function
169
- func (callable): Function to apply to the grouped variables.
170
- Should accept lists of values for each variable.
171
-
172
- Returns:
173
- ScenarioList: A new ScenarioList with the grouped and aggregated results
174
-
175
- Example:
176
- >>> def avg_sum(a, b):
177
- ... return {'avg_a': sum(a) / len(a), 'sum_b': sum(b)}
178
- >>> s = ScenarioList([
179
- ... Scenario({'group': 'A', 'year': 2020, 'a': 10, 'b': 20}),
180
- ... Scenario({'group': 'A', 'year': 2021, 'a': 15, 'b': 25}),
181
- ... Scenario({'group': 'B', 'year': 2020, 'a': 12, 'b': 22}),
182
- ... Scenario({'group': 'B', 'year': 2021, 'a': 17, 'b': 27})
183
- ... ])
184
- >>> s.group_by(id_vars=['group'], variables=['a', 'b'], func=avg_sum)
185
- ScenarioList([Scenario({'group': 'A', 'avg_a': 12.5, 'sum_b': 45}), Scenario({'group': 'B', 'avg_a': 14.5, 'sum_b': 49})])
186
- """
187
- # Check if the function is compatible with the specified variables
188
- func_params = inspect.signature(func).parameters
189
- if len(func_params) != len(variables):
190
- raise ValueError(
191
- f"Function {func.__name__} expects {len(func_params)} arguments, but {len(variables)} variables were provided"
192
- )
193
-
194
- # Group the scenarios
195
- grouped = defaultdict(lambda: defaultdict(list))
196
- for scenario in self:
197
- key = tuple(scenario[id_var] for id_var in id_vars)
198
- for var in variables:
199
- grouped[key][var].append(scenario[var])
200
-
201
- # Apply the function to each group
202
- result = []
203
- for key, group in grouped.items():
204
- try:
205
- aggregated = func(*[group[var] for var in variables])
206
- except Exception as e:
207
- raise ValueError(f"Error applying function to group {key}: {str(e)}")
208
-
209
- if not isinstance(aggregated, dict):
210
- raise ValueError(f"Function {func.__name__} must return a dictionary")
211
-
212
- new_scenario = dict(zip(id_vars, key))
213
- new_scenario.update(aggregated)
214
- result.append(Scenario(new_scenario))
215
-
216
- return ScenarioList(result)
217
-
218
- @property
219
- def parameters(self) -> set:
220
- """Return the set of parameters in the ScenarioList
221
-
222
- Example:
223
-
224
- >>> s = ScenarioList([Scenario({'a': 1}), Scenario({'b': 2})])
225
- >>> s.parameters == {'a', 'b'}
226
- True
227
- """
228
- if len(self) == 0:
229
- return set()
230
-
231
- return set.union(*[set(s.keys()) for s in self])
232
-
233
- def __hash__(self) -> int:
234
- """Return the hash of the ScenarioList.
235
-
236
- >>> s = ScenarioList.example()
237
- >>> hash(s)
238
- 1262252885757976162
239
- """
240
- from edsl.utilities.utilities import dict_hash
241
-
242
- return dict_hash(self._to_dict(sort=True))
243
-
244
- def __repr__(self):
245
- return f"ScenarioList({self.data})"
246
-
247
- def __mul__(self, other: ScenarioList) -> ScenarioList:
248
- """Takes the cross product of two ScenarioLists.
249
-
250
- >>> s1 = ScenarioList.from_list("a", [1, 2])
251
- >>> s2 = ScenarioList.from_list("b", [3, 4])
252
- >>> s1 * s2
253
- ScenarioList([Scenario({'a': 1, 'b': 3}), Scenario({'a': 1, 'b': 4}), Scenario({'a': 2, 'b': 3}), Scenario({'a': 2, 'b': 4})])
254
- """
255
- from itertools import product
256
-
257
- new_sl = []
258
- for s1, s2 in list(product(self, other)):
259
- new_sl.append(s1 + s2)
260
- return ScenarioList(new_sl)
261
-
262
- def times(self, other: ScenarioList) -> ScenarioList:
263
- """Takes the cross product of two ScenarioLists.
264
-
265
- Example:
266
-
267
- >>> s1 = ScenarioList([Scenario({'a': 1}), Scenario({'a': 2})])
268
- >>> s2 = ScenarioList([Scenario({'b': 1}), Scenario({'b': 2})])
269
- >>> s1.times(s2)
270
- ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2}), Scenario({'a': 2, 'b': 1}), Scenario({'a': 2, 'b': 2})])
271
- """
272
- return self.__mul__(other)
273
-
274
- def shuffle(self, seed: Optional[str] = "edsl") -> ScenarioList:
275
- """Shuffle the ScenarioList.
276
-
277
- >>> s = ScenarioList.from_list("a", [1,2,3,4])
278
- >>> s.shuffle()
279
- ScenarioList([Scenario({'a': 3}), Scenario({'a': 4}), Scenario({'a': 1}), Scenario({'a': 2})])
280
- """
281
- random.seed(seed)
282
- random.shuffle(self.data)
283
- return self
284
-
285
- def _repr_html_(self) -> str:
286
- from edsl.utilities.utilities import data_to_html
287
-
288
- data = self.to_dict()
289
- _ = data.pop("edsl_version")
290
- _ = data.pop("edsl_class_name")
291
- for s in data["scenarios"]:
292
- _ = s.pop("edsl_version")
293
- _ = s.pop("edsl_class_name")
294
- for scenario in data["scenarios"]:
295
- for key, value in scenario.items():
296
- if hasattr(value, "to_dict"):
297
- data[key] = value.to_dict()
298
- return data_to_html(data)
299
-
300
- def tally(self, field) -> dict:
301
- """Return a tally of the values in the field.
302
-
303
- Example:
304
-
305
- >>> s = ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
306
- >>> s.tally('b')
307
- {1: 1, 2: 1}
308
- """
309
- return dict(Counter([scenario[field] for scenario in self]))
310
-
311
- def sample(self, n: int, seed="edsl") -> ScenarioList:
312
- """Return a random sample from the ScenarioList
313
-
314
- >>> s = ScenarioList.from_list("a", [1,2,3,4,5,6])
315
- >>> s.sample(3)
316
- ScenarioList([Scenario({'a': 2}), Scenario({'a': 1}), Scenario({'a': 3})])
317
- """
318
-
319
- random.seed(seed)
320
-
321
- return ScenarioList(random.sample(self.data, n))
322
-
323
- def expand(self, expand_field: str, number_field=False) -> ScenarioList:
324
- """Expand the ScenarioList by a field.
325
-
326
- Example:
327
-
328
- >>> s = ScenarioList( [ Scenario({'a':1, 'b':[1,2]}) ] )
329
- >>> s.expand('b')
330
- ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
331
- """
332
- new_scenarios = []
333
- for scenario in self:
334
- values = scenario[expand_field]
335
- if not isinstance(values, Iterable) or isinstance(values, str):
336
- values = [values]
337
- for index, value in enumerate(values):
338
- new_scenario = scenario.copy()
339
- new_scenario[expand_field] = value
340
- if number_field:
341
- new_scenario[expand_field + "_number"] = index + 1
342
- new_scenarios.append(new_scenario)
343
- return ScenarioList(new_scenarios)
344
-
345
- def concatenate(self, fields: List[str], separator: str = ";") -> "ScenarioList":
346
- """Concatenate specified fields into a single field.
347
-
348
- Args:
349
- fields (List[str]): List of field names to concatenate.
350
- separator (str, optional): Separator to use between field values. Defaults to ";".
351
-
352
- Returns:
353
- ScenarioList: A new ScenarioList with concatenated fields.
354
-
355
- Example:
356
- >>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
357
- >>> s.concatenate(['a', 'b', 'c'])
358
- ScenarioList([Scenario({'concat_a_b_c': '1;2;3'}), Scenario({'concat_a_b_c': '4;5;6'})])
359
- """
360
- new_scenarios = []
361
- for scenario in self:
362
- new_scenario = scenario.copy()
363
- concat_values = []
364
- for field in fields:
365
- if field in new_scenario:
366
- concat_values.append(str(new_scenario[field]))
367
- del new_scenario[field]
368
-
369
- new_field_name = f"concat_{'_'.join(fields)}"
370
- new_scenario[new_field_name] = separator.join(concat_values)
371
- new_scenarios.append(new_scenario)
372
-
373
- return ScenarioList(new_scenarios)
374
-
375
- def unpack_dict(
376
- self, field: str, prefix: Optional[str] = None, drop_field: bool = False
377
- ) -> ScenarioList:
378
- """Unpack a dictionary field into separate fields.
379
-
380
- Example:
381
-
382
- >>> s = ScenarioList([Scenario({'a': 1, 'b': {'c': 2, 'd': 3}})])
383
- >>> s.unpack_dict('b')
384
- ScenarioList([Scenario({'a': 1, 'b': {'c': 2, 'd': 3}, 'c': 2, 'd': 3})])
385
- """
386
- new_scenarios = []
387
- for scenario in self:
388
- new_scenario = scenario.copy()
389
- for key, value in scenario[field].items():
390
- if prefix:
391
- new_scenario[prefix + key] = value
392
- else:
393
- new_scenario[key] = value
394
- if drop_field:
395
- new_scenario.pop(field)
396
- new_scenarios.append(new_scenario)
397
- return ScenarioList(new_scenarios)
398
-
399
- def transform(
400
- self, field: str, func: Callable, new_name: Optional[str] = None
401
- ) -> ScenarioList:
402
- """Transform a field using a function."""
403
- new_scenarios = []
404
- for scenario in self:
405
- new_scenario = scenario.copy()
406
- new_scenario[new_name or field] = func(scenario[field])
407
- new_scenarios.append(new_scenario)
408
- return ScenarioList(new_scenarios)
409
-
410
- def mutate(
411
- self, new_var_string: str, functions_dict: Optional[dict[str, Callable]] = None
412
- ) -> ScenarioList:
413
- """
414
- Return a new ScenarioList with a new variable added.
415
-
416
- Example:
417
-
418
- >>> s = ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
419
- >>> s.mutate("c = a + b")
420
- ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 1, 'b': 1, 'c': 2})])
421
-
422
- """
423
- if "=" not in new_var_string:
424
- raise Exception(
425
- f"Mutate requires an '=' in the string, but '{new_var_string}' doesn't have one."
426
- )
427
- raw_var_name, expression = new_var_string.split("=", 1)
428
- var_name = raw_var_name.strip()
429
- from edsl.utilities.utilities import is_valid_variable_name
430
-
431
- if not is_valid_variable_name(var_name):
432
- raise Exception(f"{var_name} is not a valid variable name.")
433
-
434
- # create the evaluator
435
- functions_dict = functions_dict or {}
436
-
437
- def create_evaluator(scenario) -> EvalWithCompoundTypes:
438
- return EvalWithCompoundTypes(names=scenario, functions=functions_dict)
439
-
440
- def new_scenario(old_scenario: Scenario, var_name: str) -> Scenario:
441
- evaluator = create_evaluator(old_scenario)
442
- value = evaluator.eval(expression)
443
- new_s = old_scenario.copy()
444
- new_s[var_name] = value
445
- return new_s
446
-
447
- try:
448
- new_data = [new_scenario(s, var_name) for s in self]
449
- except Exception as e:
450
- raise Exception(f"Error in mutate. Exception:{e}")
451
-
452
- return ScenarioList(new_data)
453
-
454
- def order_by(self, *fields: str, reverse: bool = False) -> ScenarioList:
455
- """Order the scenarios by one or more fields.
456
-
457
- Example:
458
-
459
- >>> s = ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
460
- >>> s.order_by('b', 'a')
461
- ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
462
- """
463
-
464
- def get_sort_key(scenario: Any) -> tuple:
465
- return tuple(scenario[field] for field in fields)
466
-
467
- return ScenarioList(sorted(self, key=get_sort_key, reverse=reverse))
468
-
469
- def filter(self, expression: str) -> ScenarioList:
470
- """
471
- Filter a list of scenarios based on an expression.
472
-
473
- Example:
474
-
475
- >>> s = ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
476
- >>> s.filter("b == 2")
477
- ScenarioList([Scenario({'a': 1, 'b': 2})])
478
- """
479
-
480
- def create_evaluator(scenario: Scenario):
481
- """Create an evaluator for the given result.
482
- The 'combined_dict' is a mapping of all values for that Result object.
483
- """
484
- return EvalWithCompoundTypes(names=scenario)
485
-
486
- try:
487
- # iterates through all the results and evaluates the expression
488
- new_data = [
489
- scenario
490
- for scenario in self.data
491
- if create_evaluator(scenario).eval(expression)
492
- ]
493
- except Exception as e:
494
- print(f"Exception:{e}")
495
- raise Exception(f"Error in filter. Exception:{e}")
496
-
497
- return ScenarioList(new_data)
498
-
499
- def from_urls(
500
- self, urls: list[str], field_name: Optional[str] = "text"
501
- ) -> ScenarioList:
502
- """Create a ScenarioList from a list of URLs.
503
-
504
- :param urls: A list of URLs.
505
- :param field_name: The name of the field to store the text from the URLs.
506
-
507
-
508
- """
509
- return ScenarioList([Scenario.from_url(url, field_name) for url in urls])
510
-
511
- def select(self, *fields) -> ScenarioList:
512
- """
513
- Selects scenarios with only the references fields.
514
-
515
- Example:
516
-
517
- >>> s = ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
518
- >>> s.select('a')
519
- ScenarioList([Scenario({'a': 1}), Scenario({'a': 1})])
520
- """
521
- if len(fields) == 1:
522
- fields_to_select = [list(fields)[0]]
523
- else:
524
- fields_to_select = list(fields)
525
-
526
- return ScenarioList(
527
- [scenario.select(fields_to_select) for scenario in self.data]
528
- )
529
-
530
- def drop(self, *fields) -> ScenarioList:
531
- """Drop fields from the scenarios.
532
-
533
- Example:
534
-
535
- >>> s = ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
536
- >>> s.drop('a')
537
- ScenarioList([Scenario({'b': 1}), Scenario({'b': 2})])
538
- """
539
- return ScenarioList([scenario.drop(fields) for scenario in self.data])
540
-
541
- @classmethod
542
- def from_list(
543
- cls, name: str, values: list, func: Optional[Callable] = None
544
- ) -> ScenarioList:
545
- """Create a ScenarioList from a list of values.
546
-
547
- Example:
548
-
549
- >>> ScenarioList.from_list('name', ['Alice', 'Bob'])
550
- ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])
551
- """
552
- if not func:
553
- func = lambda x: x
554
- return cls([Scenario({name: func(value)}) for value in values])
555
-
556
- def to_dataset(self) -> "Dataset":
557
- """
558
- >>> s = ScenarioList.from_list("a", [1,2,3])
559
- >>> s.to_dataset()
560
- Dataset([{'a': [1, 2, 3]}])
561
- >>> s = ScenarioList.from_list("a", [1,2,3]).add_list("b", [4,5,6])
562
- >>> s.to_dataset()
563
- Dataset([{'a': [1, 2, 3]}, {'b': [4, 5, 6]}])
564
- """
565
- from edsl.results.Dataset import Dataset
566
-
567
- keys = self[0].keys()
568
- data = [{key: [scenario[key] for scenario in self.data]} for key in keys]
569
- return Dataset(data)
570
-
571
- def split(
572
- self, field: str, split_on: str, index: int, new_name: Optional[str] = None
573
- ) -> ScenarioList:
574
- """Split a scenario fiel in multiple fields."""
575
- if new_name is None:
576
- new_name = field + "_split_" + str(index)
577
- new_scenarios = []
578
- for scenario in self:
579
- new_scenario = scenario.copy()
580
- new_scenario[new_name] = scenario[field].split(split_on)[index]
581
- new_scenarios.append(new_scenario)
582
- return ScenarioList(new_scenarios)
583
-
584
- def add_list(self, name, values) -> ScenarioList:
585
- """Add a list of values to a ScenarioList.
586
-
587
- Example:
588
-
589
- >>> s = ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])
590
- >>> s.add_list('age', [30, 25])
591
- ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 25})])
592
- """
593
- for i, value in enumerate(values):
594
- if i < len(self):
595
- self[i][name] = value
596
- else:
597
- self.append(Scenario({name: value}))
598
- return self
599
-
600
- def add_value(self, name: str, value: Any) -> ScenarioList:
601
- """Add a value to all scenarios in a ScenarioList.
602
-
603
- Example:
604
-
605
- >>> s = ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])
606
- >>> s.add_value('age', 30)
607
- ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 30})])
608
- """
609
- for scenario in self:
610
- scenario[name] = value
611
- return self
612
-
613
- def rename(self, replacement_dict: dict) -> ScenarioList:
614
- """Rename the fields in the scenarios.
615
-
616
- Example:
617
-
618
- >>> s = ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 25})])
619
- >>> s.rename({'name': 'first_name', 'age': 'years'})
620
- ScenarioList([Scenario({'first_name': 'Alice', 'years': 30}), Scenario({'first_name': 'Bob', 'years': 25})])
621
-
622
- """
623
-
624
- new_list = ScenarioList([])
625
- for obj in self:
626
- new_obj = obj.rename(replacement_dict)
627
- new_list.append(new_obj)
628
- return new_list
629
-
630
- @classmethod
631
- def from_sqlite(cls, filepath: str, table: str):
632
- import sqlite3
633
-
634
- with sqlite3.connect(filepath) as conn:
635
- cursor = conn.cursor()
636
- cursor.execute(f"SELECT * FROM {table}")
637
- columns = [description[0] for description in cursor.description]
638
- data = cursor.fetchall()
639
- return cls([Scenario(dict(zip(columns, row))) for row in data])
640
-
641
- @classmethod
642
- def from_latex(cls, tex_file_path: str):
643
- with open(tex_file_path, "r") as file:
644
- lines = file.readlines()
645
-
646
- processed_lines = []
647
- non_blank_lines = [
648
- (i, line.strip()) for i, line in enumerate(lines) if line.strip()
649
- ]
650
-
651
- for index, (line_no, text) in enumerate(non_blank_lines):
652
- entry = {
653
- "line_no": line_no + 1, # Using 1-based index for line numbers
654
- "text": text,
655
- "line_before": non_blank_lines[index - 1][1] if index > 0 else None,
656
- "line_after": (
657
- non_blank_lines[index + 1][1]
658
- if index < len(non_blank_lines) - 1
659
- else None
660
- ),
661
- }
662
- processed_lines.append(entry)
663
-
664
- return ScenarioList([Scenario(entry) for entry in processed_lines])
665
-
666
- @classmethod
667
- def from_google_doc(cls, url: str) -> ScenarioList:
668
- """Create a ScenarioList from a Google Doc.
669
-
670
- This method downloads the Google Doc as a Word file (.docx), saves it to a temporary file,
671
- and then reads it using the from_docx class method.
672
-
673
- Args:
674
- url (str): The URL to the Google Doc.
675
-
676
- Returns:
677
- ScenarioList: An instance of the ScenarioList class.
678
-
679
- """
680
- import tempfile
681
- import requests
682
- from docx import Document
683
-
684
- if "/edit" in url:
685
- doc_id = url.split("/d/")[1].split("/edit")[0]
686
- else:
687
- raise ValueError("Invalid Google Doc URL format.")
688
-
689
- export_url = f"https://docs.google.com/document/d/{doc_id}/export?format=docx"
690
-
691
- # Download the Google Doc as a Word file (.docx)
692
- response = requests.get(export_url)
693
- response.raise_for_status() # Ensure the request was successful
694
-
695
- # Save the Word file to a temporary file
696
- with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as temp_file:
697
- temp_file.write(response.content)
698
- temp_filename = temp_file.name
699
-
700
- # Call the from_docx class method with the temporary file
701
- return cls.from_docx(temp_filename)
702
-
703
- @classmethod
704
- def from_pandas(cls, df) -> ScenarioList:
705
- """Create a ScenarioList from a pandas DataFrame.
706
-
707
- Example:
708
-
709
- >>> import pandas as pd
710
- >>> df = pd.DataFrame({'name': ['Alice', 'Bob'], 'age': [30, 25], 'location': ['New York', 'Los Angeles']})
711
- >>> ScenarioList.from_pandas(df)
712
- ScenarioList([Scenario({'name': 'Alice', 'age': 30, 'location': 'New York'}), Scenario({'name': 'Bob', 'age': 25, 'location': 'Los Angeles'})])
713
- """
714
- return cls([Scenario(row) for row in df.to_dict(orient="records")])
715
-
716
- @classmethod
717
- def from_wikipedia(cls, url: str, table_index: int = 0):
718
- """
719
- Extracts a table from a Wikipedia page.
720
-
721
- Parameters:
722
- url (str): The URL of the Wikipedia page.
723
- table_index (int): The index of the table to extract (default is 0).
724
-
725
- Returns:
726
- pd.DataFrame: A DataFrame containing the extracted table.
727
- # # Example usage
728
- # url = "https://en.wikipedia.org/wiki/List_of_countries_by_GDP_(nominal)"
729
- # df = from_wikipedia(url, 0)
730
-
731
- # if not df.empty:
732
- # print(df.head())
733
- # else:
734
- # print("Failed to extract table.")
735
-
736
-
737
- """
738
- import pandas as pd
739
- import requests
740
- from requests.exceptions import RequestException
741
-
742
- try:
743
- # Check if the URL is reachable
744
- response = requests.get(url)
745
- response.raise_for_status() # Raises HTTPError for bad responses
746
-
747
- # Extract tables from the Wikipedia page
748
- tables = pd.read_html(url)
749
-
750
- # Ensure the requested table index is within the range of available tables
751
- if table_index >= len(tables) or table_index < 0:
752
- raise IndexError(
753
- f"Table index {table_index} is out of range. This page has {len(tables)} table(s)."
754
- )
755
-
756
- # Return the requested table as a DataFrame
757
- # return tables[table_index]
758
- return cls.from_pandas(tables[table_index])
759
-
760
- except RequestException as e:
761
- print(f"Error fetching the URL: {e}")
762
- except ValueError as e:
763
- print(f"Error parsing tables: {e}")
764
- except IndexError as e:
765
- print(e)
766
- except Exception as e:
767
- print(f"An unexpected error occurred: {e}")
768
-
769
- # Return an empty DataFrame in case of an error
770
- # return cls.from_pandas(pd.DataFrame())
771
-
772
- def to_key_value(self, field: str, value=None) -> Union[dict, set]:
773
- """Return the set of values in the field.
774
-
775
- Example:
776
-
777
- >>> s = ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])
778
- >>> s.to_key_value('name') == {'Alice', 'Bob'}
779
- True
780
- """
781
- if value is None:
782
- return {scenario[field] for scenario in self}
783
- else:
784
- return {scenario[field]: scenario[value] for scenario in self}
785
-
786
- @classmethod
787
- def from_excel(
788
- cls, filename: str, sheet_name: Optional[str] = None
789
- ) -> ScenarioList:
790
- """Create a ScenarioList from an Excel file.
791
-
792
- If the Excel file contains multiple sheets and no sheet_name is provided,
793
- the method will print the available sheets and require the user to specify one.
794
-
795
- Example:
796
-
797
- >>> import tempfile
798
- >>> import os
799
- >>> import pandas as pd
800
- >>> with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as f:
801
- ... df1 = pd.DataFrame({
802
- ... 'name': ['Alice', 'Bob'],
803
- ... 'age': [30, 25],
804
- ... 'location': ['New York', 'Los Angeles']
805
- ... })
806
- ... df2 = pd.DataFrame({
807
- ... 'name': ['Charlie', 'David'],
808
- ... 'age': [35, 40],
809
- ... 'location': ['Chicago', 'Boston']
810
- ... })
811
- ... with pd.ExcelWriter(f.name) as writer:
812
- ... df1.to_excel(writer, sheet_name='Sheet1', index=False)
813
- ... df2.to_excel(writer, sheet_name='Sheet2', index=False)
814
- ... temp_filename = f.name
815
- >>> scenario_list = ScenarioList.from_excel(temp_filename, sheet_name='Sheet1')
816
- >>> len(scenario_list)
817
- 2
818
- >>> scenario_list[0]['name']
819
- 'Alice'
820
- >>> scenario_list = ScenarioList.from_excel(temp_filename) # Should raise an error and list sheets
821
- Traceback (most recent call last):
822
- ...
823
- ValueError: Please provide a sheet name to load data from.
824
- """
825
- from edsl.scenarios.Scenario import Scenario
826
- import pandas as pd
827
-
828
- # Get all sheets
829
- all_sheets = pd.read_excel(filename, sheet_name=None)
830
-
831
- # If no sheet_name is provided and there is more than one sheet, print available sheets
832
- if sheet_name is None:
833
- if len(all_sheets) > 1:
834
- print("The Excel file contains multiple sheets:")
835
- for name in all_sheets.keys():
836
- print(f"- {name}")
837
- raise ValueError("Please provide a sheet name to load data from.")
838
- else:
839
- # If there is only one sheet, use it
840
- sheet_name = list(all_sheets.keys())[0]
841
-
842
- # Load the specified or determined sheet
843
- df = pd.read_excel(filename, sheet_name=sheet_name)
844
-
845
- observations = []
846
- for _, row in df.iterrows():
847
- observations.append(Scenario(row.to_dict()))
848
-
849
- return cls(observations)
850
-
851
- @classmethod
852
- def from_google_sheet(cls, url: str, sheet_name: str = None) -> ScenarioList:
853
- """Create a ScenarioList from a Google Sheet.
854
-
855
- This method downloads the Google Sheet as an Excel file, saves it to a temporary file,
856
- and then reads it using the from_excel class method.
857
-
858
- Args:
859
- url (str): The URL to the Google Sheet.
860
- sheet_name (str, optional): The name of the sheet to load. If None, the method will behave
861
- the same as from_excel regarding multiple sheets.
862
-
863
- Returns:
864
- ScenarioList: An instance of the ScenarioList class.
865
-
866
- """
867
- import pandas as pd
868
- import tempfile
869
- import requests
870
-
871
- if "/edit" in url:
872
- sheet_id = url.split("/d/")[1].split("/edit")[0]
873
- else:
874
- raise ValueError("Invalid Google Sheet URL format.")
875
-
876
- export_url = (
877
- f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=xlsx"
878
- )
879
-
880
- # Download the Google Sheet as an Excel file
881
- response = requests.get(export_url)
882
- response.raise_for_status() # Ensure the request was successful
883
-
884
- # Save the Excel file to a temporary file
885
- with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as temp_file:
886
- temp_file.write(response.content)
887
- temp_filename = temp_file.name
888
-
889
- # Call the from_excel class method with the temporary file
890
- return cls.from_excel(temp_filename, sheet_name=sheet_name)
891
-
892
- @classmethod
893
- def from_csv(cls, source: Union[str, urllib.parse.ParseResult]) -> ScenarioList:
894
- """Create a ScenarioList from a CSV file or URL.
895
-
896
- Args:
897
- source: A string representing either a local file path or a URL to a CSV file,
898
- or a urllib.parse.ParseResult object for a URL.
899
-
900
- Returns:
901
- ScenarioList: A ScenarioList object containing the data from the CSV.
902
-
903
- Example:
904
-
905
- >>> import tempfile
906
- >>> import os
907
- >>> with tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.csv') as f:
908
- ... _ = f.write("name,age,location\\nAlice,30,New York\\nBob,25,Los Angeles\\n")
909
- ... temp_filename = f.name
910
- >>> scenario_list = ScenarioList.from_csv(temp_filename)
911
- >>> len(scenario_list)
912
- 2
913
- >>> scenario_list[0]['name']
914
- 'Alice'
915
- >>> scenario_list[1]['age']
916
- '25'
917
-
918
- >>> url = "https://example.com/data.csv"
919
- >>> ## scenario_list_from_url = ScenarioList.from_csv(url)
920
- """
921
- from edsl.scenarios.Scenario import Scenario
922
-
923
- def is_url(source):
924
- try:
925
- result = urllib.parse.urlparse(source)
926
- return all([result.scheme, result.netloc])
927
- except ValueError:
928
- return False
929
-
930
- if isinstance(source, str) and is_url(source):
931
- with urllib.request.urlopen(source) as response:
932
- csv_content = response.read().decode("utf-8")
933
- csv_file = StringIO(csv_content)
934
- elif isinstance(source, urllib.parse.ParseResult):
935
- with urllib.request.urlopen(source.geturl()) as response:
936
- csv_content = response.read().decode("utf-8")
937
- csv_file = StringIO(csv_content)
938
- else:
939
- csv_file = open(source, "r")
940
-
941
- try:
942
- reader = csv.reader(csv_file)
943
- header = next(reader)
944
- observations = [Scenario(dict(zip(header, row))) for row in reader]
945
- finally:
946
- csv_file.close()
947
-
948
- return cls(observations)
949
-
950
- def _to_dict(self, sort=False) -> dict:
951
- if sort:
952
- data = sorted(self, key=lambda x: hash(x))
953
- else:
954
- data = self
955
- return {"scenarios": [s._to_dict() for s in data]}
956
-
957
- @add_edsl_version
958
- def to_dict(self) -> dict[str, Any]:
959
- """Return the `ScenarioList` as a dictionary.
960
-
961
- Example:
962
-
963
- >>> s = ScenarioList([Scenario({'food': 'wood chips'}), Scenario({'food': 'wood-fired pizza'})])
964
- >>> s.to_dict()
965
- {'scenarios': [{'food': 'wood chips', 'edsl_version': '...', 'edsl_class_name': 'Scenario'}, {'food': 'wood-fired pizza', 'edsl_version': '...', 'edsl_class_name': 'Scenario'}], 'edsl_version': '...', 'edsl_class_name': 'ScenarioList'}
966
- """
967
- return {"scenarios": [s.to_dict() for s in self]}
968
-
969
- @classmethod
970
- def gen(cls, scenario_dicts_list: List[dict]) -> ScenarioList:
971
- """Create a `ScenarioList` from a list of dictionaries.
972
-
973
- Example:
974
-
975
- >>> ScenarioList.gen([{'name': 'Alice'}, {'name': 'Bob'}])
976
- ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])
977
-
978
- """
979
- from edsl.scenarios.Scenario import Scenario
980
-
981
- return cls([Scenario(s) for s in scenario_dicts_list])
982
-
983
- @classmethod
984
- @remove_edsl_version
985
- def from_dict(cls, data) -> ScenarioList:
986
- """Create a `ScenarioList` from a dictionary."""
987
- from edsl.scenarios.Scenario import Scenario
988
-
989
- return cls([Scenario.from_dict(s) for s in data["scenarios"]])
990
-
991
- @classmethod
992
- def from_nested_dict(cls, data: dict) -> ScenarioList:
993
- """Create a `ScenarioList` from a nested dictionary."""
994
- from edsl.scenarios.Scenario import Scenario
995
-
996
- s = ScenarioList()
997
- for key, value in data.items():
998
- s.add_list(key, value)
999
- return s
1000
-
1001
- def code(self) -> str:
1002
- ## TODO: Refactor to only use the questions actually in the survey
1003
- """Create the Python code representation of a survey."""
1004
- header_lines = [
1005
- "from edsl.scenarios.Scenario import Scenario",
1006
- "from edsl.scenarios.ScenarioList import ScenarioList",
1007
- ]
1008
- lines = ["\n".join(header_lines)]
1009
- names = []
1010
- for index, scenario in enumerate(self):
1011
- lines.append(f"scenario_{index} = " + repr(scenario))
1012
- names.append(f"scenario_{index}")
1013
- lines.append(f"scenarios = ScenarioList([{', '.join(names)}])")
1014
- return lines
1015
-
1016
- @classmethod
1017
- def example(cls, randomize: bool = False) -> ScenarioList:
1018
- """
1019
- Return an example ScenarioList instance.
1020
-
1021
- :params randomize: If True, use Scenario's randomize method to randomize the values.
1022
- """
1023
- return cls([Scenario.example(randomize), Scenario.example(randomize)])
1024
-
1025
- def rich_print(self) -> None:
1026
- """Display an object as a table."""
1027
- from rich.table import Table
1028
-
1029
- table = Table(title="ScenarioList")
1030
- table.add_column("Index", style="bold")
1031
- table.add_column("Scenario")
1032
- for i, s in enumerate(self):
1033
- table.add_row(str(i), s.rich_print())
1034
- return table
1035
-
1036
- def __getitem__(self, key: Union[int, slice]) -> Any:
1037
- """Return the item at the given index.
1038
-
1039
- Example:
1040
- >>> s = ScenarioList([Scenario({'age': 22, 'hair': 'brown', 'height': 5.5}), Scenario({'age': 22, 'hair': 'brown', 'height': 5.5})])
1041
- >>> s[0]
1042
- Scenario({'age': 22, 'hair': 'brown', 'height': 5.5})
1043
-
1044
- >>> s[:1]
1045
- ScenarioList([Scenario({'age': 22, 'hair': 'brown', 'height': 5.5})])
1046
-
1047
- """
1048
- if isinstance(key, slice):
1049
- return ScenarioList(super().__getitem__(key))
1050
- elif isinstance(key, int):
1051
- return super().__getitem__(key)
1052
- else:
1053
- return self.to_dict()[key]
1054
-
1055
- def to_agent_list(self):
1056
- """Convert the ScenarioList to an AgentList.
1057
-
1058
- Example:
1059
-
1060
- >>> s = ScenarioList([Scenario({'age': 22, 'hair': 'brown', 'height': 5.5}), Scenario({'age': 22, 'hair': 'brown', 'height': 5.5})])
1061
- >>> s.to_agent_list()
1062
- AgentList([Agent(traits = {'age': 22, 'hair': 'brown', 'height': 5.5}), Agent(traits = {'age': 22, 'hair': 'brown', 'height': 5.5})])
1063
- """
1064
- from edsl.agents.AgentList import AgentList
1065
- from edsl.agents.Agent import Agent
1066
-
1067
- return AgentList([Agent(traits=s.data) for s in self])
1068
-
1069
- def chunk(
1070
- self,
1071
- field,
1072
- num_words: Optional[int] = None,
1073
- num_lines: Optional[int] = None,
1074
- include_original=False,
1075
- hash_original=False,
1076
- ) -> "ScenarioList":
1077
- """Chunk the scenarios based on a field.
1078
-
1079
- Example:
1080
-
1081
- >>> s = ScenarioList([Scenario({'text': 'The quick brown fox jumps over the lazy dog.'})])
1082
- >>> s.chunk('text', num_words=3)
1083
- ScenarioList([Scenario({'text': 'The quick brown', 'text_chunk': 0}), Scenario({'text': 'fox jumps over', 'text_chunk': 1}), Scenario({'text': 'the lazy dog.', 'text_chunk': 2})])
1084
- """
1085
- new_scenarios = []
1086
- for scenario in self:
1087
- replacement_scenarios = scenario.chunk(
1088
- field,
1089
- num_words=num_words,
1090
- num_lines=num_lines,
1091
- include_original=include_original,
1092
- hash_original=hash_original,
1093
- )
1094
- new_scenarios.extend(replacement_scenarios)
1095
- return ScenarioList(new_scenarios)
1096
-
1097
-
1098
- if __name__ == "__main__":
1099
- import doctest
1100
-
1101
- doctest.testmod(optionflags=doctest.ELLIPSIS)
1
+ """A list of Scenarios to be used in a survey."""
2
+
3
+ from __future__ import annotations
4
+ from typing import Any, Optional, Union, List, Callable
5
+ import csv
6
+ import random
7
+ from collections import UserList, Counter
8
+ from collections.abc import Iterable
9
+ import urllib.parse
10
+ import urllib.request
11
+ from io import StringIO
12
+ from collections import defaultdict
13
+ import inspect
14
+
15
+ from simpleeval import EvalWithCompoundTypes
16
+
17
+ from edsl.Base import Base
18
+ from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
19
+ from edsl.scenarios.Scenario import Scenario
20
+ from edsl.scenarios.ScenarioListPdfMixin import ScenarioListPdfMixin
21
+ from edsl.scenarios.ScenarioListExportMixin import ScenarioListExportMixin
22
+
23
+ from edsl.conjure.naming_utilities import sanitize_string
24
+ from edsl.utilities.utilities import is_valid_variable_name
25
+
26
+
27
+ class ScenarioListMixin(ScenarioListPdfMixin, ScenarioListExportMixin):
28
+ pass
29
+
30
+
31
+ class ScenarioList(Base, UserList, ScenarioListMixin):
32
+ """Class for creating a list of scenarios to be used in a survey."""
33
+
34
+ def __init__(self, data: Optional[list] = None, codebook: Optional[dict] = None):
35
+ """Initialize the ScenarioList class."""
36
+ if data is not None:
37
+ super().__init__(data)
38
+ else:
39
+ super().__init__([])
40
+ self.codebook = codebook or {}
41
+
42
+ def unique(self) -> ScenarioList:
43
+ """Return a list of unique scenarios.
44
+
45
+ >>> s = ScenarioList([Scenario({'a': 1}), Scenario({'a': 1}), Scenario({'a': 2})])
46
+ >>> s.unique()
47
+ ScenarioList([Scenario({'a': 1}), Scenario({'a': 2})])
48
+ """
49
+ return ScenarioList(list(set(self)))
50
+
51
+ @property
52
+ def has_jinja_braces(self) -> bool:
53
+ """Check if the ScenarioList has Jinja braces."""
54
+ return any([scenario.has_jinja_braces for scenario in self])
55
+
56
+ def convert_jinja_braces(self) -> ScenarioList:
57
+ """Convert Jinja braces to Python braces."""
58
+ return ScenarioList([scenario.convert_jinja_braces() for scenario in self])
59
+
60
+ def give_valid_names(self) -> ScenarioList:
61
+ """Give valid names to the scenario keys.
62
+
63
+ >>> s = ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
64
+ >>> s.give_valid_names()
65
+ ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
66
+ >>> s = ScenarioList([Scenario({'are you there John?': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
67
+ >>> s.give_valid_names()
68
+ ScenarioList([Scenario({'john': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
69
+ """
70
+ codebook = {}
71
+ new_scenaerios = []
72
+ for scenario in self:
73
+ new_scenario = {}
74
+ for key in scenario:
75
+ if not is_valid_variable_name(key):
76
+ if key in codebook:
77
+ new_key = codebook[key]
78
+ else:
79
+ new_key = sanitize_string(key)
80
+ if not is_valid_variable_name(new_key):
81
+ new_key = f"var_{len(codebook)}"
82
+ codebook[key] = new_key
83
+ new_scenario[new_key] = scenario[key]
84
+ else:
85
+ new_scenario[key] = scenario[key]
86
+ new_scenaerios.append(Scenario(new_scenario))
87
+ return ScenarioList(new_scenaerios, codebook)
88
+
89
+ def unpivot(self, id_vars=None, value_vars=None):
90
+ """
91
+ Unpivot the ScenarioList, allowing for id variables to be specified.
92
+
93
+ Parameters:
94
+ id_vars (list): Fields to use as identifier variables (kept in each entry)
95
+ value_vars (list): Fields to unpivot. If None, all fields not in id_vars will be used.
96
+
97
+ Example:
98
+ >>> s = ScenarioList([
99
+ ... Scenario({'id': 1, 'year': 2020, 'a': 10, 'b': 20}),
100
+ ... Scenario({'id': 2, 'year': 2021, 'a': 15, 'b': 25})
101
+ ... ])
102
+ >>> s.unpivot(id_vars=['id', 'year'], value_vars=['a', 'b'])
103
+ ScenarioList([Scenario({'id': 1, 'year': 2020, 'variable': 'a', 'value': 10}), Scenario({'id': 1, 'year': 2020, 'variable': 'b', 'value': 20}), Scenario({'id': 2, 'year': 2021, 'variable': 'a', 'value': 15}), Scenario({'id': 2, 'year': 2021, 'variable': 'b', 'value': 25})])
104
+ """
105
+ if id_vars is None:
106
+ id_vars = []
107
+ if value_vars is None:
108
+ value_vars = [field for field in self[0].keys() if field not in id_vars]
109
+
110
+ new_scenarios = []
111
+ for scenario in self:
112
+ for var in value_vars:
113
+ new_scenario = {id_var: scenario[id_var] for id_var in id_vars}
114
+ new_scenario["variable"] = var
115
+ new_scenario["value"] = scenario[var]
116
+ new_scenarios.append(Scenario(new_scenario))
117
+
118
+ return ScenarioList(new_scenarios)
119
+
120
+ def pivot(self, id_vars, var_name="variable", value_name="value"):
121
+ """
122
+ Pivot the ScenarioList from long to wide format.
123
+
124
+ Parameters:
125
+ id_vars (list): Fields to use as identifier variables
126
+ var_name (str): Name of the variable column (default: 'variable')
127
+ value_name (str): Name of the value column (default: 'value')
128
+
129
+ Example:
130
+ >>> s = ScenarioList([
131
+ ... Scenario({'id': 1, 'year': 2020, 'variable': 'a', 'value': 10}),
132
+ ... Scenario({'id': 1, 'year': 2020, 'variable': 'b', 'value': 20}),
133
+ ... Scenario({'id': 2, 'year': 2021, 'variable': 'a', 'value': 15}),
134
+ ... Scenario({'id': 2, 'year': 2021, 'variable': 'b', 'value': 25})
135
+ ... ])
136
+ >>> s.pivot(id_vars=['id', 'year'])
137
+ ScenarioList([Scenario({'id': 1, 'year': 2020, 'a': 10, 'b': 20}), Scenario({'id': 2, 'year': 2021, 'a': 15, 'b': 25})])
138
+ """
139
+ pivoted_dict = {}
140
+
141
+ for scenario in self:
142
+ # Create a tuple of id values to use as a key
143
+ id_key = tuple(scenario[id_var] for id_var in id_vars)
144
+
145
+ # If this combination of id values hasn't been seen before, initialize it
146
+ if id_key not in pivoted_dict:
147
+ pivoted_dict[id_key] = {id_var: scenario[id_var] for id_var in id_vars}
148
+
149
+ # Add the variable-value pair to the dict
150
+ variable = scenario[var_name]
151
+ value = scenario[value_name]
152
+ pivoted_dict[id_key][variable] = value
153
+
154
+ # Convert the dict of dicts to a list of Scenarios
155
+ pivoted_scenarios = [
156
+ Scenario(dict(zip(id_vars, id_key), **values))
157
+ for id_key, values in pivoted_dict.items()
158
+ ]
159
+
160
+ return ScenarioList(pivoted_scenarios)
161
+
162
+ def group_by(self, id_vars, variables, func):
163
+ """
164
+ Group the ScenarioList by id_vars and apply a function to the specified variables.
165
+
166
+ Parameters:
167
+ id_vars (list): Fields to use as identifier variables for grouping
168
+ variables (list): Fields to pass to the aggregation function
169
+ func (callable): Function to apply to the grouped variables.
170
+ Should accept lists of values for each variable.
171
+
172
+ Returns:
173
+ ScenarioList: A new ScenarioList with the grouped and aggregated results
174
+
175
+ Example:
176
+ >>> def avg_sum(a, b):
177
+ ... return {'avg_a': sum(a) / len(a), 'sum_b': sum(b)}
178
+ >>> s = ScenarioList([
179
+ ... Scenario({'group': 'A', 'year': 2020, 'a': 10, 'b': 20}),
180
+ ... Scenario({'group': 'A', 'year': 2021, 'a': 15, 'b': 25}),
181
+ ... Scenario({'group': 'B', 'year': 2020, 'a': 12, 'b': 22}),
182
+ ... Scenario({'group': 'B', 'year': 2021, 'a': 17, 'b': 27})
183
+ ... ])
184
+ >>> s.group_by(id_vars=['group'], variables=['a', 'b'], func=avg_sum)
185
+ ScenarioList([Scenario({'group': 'A', 'avg_a': 12.5, 'sum_b': 45}), Scenario({'group': 'B', 'avg_a': 14.5, 'sum_b': 49})])
186
+ """
187
+ # Check if the function is compatible with the specified variables
188
+ func_params = inspect.signature(func).parameters
189
+ if len(func_params) != len(variables):
190
+ raise ValueError(
191
+ f"Function {func.__name__} expects {len(func_params)} arguments, but {len(variables)} variables were provided"
192
+ )
193
+
194
+ # Group the scenarios
195
+ grouped = defaultdict(lambda: defaultdict(list))
196
+ for scenario in self:
197
+ key = tuple(scenario[id_var] for id_var in id_vars)
198
+ for var in variables:
199
+ grouped[key][var].append(scenario[var])
200
+
201
+ # Apply the function to each group
202
+ result = []
203
+ for key, group in grouped.items():
204
+ try:
205
+ aggregated = func(*[group[var] for var in variables])
206
+ except Exception as e:
207
+ raise ValueError(f"Error applying function to group {key}: {str(e)}")
208
+
209
+ if not isinstance(aggregated, dict):
210
+ raise ValueError(f"Function {func.__name__} must return a dictionary")
211
+
212
+ new_scenario = dict(zip(id_vars, key))
213
+ new_scenario.update(aggregated)
214
+ result.append(Scenario(new_scenario))
215
+
216
+ return ScenarioList(result)
217
+
218
+ @property
219
+ def parameters(self) -> set:
220
+ """Return the set of parameters in the ScenarioList
221
+
222
+ Example:
223
+
224
+ >>> s = ScenarioList([Scenario({'a': 1}), Scenario({'b': 2})])
225
+ >>> s.parameters == {'a', 'b'}
226
+ True
227
+ """
228
+ if len(self) == 0:
229
+ return set()
230
+
231
+ return set.union(*[set(s.keys()) for s in self])
232
+
233
+ def __hash__(self) -> int:
234
+ """Return the hash of the ScenarioList.
235
+
236
+ >>> s = ScenarioList.example()
237
+ >>> hash(s)
238
+ 1262252885757976162
239
+ """
240
+ from edsl.utilities.utilities import dict_hash
241
+
242
+ return dict_hash(self._to_dict(sort=True))
243
+
244
+ def __repr__(self):
245
+ return f"ScenarioList({self.data})"
246
+
247
+ def __mul__(self, other: ScenarioList) -> ScenarioList:
248
+ """Takes the cross product of two ScenarioLists.
249
+
250
+ >>> s1 = ScenarioList.from_list("a", [1, 2])
251
+ >>> s2 = ScenarioList.from_list("b", [3, 4])
252
+ >>> s1 * s2
253
+ ScenarioList([Scenario({'a': 1, 'b': 3}), Scenario({'a': 1, 'b': 4}), Scenario({'a': 2, 'b': 3}), Scenario({'a': 2, 'b': 4})])
254
+ """
255
+ from itertools import product
256
+
257
+ new_sl = []
258
+ for s1, s2 in list(product(self, other)):
259
+ new_sl.append(s1 + s2)
260
+ return ScenarioList(new_sl)
261
+
262
+ def times(self, other: ScenarioList) -> ScenarioList:
263
+ """Takes the cross product of two ScenarioLists.
264
+
265
+ Example:
266
+
267
+ >>> s1 = ScenarioList([Scenario({'a': 1}), Scenario({'a': 2})])
268
+ >>> s2 = ScenarioList([Scenario({'b': 1}), Scenario({'b': 2})])
269
+ >>> s1.times(s2)
270
+ ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2}), Scenario({'a': 2, 'b': 1}), Scenario({'a': 2, 'b': 2})])
271
+ """
272
+ return self.__mul__(other)
273
+
274
+ def shuffle(self, seed: Optional[str] = "edsl") -> ScenarioList:
275
+ """Shuffle the ScenarioList.
276
+
277
+ >>> s = ScenarioList.from_list("a", [1,2,3,4])
278
+ >>> s.shuffle()
279
+ ScenarioList([Scenario({'a': 3}), Scenario({'a': 4}), Scenario({'a': 1}), Scenario({'a': 2})])
280
+ """
281
+ random.seed(seed)
282
+ random.shuffle(self.data)
283
+ return self
284
+
285
def _repr_html_(self) -> str:
    """Return an HTML rendering of the list, built from ``to_dict()``.

    The ``edsl_version`` / ``edsl_class_name`` bookkeeping keys are removed
    from the top-level dictionary and from every scenario before rendering.
    Any scenario value that still exposes ``to_dict`` is replaced, inside
    that scenario, by its dictionary form.
    """
    from edsl.utilities.utilities import data_to_html

    bookkeeping_keys = ("edsl_version", "edsl_class_name")

    data = self.to_dict()
    for key in bookkeeping_keys:
        data.pop(key, None)

    for scenario in data["scenarios"]:
        for key in bookkeeping_keys:
            scenario.pop(key, None)
        for key, value in scenario.items():
            if hasattr(value, "to_dict"):
                # Write back into the scenario itself; storing under data[key]
                # would leak scenario fields into the top-level dictionary.
                scenario[key] = value.to_dict()
    return data_to_html(data)
299
+
300
def tally(self, field) -> dict:
    """Count how many scenarios carry each distinct value of ``field``.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
    >>> s.tally('b')
    {1: 1, 2: 1}
    """
    counts = Counter()
    for scenario in self:
        counts[scenario[field]] += 1
    return dict(counts)
310
+
311
def sample(self, n: int, seed="edsl") -> ScenarioList:
    """Return a new ScenarioList holding ``n`` randomly chosen scenarios.

    The module-level ``random`` generator is re-seeded with ``seed`` first,
    so the same seed always selects the same scenarios.

    >>> s = ScenarioList.from_list("a", [1,2,3,4,5,6])
    >>> s.sample(3)
    ScenarioList([Scenario({'a': 2}), Scenario({'a': 1}), Scenario({'a': 3})])
    """
    random.seed(seed)
    chosen = random.sample(self.data, n)
    return ScenarioList(chosen)
322
+
323
def expand(self, expand_field: str, number_field=False) -> ScenarioList:
    """Produce one scenario per element of an iterable field.

    A value that is not iterable, or that is a string, is treated as a
    single element. When ``number_field`` is truthy, each new scenario also
    gets ``<expand_field>_number`` holding the element's 1-based position.

    Example:

    >>> s = ScenarioList( [ Scenario({'a':1, 'b':[1,2]}) ] )
    >>> s.expand('b')
    ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
    """
    expanded = []
    for scenario in self:
        raw = scenario[expand_field]
        is_multi = isinstance(raw, Iterable) and not isinstance(raw, str)
        items = raw if is_multi else [raw]
        for position, item in enumerate(items, start=1):
            clone = scenario.copy()
            clone[expand_field] = item
            if number_field:
                clone[expand_field + "_number"] = position
            expanded.append(clone)
    return ScenarioList(expanded)
344
+
345
def concatenate(self, fields: List[str], separator: str = ";") -> "ScenarioList":
    """Merge several fields of every scenario into one string field.

    Fields present in a scenario are converted with ``str``, removed from
    the copy, and joined with ``separator``; missing fields are skipped. The
    result is stored under ``concat_<field1>_<field2>_...``.

    Args:
        fields (List[str]): List of field names to concatenate.
        separator (str, optional): Separator to use between field values. Defaults to ";".

    Returns:
        ScenarioList: A new ScenarioList with concatenated fields.

    Example:
        >>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
        >>> s.concatenate(['a', 'b', 'c'])
        ScenarioList([Scenario({'concat_a_b_c': '1;2;3'}), Scenario({'concat_a_b_c': '4;5;6'})])
    """
    result = []
    for scenario in self:
        merged = scenario.copy()
        parts = []
        for name in fields:
            if name not in merged:
                continue
            parts.append(str(merged[name]))
            del merged[name]
        merged[f"concat_{'_'.join(fields)}"] = separator.join(parts)
        result.append(merged)
    return ScenarioList(result)
374
+
375
def unpack_dict(
    self, field: str, prefix: Optional[str] = None, drop_field: bool = False
) -> ScenarioList:
    """Promote the entries of a dictionary-valued field to top-level fields.

    Each key of ``scenario[field]`` becomes a field of the copied scenario,
    named ``prefix + key`` when ``prefix`` is truthy. With ``drop_field``
    the original dictionary field is removed from the copy.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': {'c': 2, 'd': 3}})])
    >>> s.unpack_dict('b')
    ScenarioList([Scenario({'a': 1, 'b': {'c': 2, 'd': 3}, 'c': 2, 'd': 3})])
    """
    unpacked = []
    for scenario in self:
        widened = scenario.copy()
        for key, value in scenario[field].items():
            target = prefix + key if prefix else key
            widened[target] = value
        if drop_field:
            widened.pop(field)
        unpacked.append(widened)
    return ScenarioList(unpacked)
398
+
399
def transform(
    self, field: str, func: Callable, new_name: Optional[str] = None
) -> ScenarioList:
    """Apply ``func`` to ``field`` in every scenario and return the copies.

    The result is stored under ``new_name`` when it is truthy, otherwise it
    overwrites ``field`` in the copied scenario.
    """
    target = new_name or field

    def apply(scenario):
        updated = scenario.copy()
        updated[target] = func(scenario[field])
        return updated

    return ScenarioList([apply(scenario) for scenario in self])
409
+
410
def mutate(
    self, new_var_string: str, functions_dict: Optional[dict[str, Callable]] = None
) -> ScenarioList:
    """
    Return a new ScenarioList with a computed variable added to each scenario.

    ``new_var_string`` has the form ``"name = expression"``. The expression
    is evaluated once per scenario with ``EvalWithCompoundTypes``, using the
    scenario as the name lookup and ``functions_dict`` as callable helpers.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
    >>> s.mutate("c = a + b")
    ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 1, 'b': 1, 'c': 2})])

    """
    if "=" not in new_var_string:
        raise Exception(
            f"Mutate requires an '=' in the string, but '{new_var_string}' doesn't have one."
        )
    # Only the first "=" separates the name from the expression.
    left_side, _, expression = new_var_string.partition("=")
    var_name = left_side.strip()
    from edsl.utilities.utilities import is_valid_variable_name

    if not is_valid_variable_name(var_name):
        raise Exception(f"{var_name} is not a valid variable name.")

    functions = functions_dict or {}

    try:
        new_data = []
        for scenario in self:
            evaluator = EvalWithCompoundTypes(names=scenario, functions=functions)
            value = evaluator.eval(expression)
            updated = scenario.copy()
            updated[var_name] = value
            new_data.append(updated)
    except Exception as e:
        raise Exception(f"Error in mutate. Exception:{e}")

    return ScenarioList(new_data)
453
+
454
def order_by(self, *fields: str, reverse: bool = False) -> ScenarioList:
    """Return a new ScenarioList sorted by the given fields, in order.

    Scenarios are compared on the tuple of their values for ``fields``;
    ``reverse`` is passed straight to ``sorted``.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
    >>> s.order_by('b', 'a')
    ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
    """
    ordered = sorted(
        self,
        key=lambda scenario: tuple(scenario[name] for name in fields),
        reverse=reverse,
    )
    return ScenarioList(ordered)
468
+
469
def filter(self, expression: str) -> ScenarioList:
    """
    Keep only the scenarios for which ``expression`` evaluates truthy.

    The expression is evaluated per scenario with ``EvalWithCompoundTypes``,
    using the scenario itself as the name lookup. Any failure is printed and
    re-raised as a generic ``Exception``.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
    >>> s.filter("b == 2")
    ScenarioList([Scenario({'a': 1, 'b': 2})])
    """
    try:
        kept = []
        for scenario in self.data:
            if EvalWithCompoundTypes(names=scenario).eval(expression):
                kept.append(scenario)
    except Exception as e:
        print(f"Exception:{e}")
        raise Exception(f"Error in filter. Exception:{e}")

    return ScenarioList(kept)
498
+
499
@classmethod
def from_urls(
    cls, urls: list[str], field_name: Optional[str] = "text"
) -> ScenarioList:
    """Create a ScenarioList from a list of URLs.

    Each URL is turned into one scenario via ``Scenario.from_url``. This is
    a classmethod, like the other ``from_*`` constructors, because it never
    uses an existing instance; calling it on an instance still works.

    :param urls: A list of URLs.
    :param field_name: The name of the field to store the text from the URLs.
    """
    return cls([Scenario.from_url(url, field_name) for url in urls])
510
+
511
def select(self, *fields) -> ScenarioList:
    """
    Return scenarios reduced to only the named fields.

    The field names are passed, as one list, to each scenario's ``select``.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
    >>> s.select('a')
    ScenarioList([Scenario({'a': 1}), Scenario({'a': 1})])
    """
    wanted = list(fields)
    narrowed = [scenario.select(wanted) for scenario in self.data]
    return ScenarioList(narrowed)
529
+
530
def drop(self, *fields) -> ScenarioList:
    """Return scenarios with the named fields removed.

    The field names are passed, as one tuple, to each scenario's ``drop``.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 1}), Scenario({'a': 1, 'b': 2})])
    >>> s.drop('a')
    ScenarioList([Scenario({'b': 1}), Scenario({'b': 2})])
    """
    remaining = []
    for scenario in self.data:
        remaining.append(scenario.drop(fields))
    return ScenarioList(remaining)
540
+
541
@classmethod
def from_list(
    cls, name: str, values: list, func: Optional[Callable] = None
) -> ScenarioList:
    """Build a ScenarioList with one single-field scenario per value.

    When ``func`` is given (and truthy) it is applied to each value before
    the value is stored under ``name``.

    Example:

    >>> ScenarioList.from_list('name', ['Alice', 'Bob'])
    ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])
    """
    if func:
        scenarios = [Scenario({name: func(value)}) for value in values]
    else:
        scenarios = [Scenario({name: value}) for value in values]
    return cls(scenarios)
555
+
556
def to_dataset(self) -> "Dataset":
    """Convert the list to a column-oriented ``Dataset``.

    The columns are the keys of the first scenario (``self[0]``), so the
    list must hold at least one scenario and every scenario must have those
    keys. Each column is a one-entry dict mapping the key to its values.

    >>> s = ScenarioList.from_list("a", [1,2,3])
    >>> s.to_dataset()
    Dataset([{'a': [1, 2, 3]}])
    >>> s = ScenarioList.from_list("a", [1,2,3]).add_list("b", [4,5,6])
    >>> s.to_dataset()
    Dataset([{'a': [1, 2, 3]}, {'b': [4, 5, 6]}])
    """
    from edsl.results.Dataset import Dataset

    columns = []
    for key in self[0].keys():
        column_values = [scenario[key] for scenario in self.data]
        columns.append({key: column_values})
    return Dataset(columns)
570
+
571
def split(
    self, field: str, split_on: str, index: int, new_name: Optional[str] = None
) -> ScenarioList:
    """Split a string field and store one of the pieces as a new field.

    ``scenario[field]`` is split on ``split_on`` and the piece at ``index``
    is stored under ``new_name``, which defaults to
    ``<field>_split_<index>``.
    """
    target = field + "_split_" + str(index) if new_name is None else new_name
    extended_scenarios = []
    for scenario in self:
        extended = scenario.copy()
        pieces = scenario[field].split(split_on)
        extended[target] = pieces[index]
        extended_scenarios.append(extended)
    return ScenarioList(extended_scenarios)
583
+
584
def add_list(self, name, values) -> ScenarioList:
    """Assign ``values`` position-by-position to field ``name``, in place.

    The i-th value is set on the i-th existing scenario. Values beyond the
    current length are appended as new single-field scenarios. Returns this
    same ScenarioList.

    Example:

    >>> s = ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])
    >>> s.add_list('age', [30, 25])
    ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 25})])
    """
    for position, value in enumerate(values):
        if position >= len(self):
            self.append(Scenario({name: value}))
            continue
        self[position][name] = value
    return self
599
+
600
def add_value(self, name: str, value: Any) -> ScenarioList:
    """Set field ``name`` to the same ``value`` on every scenario, in place.

    Returns this same ScenarioList.

    Example:

    >>> s = ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])
    >>> s.add_value('age', 30)
    ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 30})])
    """
    for entry in self:
        entry[name] = value
    return self
612
+
613
def rename(self, replacement_dict: dict) -> ScenarioList:
    """Return a new ScenarioList whose scenarios have renamed fields.

    ``replacement_dict`` is passed to each scenario's own ``rename``.

    Example:

    >>> s = ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 25})])
    >>> s.rename({'name': 'first_name', 'age': 'years'})
    ScenarioList([Scenario({'first_name': 'Alice', 'years': 30}), Scenario({'first_name': 'Bob', 'years': 25})])

    """
    renamed = ScenarioList([])
    for scenario in self:
        renamed.append(scenario.rename(replacement_dict))
    return renamed
629
+
630
@classmethod
def from_sqlite(cls, filepath: str, table: str):
    """Create a ScenarioList from every row of a SQLite table.

    Each row becomes one scenario keyed by the column names reported by the
    cursor.

    :param filepath: Path to the SQLite database file.
    :param table: Name of the table to read.
    """
    import sqlite3
    from contextlib import closing

    # sqlite3's own context manager only commits or rolls back; closing()
    # makes sure the connection is released when we are done.
    with closing(sqlite3.connect(filepath)) as conn:
        cursor = conn.cursor()
        # NOTE(review): `table` is interpolated directly into the SQL text
        # (identifiers cannot be bound as parameters). Only pass trusted
        # table names.
        cursor.execute(f"SELECT * FROM {table}")
        columns = [description[0] for description in cursor.description]
        rows = cursor.fetchall()
    return cls([Scenario(dict(zip(columns, row))) for row in rows])
640
+
641
@classmethod
def from_latex(cls, tex_file_path: str):
    """Create a ScenarioList with one scenario per non-blank line of a file.

    Each scenario holds the stripped line (``text``), its 1-based line
    number in the file (``line_no``), and the stripped text of the previous
    and next non-blank lines (``line_before`` / ``line_after``, ``None`` at
    the ends).
    """
    with open(tex_file_path, "r") as file:
        lines = file.readlines()

    # (0-based line index, stripped text) for every line with content.
    numbered = []
    for line_index, raw in enumerate(lines):
        stripped = raw.strip()
        if stripped:
            numbered.append((line_index, stripped))

    last_position = len(numbered) - 1
    scenarios = []
    for position, (line_index, text) in enumerate(numbered):
        previous_text = numbered[position - 1][1] if position > 0 else None
        next_text = numbered[position + 1][1] if position < last_position else None
        scenarios.append(
            Scenario(
                {
                    "line_no": line_index + 1,
                    "text": text,
                    "line_before": previous_text,
                    "line_after": next_text,
                }
            )
        )

    return ScenarioList(scenarios)
665
+
666
@classmethod
def from_google_doc(cls, url: str) -> ScenarioList:
    """Create a ScenarioList from a Google Doc.

    This method downloads the Google Doc as a Word file (.docx), saves it to a temporary file,
    reads it using the from_docx class method, and then removes the temporary file.

    Args:
        url (str): The URL to the Google Doc. It must contain ``/d/<id>/edit``.

    Returns:
        ScenarioList: An instance of the ScenarioList class.

    Raises:
        ValueError: If the document id cannot be extracted from ``url``.

    """
    import os
    import tempfile
    import requests

    # Not used directly here; presumably kept so that a missing python-docx
    # fails before the download starts -- TODO confirm.
    from docx import Document  # noqa: F401

    if "/d/" not in url or "/edit" not in url:
        raise ValueError("Invalid Google Doc URL format.")
    doc_id = url.split("/d/")[1].split("/edit")[0]

    export_url = f"https://docs.google.com/document/d/{doc_id}/export?format=docx"

    # Download the Google Doc as a Word file (.docx)
    response = requests.get(export_url)
    response.raise_for_status()  # Ensure the request was successful

    # delete=False so the file can be reopened by path after this block.
    with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as temp_file:
        temp_file.write(response.content)
        temp_filename = temp_file.name

    try:
        return cls.from_docx(temp_filename)
    finally:
        # Clean up the download; a failed cleanup must not mask the result.
        try:
            os.remove(temp_filename)
        except OSError:
            pass
702
+
703
@classmethod
def from_pandas(cls, df) -> ScenarioList:
    """Create a ScenarioList with one scenario per DataFrame row.

    Rows are taken from ``df.to_dict(orient="records")``.

    Example:

    >>> import pandas as pd
    >>> df = pd.DataFrame({'name': ['Alice', 'Bob'], 'age': [30, 25], 'location': ['New York', 'Los Angeles']})
    >>> ScenarioList.from_pandas(df)
    ScenarioList([Scenario({'name': 'Alice', 'age': 30, 'location': 'New York'}), Scenario({'name': 'Bob', 'age': 25, 'location': 'Los Angeles'})])
    """
    records = df.to_dict(orient="records")
    scenarios = []
    for record in records:
        scenarios.append(Scenario(record))
    return cls(scenarios)
715
+
716
@classmethod
def from_wikipedia(cls, url: str, table_index: int = 0):
    """
    Create a ScenarioList from one HTML table on a Wikipedia page.

    The page is downloaded once and the same HTML is handed to
    ``pandas.read_html``. This is a best-effort helper: any failure is
    printed and ``None`` is returned instead of raising.

    Parameters:
        url (str): The URL of the Wikipedia page.
        table_index (int): The index of the table to extract (default is 0).

    Returns:
        ScenarioList | None: The scenarios built from the selected table
        via ``from_pandas``, or ``None`` if fetching, parsing or table
        selection failed.

    Example usage (not run as a doctest because it needs network access):

        url = "https://en.wikipedia.org/wiki/List_of_countries_by_GDP_(nominal)"
        scenarios = ScenarioList.from_wikipedia(url, 0)
    """
    from io import StringIO

    import pandas as pd
    import requests
    from requests.exceptions import RequestException

    try:
        response = requests.get(url)
        response.raise_for_status()  # Raises HTTPError for bad responses

        # Parse the HTML that was already downloaded rather than letting
        # pandas fetch the URL a second time.
        tables = pd.read_html(StringIO(response.text))

        # Ensure the requested table index is within the range of available tables
        if table_index >= len(tables) or table_index < 0:
            raise IndexError(
                f"Table index {table_index} is out of range. This page has {len(tables)} table(s)."
            )

        return cls.from_pandas(tables[table_index])

    except RequestException as e:
        print(f"Error fetching the URL: {e}")
    except ValueError as e:
        print(f"Error parsing tables: {e}")
    except IndexError as e:
        print(e)
    except Exception as e:
        print(f"An unexpected error occurred: {e}")

    # Reached only after one of the handlers above has reported the error.
    return None
771
+
772
def to_key_value(self, field: str, value=None) -> Union[dict, set]:
    """Collect the values of ``field``, optionally mapped to another field.

    With ``value`` left as ``None`` the result is the set of ``field``
    values. Otherwise it is a dict mapping each scenario's ``field`` value
    to its ``value`` field (later scenarios overwrite earlier ones that
    share a key).

    Example:

    >>> s = ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])
    >>> s.to_key_value('name') == {'Alice', 'Bob'}
    True
    """
    if value is not None:
        return {scenario[field]: scenario[value] for scenario in self}
    return {scenario[field] for scenario in self}
785
+
786
@classmethod
def from_excel(
    cls, filename: str, sheet_name: Optional[str] = None
) -> ScenarioList:
    """Create a ScenarioList from an Excel file.

    If the Excel file contains multiple sheets and no sheet_name is provided,
    the method will print the available sheets and require the user to specify one.
    The workbook is parsed only once: just the requested sheet when
    ``sheet_name`` is given, otherwise all sheets (to find out how many
    there are).

    Raises:
        ValueError: If ``sheet_name`` is omitted and the file has more than
            one sheet.

    Example:

    >>> import tempfile
    >>> import os
    >>> import pandas as pd
    >>> with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as f:
    ...     df1 = pd.DataFrame({
    ...         'name': ['Alice', 'Bob'],
    ...         'age': [30, 25],
    ...         'location': ['New York', 'Los Angeles']
    ...     })
    ...     df2 = pd.DataFrame({
    ...         'name': ['Charlie', 'David'],
    ...         'age': [35, 40],
    ...         'location': ['Chicago', 'Boston']
    ...     })
    ...     with pd.ExcelWriter(f.name) as writer:
    ...         df1.to_excel(writer, sheet_name='Sheet1', index=False)
    ...         df2.to_excel(writer, sheet_name='Sheet2', index=False)
    ...     temp_filename = f.name
    >>> scenario_list = ScenarioList.from_excel(temp_filename, sheet_name='Sheet1')
    >>> len(scenario_list)
    2
    >>> scenario_list[0]['name']
    'Alice'
    >>> scenario_list = ScenarioList.from_excel(temp_filename)  # Should raise an error and list sheets
    Traceback (most recent call last):
    ...
    ValueError: Please provide a sheet name to load data from.
    """
    from edsl.scenarios.Scenario import Scenario
    import pandas as pd

    if sheet_name is None:
        # sheet_name=None makes pandas return a {sheet name: DataFrame} dict.
        all_sheets = pd.read_excel(filename, sheet_name=None)
        if len(all_sheets) > 1:
            print("The Excel file contains multiple sheets:")
            for name in all_sheets.keys():
                print(f"- {name}")
            raise ValueError("Please provide a sheet name to load data from.")
        # Exactly one sheet: reuse the frame that was already parsed.
        sheet_name, df = next(iter(all_sheets.items()))
    else:
        df = pd.read_excel(filename, sheet_name=sheet_name)

    observations = []
    for _, row in df.iterrows():
        observations.append(Scenario(row.to_dict()))

    return cls(observations)
850
+
851
@classmethod
def from_google_sheet(
    cls, url: str, sheet_name: Optional[str] = None
) -> ScenarioList:
    """Create a ScenarioList from a Google Sheet.

    This method downloads the Google Sheet as an Excel file, saves it to a temporary file,
    reads it using the from_excel class method, and then removes the temporary file.

    Args:
        url (str): The URL to the Google Sheet. It must contain ``/d/<id>/edit``.
        sheet_name (str, optional): The name of the sheet to load. If None, the method will behave
                                    the same as from_excel regarding multiple sheets.

    Returns:
        ScenarioList: An instance of the ScenarioList class.

    Raises:
        ValueError: If the sheet id cannot be extracted from ``url``.

    """
    import os
    import tempfile
    import requests

    if "/d/" not in url or "/edit" not in url:
        raise ValueError("Invalid Google Sheet URL format.")
    sheet_id = url.split("/d/")[1].split("/edit")[0]

    export_url = (
        f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=xlsx"
    )

    # Download the Google Sheet as an Excel file
    response = requests.get(export_url)
    response.raise_for_status()  # Ensure the request was successful

    # delete=False so from_excel can reopen the file by path.
    with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as temp_file:
        temp_file.write(response.content)
        temp_filename = temp_file.name

    try:
        return cls.from_excel(temp_filename, sheet_name=sheet_name)
    finally:
        # Clean up the download; a failed cleanup must not mask the result.
        try:
            os.remove(temp_filename)
        except OSError:
            pass
891
+
892
@classmethod
def from_csv(cls, source: Union[str, urllib.parse.ParseResult]) -> ScenarioList:
    """Create a ScenarioList from a CSV file or URL.

    The first row is used as the header; every following row becomes one
    scenario whose values are the raw strings from the file. A source with
    no rows at all yields an empty ScenarioList.

    Args:
        source: A string representing either a local file path or a URL to a CSV file,
                or a urllib.parse.ParseResult object for a URL.

    Returns:
        ScenarioList: A ScenarioList object containing the data from the CSV.

    Example:

    >>> import tempfile
    >>> import os
    >>> with tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.csv') as f:
    ...     _ = f.write("name,age,location\\nAlice,30,New York\\nBob,25,Los Angeles\\n")
    ...     temp_filename = f.name
    >>> scenario_list = ScenarioList.from_csv(temp_filename)
    >>> len(scenario_list)
    2
    >>> scenario_list[0]['name']
    'Alice'
    >>> scenario_list[1]['age']
    '25'

    >>> url = "https://example.com/data.csv"
    >>> ## scenario_list_from_url = ScenarioList.from_csv(url)
    """
    from edsl.scenarios.Scenario import Scenario

    def is_url(candidate):
        # A URL needs both a scheme and a network location.
        try:
            result = urllib.parse.urlparse(candidate)
            return all([result.scheme, result.netloc])
        except ValueError:
            return False

    # Resolve the source to a remote address, or None for a local path.
    if isinstance(source, urllib.parse.ParseResult):
        remote = source.geturl()
    elif isinstance(source, str) and is_url(source):
        remote = source
    else:
        remote = None

    if remote is not None:
        with urllib.request.urlopen(remote) as response:
            csv_file = StringIO(response.read().decode("utf-8"))
    else:
        # The csv module expects newline="" so that line breaks inside
        # quoted fields are left for the reader to interpret.
        csv_file = open(source, "r", newline="")

    with csv_file:
        reader = csv.reader(csv_file)
        header = next(reader, None)
        if header is None:
            # No header row at all: nothing to build scenarios from.
            return cls([])
        observations = [Scenario(dict(zip(header, row))) for row in reader]

    return cls(observations)
949
+
950
def _to_dict(self, sort=False) -> dict:
    """Return ``{"scenarios": [...]}`` built from each scenario's ``_to_dict``.

    With ``sort`` truthy the scenarios are first ordered by their ``hash``.
    """
    ordered = sorted(self, key=hash) if sort else self
    return {"scenarios": [scenario._to_dict() for scenario in ordered]}
956
+
957
@add_edsl_version
def to_dict(self) -> dict[str, Any]:
    """Return the `ScenarioList` as a dictionary.

    The scenarios are serialized with their own ``to_dict`` under the
    ``"scenarios"`` key.

    Example:

    >>> s = ScenarioList([Scenario({'food': 'wood chips'}), Scenario({'food': 'wood-fired pizza'})])
    >>> s.to_dict()
    {'scenarios': [{'food': 'wood chips', 'edsl_version': '...', 'edsl_class_name': 'Scenario'}, {'food': 'wood-fired pizza', 'edsl_version': '...', 'edsl_class_name': 'Scenario'}], 'edsl_version': '...', 'edsl_class_name': 'ScenarioList'}
    """
    serialized = []
    for scenario in self:
        serialized.append(scenario.to_dict())
    return {"scenarios": serialized}
968
+
969
@classmethod
def gen(cls, scenario_dicts_list: List[dict]) -> ScenarioList:
    """Create a `ScenarioList` by wrapping each dictionary in a `Scenario`.

    Example:

    >>> ScenarioList.gen([{'name': 'Alice'}, {'name': 'Bob'}])
    ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])

    """
    from edsl.scenarios.Scenario import Scenario

    scenarios = []
    for scenario_dict in scenario_dicts_list:
        scenarios.append(Scenario(scenario_dict))
    return cls(scenarios)
982
+
983
@classmethod
@remove_edsl_version
def from_dict(cls, data) -> ScenarioList:
    """Create a `ScenarioList` from a dictionary with a ``"scenarios"`` list.

    Each entry is rebuilt with ``Scenario.from_dict``.
    """
    from edsl.scenarios.Scenario import Scenario

    scenarios = []
    for entry in data["scenarios"]:
        scenarios.append(Scenario.from_dict(entry))
    return cls(scenarios)
990
+
991
@classmethod
def from_nested_dict(cls, data: dict) -> ScenarioList:
    """Create a `ScenarioList` from a dictionary of field name -> list of values.

    Starting from an empty list, each ``(key, values)`` pair is applied
    with ``add_list``, so the i-th value of every key ends up in the i-th
    scenario.

    For instance, ``{'a': [1, 2], 'b': [3, 4]}`` yields two scenarios, the
    first with ``a=1, b=3`` and the second with ``a=2, b=4``.
    """
    # Use cls (as gen/from_dict do) so subclasses get their own type back.
    scenario_list = cls()
    for key, values in data.items():
        scenario_list.add_list(key, values)
    return scenario_list
1000
+
1001
def code(self) -> List[str]:
    """Return Python source, as a list of strings, that rebuilds this list.

    The first element holds the two import statements (newline-joined).
    Then comes one ``scenario_<i> = <repr>`` assignment per scenario, and
    finally a ``scenarios = ScenarioList([...])`` line naming them all.
    The pieces are returned as a list; they are not joined into one string.
    """
    header = "\n".join(
        [
            "from edsl.scenarios.Scenario import Scenario",
            "from edsl.scenarios.ScenarioList import ScenarioList",
        ]
    )
    lines = [header]
    names = []
    for index, scenario in enumerate(self):
        name = f"scenario_{index}"
        lines.append(f"{name} = " + repr(scenario))
        names.append(name)
    lines.append(f"scenarios = ScenarioList([{', '.join(names)}])")
    return lines
1015
+
1016
@classmethod
def example(cls, randomize: bool = False) -> ScenarioList:
    """
    Return an example ScenarioList made of two ``Scenario.example`` instances.

    :params randomize: Passed through to ``Scenario.example`` for both scenarios.
    """
    first = Scenario.example(randomize)
    second = Scenario.example(randomize)
    return cls([first, second])
1024
+
1025
def rich_print(self) -> "Table":
    """Build and return a ``rich`` table with one row per scenario.

    The table has an ``Index`` column and a ``Scenario`` column; each
    scenario cell is whatever that scenario's own ``rich_print`` returns.
    The table is returned, not printed.
    """
    from rich.table import Table

    table = Table(title="ScenarioList")
    table.add_column("Index", style="bold")
    table.add_column("Scenario")
    for index, scenario in enumerate(self):
        table.add_row(str(index), scenario.rich_print())
    return table
1035
+
1036
def __getitem__(self, key: Union[int, slice]) -> Any:
    """Return the item(s) selected by ``key``.

    An integer returns the single element from the parent class, a slice
    returns a new ScenarioList, and any other key is looked up in
    ``self.to_dict()``.

    Example:
    >>> s = ScenarioList([Scenario({'age': 22, 'hair': 'brown', 'height': 5.5}), Scenario({'age': 22, 'hair': 'brown', 'height': 5.5})])
    >>> s[0]
    Scenario({'age': 22, 'hair': 'brown', 'height': 5.5})

    >>> s[:1]
    ScenarioList([Scenario({'age': 22, 'hair': 'brown', 'height': 5.5})])

    """
    if isinstance(key, int):
        return super().__getitem__(key)
    if isinstance(key, slice):
        return ScenarioList(super().__getitem__(key))
    return self.to_dict()[key]
1054
+
1055
def to_agent_list(self):
    """Convert the ScenarioList to an AgentList.

    Each scenario's ``data`` becomes the ``traits`` of one ``Agent``.

    Example:

    >>> s = ScenarioList([Scenario({'age': 22, 'hair': 'brown', 'height': 5.5}), Scenario({'age': 22, 'hair': 'brown', 'height': 5.5})])
    >>> s.to_agent_list()
    AgentList([Agent(traits = {'age': 22, 'hair': 'brown', 'height': 5.5}), Agent(traits = {'age': 22, 'hair': 'brown', 'height': 5.5})])
    """
    from edsl.agents.AgentList import AgentList
    from edsl.agents.Agent import Agent

    agents = []
    for scenario in self:
        agents.append(Agent(traits=scenario.data))
    return AgentList(agents)
1068
+
1069
def chunk(
    self,
    field,
    num_words: Optional[int] = None,
    num_lines: Optional[int] = None,
    include_original=False,
    hash_original=False,
) -> "ScenarioList":
    """Replace every scenario with the chunks its own ``chunk`` produces.

    All arguments are forwarded unchanged to ``Scenario.chunk``; the
    resulting pieces are concatenated in scenario order.

    Example:

    >>> s = ScenarioList([Scenario({'text': 'The quick brown fox jumps over the lazy dog.'})])
    >>> s.chunk('text', num_words=3)
    ScenarioList([Scenario({'text': 'The quick brown', 'text_chunk': 0}), Scenario({'text': 'fox jumps over', 'text_chunk': 1}), Scenario({'text': 'the lazy dog.', 'text_chunk': 2})])
    """
    chunked = []
    for scenario in self:
        pieces = scenario.chunk(
            field,
            num_words=num_words,
            num_lines=num_lines,
            include_original=include_original,
            hash_original=hash_original,
        )
        chunked.extend(pieces)
    return ScenarioList(chunked)
1096
+
1097
+
1098
if __name__ == "__main__":
    # Run this module's doctests when executed as a script. ELLIPSIS lets
    # "..." in expected output match arbitrary text (e.g. version strings).
    from doctest import ELLIPSIS, testmod

    testmod(optionflags=ELLIPSIS)