edsl 0.1.31.dev4__py3-none-any.whl → 0.1.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188) hide show
  1. edsl/Base.py +9 -3
  2. edsl/TemplateLoader.py +24 -0
  3. edsl/__init__.py +8 -3
  4. edsl/__version__.py +1 -1
  5. edsl/agents/Agent.py +40 -8
  6. edsl/agents/AgentList.py +43 -0
  7. edsl/agents/Invigilator.py +136 -221
  8. edsl/agents/InvigilatorBase.py +148 -59
  9. edsl/agents/{PromptConstructionMixin.py → PromptConstructor.py} +154 -85
  10. edsl/agents/__init__.py +1 -0
  11. edsl/auto/AutoStudy.py +117 -0
  12. edsl/auto/StageBase.py +230 -0
  13. edsl/auto/StageGenerateSurvey.py +178 -0
  14. edsl/auto/StageLabelQuestions.py +125 -0
  15. edsl/auto/StagePersona.py +61 -0
  16. edsl/auto/StagePersonaDimensionValueRanges.py +88 -0
  17. edsl/auto/StagePersonaDimensionValues.py +74 -0
  18. edsl/auto/StagePersonaDimensions.py +69 -0
  19. edsl/auto/StageQuestions.py +73 -0
  20. edsl/auto/SurveyCreatorPipeline.py +21 -0
  21. edsl/auto/utilities.py +224 -0
  22. edsl/config.py +48 -47
  23. edsl/conjure/Conjure.py +6 -0
  24. edsl/coop/PriceFetcher.py +58 -0
  25. edsl/coop/coop.py +50 -7
  26. edsl/data/Cache.py +35 -1
  27. edsl/data/CacheHandler.py +3 -4
  28. edsl/data_transfer_models.py +73 -38
  29. edsl/enums.py +8 -0
  30. edsl/exceptions/general.py +10 -8
  31. edsl/exceptions/language_models.py +25 -1
  32. edsl/exceptions/questions.py +62 -5
  33. edsl/exceptions/results.py +4 -0
  34. edsl/inference_services/AnthropicService.py +13 -11
  35. edsl/inference_services/AwsBedrock.py +112 -0
  36. edsl/inference_services/AzureAI.py +214 -0
  37. edsl/inference_services/DeepInfraService.py +4 -3
  38. edsl/inference_services/GoogleService.py +16 -12
  39. edsl/inference_services/GroqService.py +5 -4
  40. edsl/inference_services/InferenceServiceABC.py +58 -3
  41. edsl/inference_services/InferenceServicesCollection.py +13 -8
  42. edsl/inference_services/MistralAIService.py +120 -0
  43. edsl/inference_services/OllamaService.py +18 -0
  44. edsl/inference_services/OpenAIService.py +55 -56
  45. edsl/inference_services/TestService.py +80 -0
  46. edsl/inference_services/TogetherAIService.py +170 -0
  47. edsl/inference_services/models_available_cache.py +25 -0
  48. edsl/inference_services/registry.py +19 -1
  49. edsl/jobs/Answers.py +10 -12
  50. edsl/jobs/FailedQuestion.py +78 -0
  51. edsl/jobs/Jobs.py +137 -41
  52. edsl/jobs/buckets/BucketCollection.py +24 -15
  53. edsl/jobs/buckets/TokenBucket.py +105 -18
  54. edsl/jobs/interviews/Interview.py +393 -83
  55. edsl/jobs/interviews/{interview_exception_tracking.py → InterviewExceptionCollection.py} +22 -18
  56. edsl/jobs/interviews/InterviewExceptionEntry.py +167 -0
  57. edsl/jobs/runners/JobsRunnerAsyncio.py +152 -160
  58. edsl/jobs/runners/JobsRunnerStatus.py +331 -0
  59. edsl/jobs/tasks/QuestionTaskCreator.py +30 -23
  60. edsl/jobs/tasks/TaskCreators.py +1 -1
  61. edsl/jobs/tasks/TaskHistory.py +205 -126
  62. edsl/language_models/LanguageModel.py +297 -177
  63. edsl/language_models/ModelList.py +2 -2
  64. edsl/language_models/RegisterLanguageModelsMeta.py +14 -29
  65. edsl/language_models/fake_openai_call.py +15 -0
  66. edsl/language_models/fake_openai_service.py +61 -0
  67. edsl/language_models/registry.py +25 -8
  68. edsl/language_models/repair.py +0 -19
  69. edsl/language_models/utilities.py +61 -0
  70. edsl/notebooks/Notebook.py +20 -2
  71. edsl/prompts/Prompt.py +52 -2
  72. edsl/questions/AnswerValidatorMixin.py +23 -26
  73. edsl/questions/QuestionBase.py +330 -249
  74. edsl/questions/QuestionBaseGenMixin.py +133 -0
  75. edsl/questions/QuestionBasePromptsMixin.py +266 -0
  76. edsl/questions/QuestionBudget.py +99 -42
  77. edsl/questions/QuestionCheckBox.py +227 -36
  78. edsl/questions/QuestionExtract.py +98 -28
  79. edsl/questions/QuestionFreeText.py +47 -31
  80. edsl/questions/QuestionFunctional.py +7 -0
  81. edsl/questions/QuestionList.py +141 -23
  82. edsl/questions/QuestionMultipleChoice.py +159 -66
  83. edsl/questions/QuestionNumerical.py +88 -47
  84. edsl/questions/QuestionRank.py +182 -25
  85. edsl/questions/Quick.py +41 -0
  86. edsl/questions/RegisterQuestionsMeta.py +31 -12
  87. edsl/questions/ResponseValidatorABC.py +170 -0
  88. edsl/questions/__init__.py +3 -4
  89. edsl/questions/decorators.py +21 -0
  90. edsl/questions/derived/QuestionLikertFive.py +10 -5
  91. edsl/questions/derived/QuestionLinearScale.py +15 -2
  92. edsl/questions/derived/QuestionTopK.py +10 -1
  93. edsl/questions/derived/QuestionYesNo.py +24 -3
  94. edsl/questions/descriptors.py +43 -7
  95. edsl/questions/prompt_templates/question_budget.jinja +13 -0
  96. edsl/questions/prompt_templates/question_checkbox.jinja +32 -0
  97. edsl/questions/prompt_templates/question_extract.jinja +11 -0
  98. edsl/questions/prompt_templates/question_free_text.jinja +3 -0
  99. edsl/questions/prompt_templates/question_linear_scale.jinja +11 -0
  100. edsl/questions/prompt_templates/question_list.jinja +17 -0
  101. edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -0
  102. edsl/questions/prompt_templates/question_numerical.jinja +37 -0
  103. edsl/questions/question_registry.py +6 -2
  104. edsl/questions/templates/__init__.py +0 -0
  105. edsl/questions/templates/budget/__init__.py +0 -0
  106. edsl/questions/templates/budget/answering_instructions.jinja +7 -0
  107. edsl/questions/templates/budget/question_presentation.jinja +7 -0
  108. edsl/questions/templates/checkbox/__init__.py +0 -0
  109. edsl/questions/templates/checkbox/answering_instructions.jinja +10 -0
  110. edsl/questions/templates/checkbox/question_presentation.jinja +22 -0
  111. edsl/questions/templates/extract/__init__.py +0 -0
  112. edsl/questions/templates/extract/answering_instructions.jinja +7 -0
  113. edsl/questions/templates/extract/question_presentation.jinja +1 -0
  114. edsl/questions/templates/free_text/__init__.py +0 -0
  115. edsl/questions/templates/free_text/answering_instructions.jinja +0 -0
  116. edsl/questions/templates/free_text/question_presentation.jinja +1 -0
  117. edsl/questions/templates/likert_five/__init__.py +0 -0
  118. edsl/questions/templates/likert_five/answering_instructions.jinja +10 -0
  119. edsl/questions/templates/likert_five/question_presentation.jinja +12 -0
  120. edsl/questions/templates/linear_scale/__init__.py +0 -0
  121. edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -0
  122. edsl/questions/templates/linear_scale/question_presentation.jinja +5 -0
  123. edsl/questions/templates/list/__init__.py +0 -0
  124. edsl/questions/templates/list/answering_instructions.jinja +4 -0
  125. edsl/questions/templates/list/question_presentation.jinja +5 -0
  126. edsl/questions/templates/multiple_choice/__init__.py +0 -0
  127. edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -0
  128. edsl/questions/templates/multiple_choice/html.jinja +0 -0
  129. edsl/questions/templates/multiple_choice/question_presentation.jinja +12 -0
  130. edsl/questions/templates/numerical/__init__.py +0 -0
  131. edsl/questions/templates/numerical/answering_instructions.jinja +8 -0
  132. edsl/questions/templates/numerical/question_presentation.jinja +7 -0
  133. edsl/questions/templates/rank/__init__.py +0 -0
  134. edsl/questions/templates/rank/answering_instructions.jinja +11 -0
  135. edsl/questions/templates/rank/question_presentation.jinja +15 -0
  136. edsl/questions/templates/top_k/__init__.py +0 -0
  137. edsl/questions/templates/top_k/answering_instructions.jinja +8 -0
  138. edsl/questions/templates/top_k/question_presentation.jinja +22 -0
  139. edsl/questions/templates/yes_no/__init__.py +0 -0
  140. edsl/questions/templates/yes_no/answering_instructions.jinja +6 -0
  141. edsl/questions/templates/yes_no/question_presentation.jinja +12 -0
  142. edsl/results/Dataset.py +20 -0
  143. edsl/results/DatasetExportMixin.py +58 -30
  144. edsl/results/DatasetTree.py +145 -0
  145. edsl/results/Result.py +32 -5
  146. edsl/results/Results.py +135 -46
  147. edsl/results/ResultsDBMixin.py +3 -3
  148. edsl/results/Selector.py +118 -0
  149. edsl/results/tree_explore.py +115 -0
  150. edsl/scenarios/FileStore.py +71 -10
  151. edsl/scenarios/Scenario.py +109 -24
  152. edsl/scenarios/ScenarioImageMixin.py +2 -2
  153. edsl/scenarios/ScenarioList.py +546 -21
  154. edsl/scenarios/ScenarioListExportMixin.py +24 -4
  155. edsl/scenarios/ScenarioListPdfMixin.py +153 -4
  156. edsl/study/SnapShot.py +8 -1
  157. edsl/study/Study.py +32 -0
  158. edsl/surveys/Rule.py +15 -3
  159. edsl/surveys/RuleCollection.py +21 -5
  160. edsl/surveys/Survey.py +707 -298
  161. edsl/surveys/SurveyExportMixin.py +71 -9
  162. edsl/surveys/SurveyFlowVisualizationMixin.py +2 -1
  163. edsl/surveys/SurveyQualtricsImport.py +284 -0
  164. edsl/surveys/instructions/ChangeInstruction.py +47 -0
  165. edsl/surveys/instructions/Instruction.py +34 -0
  166. edsl/surveys/instructions/InstructionCollection.py +77 -0
  167. edsl/surveys/instructions/__init__.py +0 -0
  168. edsl/templates/error_reporting/base.html +24 -0
  169. edsl/templates/error_reporting/exceptions_by_model.html +35 -0
  170. edsl/templates/error_reporting/exceptions_by_question_name.html +17 -0
  171. edsl/templates/error_reporting/exceptions_by_type.html +17 -0
  172. edsl/templates/error_reporting/interview_details.html +116 -0
  173. edsl/templates/error_reporting/interviews.html +10 -0
  174. edsl/templates/error_reporting/overview.html +5 -0
  175. edsl/templates/error_reporting/performance_plot.html +2 -0
  176. edsl/templates/error_reporting/report.css +74 -0
  177. edsl/templates/error_reporting/report.html +118 -0
  178. edsl/templates/error_reporting/report.js +25 -0
  179. edsl/utilities/utilities.py +40 -1
  180. {edsl-0.1.31.dev4.dist-info → edsl-0.1.33.dist-info}/METADATA +8 -2
  181. edsl-0.1.33.dist-info/RECORD +295 -0
  182. edsl/jobs/interviews/InterviewTaskBuildingMixin.py +0 -271
  183. edsl/jobs/interviews/retry_management.py +0 -37
  184. edsl/jobs/runners/JobsRunnerStatusMixin.py +0 -303
  185. edsl/utilities/gcp_bucket/simple_example.py +0 -9
  186. edsl-0.1.31.dev4.dist-info/RECORD +0 -204
  187. {edsl-0.1.31.dev4.dist-info → edsl-0.1.33.dist-info}/LICENSE +0 -0
  188. {edsl-0.1.31.dev4.dist-info → edsl-0.1.33.dist-info}/WHEEL +0 -0
@@ -6,6 +6,11 @@ import csv
6
6
  import random
7
7
  from collections import UserList, Counter
8
8
  from collections.abc import Iterable
9
+ import urllib.parse
10
+ import urllib.request
11
+ from io import StringIO
12
+ from collections import defaultdict
13
+ import inspect
9
14
 
10
15
  from simpleeval import EvalWithCompoundTypes
11
16
 
@@ -15,6 +20,9 @@ from edsl.scenarios.Scenario import Scenario
15
20
  from edsl.scenarios.ScenarioListPdfMixin import ScenarioListPdfMixin
16
21
  from edsl.scenarios.ScenarioListExportMixin import ScenarioListExportMixin
17
22
 
23
+ from edsl.conjure.naming_utilities import sanitize_string
24
+ from edsl.utilities.utilities import is_valid_variable_name
25
+
18
26
 
19
27
class ScenarioListMixin(ScenarioListPdfMixin, ScenarioListExportMixin):
    """Bundle the PDF and export mixins into the single mixin ScenarioList inherits."""

    pass
@@ -23,12 +31,180 @@ class ScenarioListMixin(ScenarioListPdfMixin, ScenarioListExportMixin):
23
31
  class ScenarioList(Base, UserList, ScenarioListMixin):
24
32
  """Class for creating a list of scenarios to be used in a survey."""
25
33
 
26
def __init__(self, data: Optional[list] = None, codebook: Optional[dict] = None):
    """Set up the list contents and the accompanying codebook.

    :param data: Initial scenarios; ``None`` produces an empty list.
    :param codebook: Mapping stored on ``self.codebook``; any falsy value
        (including ``None``) is replaced by a fresh empty dict.
    """
    initial = [] if data is None else data
    super().__init__(initial)
    self.codebook = codebook if codebook else {}
41
+
42
@property
def has_jinja_braces(self) -> bool:
    """Return True when at least one scenario reports ``has_jinja_braces``."""
    flags = [item.has_jinja_braces for item in self]
    for flag in flags:
        if flag:
            return True
    return False
46
+
47
def convert_jinja_braces(self) -> ScenarioList:
    """Return a new ScenarioList built from each scenario's ``convert_jinja_braces()`` result."""
    converted = []
    for item in self:
        converted.append(item.convert_jinja_braces())
    return ScenarioList(converted)
50
+
51
def give_valid_names(self) -> ScenarioList:
    """Return a copy in which every scenario key is a valid variable name.

    Keys accepted by ``is_valid_variable_name`` are kept unchanged. Any other
    key is passed through ``sanitize_string``; if the result is still not a
    valid name, a positional ``var_<n>`` name is used instead. The mapping
    from original key to replacement is shared across all scenarios and is
    attached to the returned list as its ``codebook``.

    A replacement that is already in use -- either as an already-valid key
    somewhere in the list or as the replacement of a different key -- gets
    a numeric suffix, so no value is silently overwritten.

    >>> s = ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
    >>> s.give_valid_names()
    ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
    >>> s = ScenarioList([Scenario({'are you there John?': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
    >>> s.give_valid_names()
    ScenarioList([Scenario({'john': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
    """
    codebook = {}
    # Names that a replacement must not reuse: every key that is already
    # valid anywhere in the list, plus (as we go) every replacement issued.
    taken = {
        key
        for scenario in self
        for key in scenario
        if is_valid_variable_name(key)
    }

    new_scenarios = []
    for scenario in self:
        new_scenario = {}
        for key in scenario:
            if is_valid_variable_name(key):
                new_scenario[key] = scenario[key]
                continue

            if key not in codebook:
                base = sanitize_string(key)
                if not is_valid_variable_name(base):
                    base = f"var_{len(codebook)}"
                # Disambiguate instead of clobbering an existing column.
                new_key, suffix = base, 1
                while new_key in taken:
                    new_key = f"{base}_{suffix}"
                    suffix += 1
                taken.add(new_key)
                codebook[key] = new_key

            new_scenario[codebook[key]] = scenario[key]
        new_scenarios.append(Scenario(new_scenario))
    return ScenarioList(new_scenarios, codebook)
79
+
80
def unpivot(self, id_vars=None, value_vars=None):
    """
    Unpivot the ScenarioList (wide to long), keeping the id variables in each entry.

    Every scenario yields one output scenario per value variable, holding the
    id fields plus ``'variable'`` (the field name) and ``'value'`` (its value).
    An empty ScenarioList yields an empty ScenarioList.

    Parameters:
    id_vars (list): Fields to use as identifier variables (kept in each entry)
    value_vars (list): Fields to unpivot. If None, all fields of the first
        scenario that are not in id_vars will be used.

    Example:
    >>> s = ScenarioList([
    ...     Scenario({'id': 1, 'year': 2020, 'a': 10, 'b': 20}),
    ...     Scenario({'id': 2, 'year': 2021, 'a': 15, 'b': 25})
    ... ])
    >>> s.unpivot(id_vars=['id', 'year'], value_vars=['a', 'b'])
    ScenarioList([Scenario({'id': 1, 'year': 2020, 'variable': 'a', 'value': 10}), Scenario({'id': 1, 'year': 2020, 'variable': 'b', 'value': 20}), Scenario({'id': 2, 'year': 2021, 'variable': 'a', 'value': 15}), Scenario({'id': 2, 'year': 2021, 'variable': 'b', 'value': 25})])
    """
    if id_vars is None:
        id_vars = []

    # Nothing to unpivot; also avoids indexing self[0] on an empty list below.
    if len(self) == 0:
        return ScenarioList([])

    if value_vars is None:
        value_vars = [field for field in self[0].keys() if field not in id_vars]

    new_scenarios = []
    for scenario in self:
        for var in value_vars:
            new_scenario = {id_var: scenario[id_var] for id_var in id_vars}
            new_scenario["variable"] = var
            new_scenario["value"] = scenario[var]
            new_scenarios.append(Scenario(new_scenario))

    return ScenarioList(new_scenarios)
110
+
111
def pivot(self, id_vars, var_name="variable", value_name="value"):
    """
    Pivot the ScenarioList from long to wide format.

    Scenarios sharing the same values for ``id_vars`` are merged into a single
    scenario; each contributes one field named by its ``var_name`` entry and
    holding its ``value_name`` entry. Groups appear in first-seen order.

    Parameters:
    id_vars (list): Fields to use as identifier variables
    var_name (str): Name of the variable column (default: 'variable')
    value_name (str): Name of the value column (default: 'value')

    Example:
    >>> s = ScenarioList([
    ...     Scenario({'id': 1, 'year': 2020, 'variable': 'a', 'value': 10}),
    ...     Scenario({'id': 1, 'year': 2020, 'variable': 'b', 'value': 20}),
    ...     Scenario({'id': 2, 'year': 2021, 'variable': 'a', 'value': 15}),
    ...     Scenario({'id': 2, 'year': 2021, 'variable': 'b', 'value': 25})
    ... ])
    >>> s.pivot(id_vars=['id', 'year'])
    ScenarioList([Scenario({'id': 1, 'year': 2020, 'a': 10, 'b': 20}), Scenario({'id': 2, 'year': 2021, 'a': 15, 'b': 25})])
    """
    pivoted_dict = {}

    for scenario in self:
        # The tuple of id values identifies the output row.
        id_key = tuple(scenario[id_var] for id_var in id_vars)

        # First time this combination is seen: seed the row with its id fields.
        if id_key not in pivoted_dict:
            pivoted_dict[id_key] = {id_var: scenario[id_var] for id_var in id_vars}

        pivoted_dict[id_key][scenario[var_name]] = scenario[value_name]

    # Each row already starts with its id fields, so it can be used directly.
    # (Re-expanding it with ``**row`` would raise TypeError for any variable
    # name that is not a string.)
    return ScenarioList([Scenario(dict(row)) for row in pivoted_dict.values()])
152
+
153
def group_by(self, id_vars, variables, func):
    """
    Group the ScenarioList by id_vars and apply a function to the specified variables.

    Parameters:
    id_vars (list): Fields to use as identifier variables for grouping
    variables (list): Fields to pass to the aggregation function
    func (callable): Function to apply to the grouped variables.
        It is called with one positional list of values per variable
        and must return a dictionary.

    Returns:
    ScenarioList: A new ScenarioList with the grouped and aggregated results

    Raises:
    ValueError: If ``func`` cannot be called with ``len(variables)``
        positional arguments, if it raises for some group, or if it does
        not return a dictionary.

    Example:
    >>> def avg_sum(a, b):
    ...     return {'avg_a': sum(a) / len(a), 'sum_b': sum(b)}
    >>> s = ScenarioList([
    ...     Scenario({'group': 'A', 'year': 2020, 'a': 10, 'b': 20}),
    ...     Scenario({'group': 'A', 'year': 2021, 'a': 15, 'b': 25}),
    ...     Scenario({'group': 'B', 'year': 2020, 'a': 12, 'b': 22}),
    ...     Scenario({'group': 'B', 'year': 2021, 'a': 17, 'b': 27})
    ... ])
    >>> s.group_by(id_vars=['group'], variables=['a', 'b'], func=avg_sum)
    ScenarioList([Scenario({'group': 'A', 'avg_a': 12.5, 'sum_b': 45}), Scenario({'group': 'B', 'avg_a': 14.5, 'sum_b': 49})])
    """
    # functools.partial objects and callable instances have no __name__.
    func_name = getattr(func, "__name__", repr(func))

    # Check up front that func accepts one positional argument per variable.
    # Binding (rather than counting parameters) also accepts *args and
    # parameters with defaults.
    try:
        signature = inspect.signature(func)
    except ValueError:
        # No introspectable signature (some builtins); the call below decides.
        signature = None
    if signature is not None:
        try:
            signature.bind(*variables)
        except TypeError:
            raise ValueError(
                f"Function {func_name} expects {len(signature.parameters)} arguments, but {len(variables)} variables were provided"
            ) from None

    # Group the scenarios: id tuple -> variable name -> list of values
    grouped = defaultdict(lambda: defaultdict(list))
    for scenario in self:
        key = tuple(scenario[id_var] for id_var in id_vars)
        for var in variables:
            grouped[key][var].append(scenario[var])

    # Apply the function to each group
    result = []
    for key, group in grouped.items():
        try:
            aggregated = func(*[group[var] for var in variables])
        except Exception as e:
            # Keep the original exception as the cause for debugging.
            raise ValueError(
                f"Error applying function to group {key}: {str(e)}"
            ) from e

        if not isinstance(aggregated, dict):
            raise ValueError(f"Function {func_name} must return a dictionary")

        new_scenario = dict(zip(id_vars, key))
        new_scenario.update(aggregated)
        result.append(Scenario(new_scenario))

    return ScenarioList(result)
32
208
 
33
209
  @property
34
210
  def parameters(self) -> set:
@@ -106,6 +282,10 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
106
282
  for s in data["scenarios"]:
107
283
  _ = s.pop("edsl_version")
108
284
  _ = s.pop("edsl_class_name")
285
+ for scenario in data["scenarios"]:
286
+ for key, value in scenario.items():
287
+ if hasattr(value, "to_dict"):
288
+ data[key] = value.to_dict()
109
289
  return data_to_html(data)
110
290
 
111
291
  def tally(self, field) -> dict:
@@ -153,6 +333,71 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
153
333
  new_scenarios.append(new_scenario)
154
334
  return ScenarioList(new_scenarios)
155
335
 
336
def concatenate(self, fields: List[str], separator: str = ";") -> "ScenarioList":
    """Merge the given fields of every scenario into one string field.

    The listed fields that are present are removed from a copy of each
    scenario; their ``str()`` values, in the order given, are joined with
    ``separator`` and stored under ``concat_<field1>_<field2>...``.

    Args:
        fields (List[str]): List of field names to concatenate.
        separator (str, optional): Separator to use between field values. Defaults to ";".

    Returns:
        ScenarioList: A new ScenarioList with concatenated fields.

    Example:
        >>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
        >>> s.concatenate(['a', 'b', 'c'])
        ScenarioList([Scenario({'concat_a_b_c': '1;2;3'}), Scenario({'concat_a_b_c': '4;5;6'})])
    """
    merged = []
    for original in self:
        updated = original.copy()
        pieces = []
        for name in fields:
            if name not in updated:
                continue
            pieces.append(str(updated[name]))
            del updated[name]

        target = f"concat_{'_'.join(fields)}"
        updated[target] = separator.join(pieces)
        merged.append(updated)

    return ScenarioList(merged)
365
+
366
def unpack_dict(
    self, field: str, prefix: Optional[str] = None, drop_field: bool = False
) -> ScenarioList:
    """Promote the entries of a dictionary-valued field to top-level fields.

    :param field: Name of the field holding a dictionary.
    :param prefix: When truthy, prepended to every unpacked key.
    :param drop_field: When True, the original field is removed from the result.

    Example:

    >>> s = ScenarioList([Scenario({'a': 1, 'b': {'c': 2, 'd': 3}})])
    >>> s.unpack_dict('b')
    ScenarioList([Scenario({'a': 1, 'b': {'c': 2, 'd': 3}, 'c': 2, 'd': 3})])
    """
    unpacked = []
    for original in self:
        updated = original.copy()
        for inner_key, inner_value in original[field].items():
            target = prefix + inner_key if prefix else inner_key
            updated[target] = inner_value
        if drop_field:
            updated.pop(field)
        unpacked.append(updated)
    return ScenarioList(unpacked)
389
+
390
def transform(
    self, field: str, func: Callable, new_name: Optional[str] = None
) -> ScenarioList:
    """Apply ``func`` to one field of every scenario.

    :param field: Field whose value is passed to ``func``.
    :param func: Callable applied to that value.
    :param new_name: Field that receives the result; when falsy the result
        replaces ``field`` itself.
    """
    target = new_name if new_name else field
    transformed = []
    for original in self:
        updated = original.copy()
        updated[target] = func(original[field])
        transformed.append(updated)
    return ScenarioList(transformed)
400
+
156
401
  def mutate(
157
402
  self, new_var_string: str, functions_dict: Optional[dict[str, Callable]] = None
158
403
  ) -> ScenarioList:
@@ -242,6 +487,18 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
242
487
 
243
488
  return ScenarioList(new_data)
244
489
 
490
def from_urls(
    self, urls: list[str], field_name: Optional[str] = "text"
) -> ScenarioList:
    """Build a ScenarioList with one scenario per URL via ``Scenario.from_url``.

    :param urls: A list of URLs.
    :param field_name: The name of the field to store the text from the URLs.
    """
    scenarios = []
    for url in urls:
        scenarios.append(Scenario.from_url(url, field_name))
    return ScenarioList(scenarios)
501
+
245
502
  def select(self, *fields) -> ScenarioList:
246
503
  """
247
504
  Selects scenarios with only the references fields.
@@ -288,12 +545,28 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
288
545
  >>> s = ScenarioList.from_list("a", [1,2,3])
289
546
  >>> s.to_dataset()
290
547
  Dataset([{'a': [1, 2, 3]}])
548
+ >>> s = ScenarioList.from_list("a", [1,2,3]).add_list("b", [4,5,6])
549
+ >>> s.to_dataset()
550
+ Dataset([{'a': [1, 2, 3]}, {'b': [4, 5, 6]}])
291
551
  """
292
552
  from edsl.results.Dataset import Dataset
293
553
 
294
554
  keys = self[0].keys()
295
- data = {key: [scenario[key] for scenario in self.data] for key in keys}
296
- return Dataset([data])
555
+ data = [{key: [scenario[key] for scenario in self.data]} for key in keys]
556
+ return Dataset(data)
557
+
558
def split(
    self, field: str, split_on: str, index: int, new_name: Optional[str] = None
) -> ScenarioList:
    """Split a field of each scenario and keep one piece as a new field.

    :param field: Field whose value is split.
    :param split_on: Separator passed to the value's ``split``.
    :param index: Position of the piece to keep.
    :param new_name: Name of the new field; defaults to
        ``<field>_split_<index>``.
    """
    target = field + "_split_" + str(index) if new_name is None else new_name
    pieces = []
    for original in self:
        updated = original.copy()
        updated[target] = original[field].split(split_on)[index]
        pieces.append(updated)
    return ScenarioList(pieces)
297
570
 
298
571
  def add_list(self, name, values) -> ScenarioList:
299
572
  """Add a list of values to a ScenarioList.
@@ -352,6 +625,68 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
352
625
  data = cursor.fetchall()
353
626
  return cls([Scenario(dict(zip(columns, row))) for row in data])
354
627
 
628
@classmethod
def from_latex(cls, tex_file_path: str):
    """Create a ScenarioList with one scenario per non-blank line of a LaTeX file.

    Each scenario has four fields:

    - ``line_no``: 1-based line number in the file,
    - ``text``: the stripped line,
    - ``line_before`` / ``line_after``: the stripped text of the neighbouring
      non-blank lines, or ``None`` at either end of the file.

    :param tex_file_path: Path of the file to read.
    :return: An instance of ``cls`` (so subclasses get their own type back).
    """
    with open(tex_file_path, "r") as file:
        lines = file.readlines()

    # (0-based line index, stripped text) for every line that is not blank.
    non_blank_lines = [
        (i, line.strip()) for i, line in enumerate(lines) if line.strip()
    ]
    last = len(non_blank_lines) - 1

    scenarios = []
    for index, (line_no, text) in enumerate(non_blank_lines):
        entry = {
            "line_no": line_no + 1,  # Using 1-based index for line numbers
            "text": text,
            "line_before": non_blank_lines[index - 1][1] if index > 0 else None,
            "line_after": non_blank_lines[index + 1][1] if index < last else None,
        }
        scenarios.append(Scenario(entry))

    return cls(scenarios)
652
+
653
@classmethod
def from_google_doc(cls, url: str) -> ScenarioList:
    """Create a ScenarioList from a Google Doc.

    This method downloads the Google Doc as a Word file (.docx), saves it to a temporary file,
    reads it using the from_docx class method, and then removes the temporary file.

    Args:
        url (str): The URL to the Google Doc; it must contain ``/d/<id>/edit``.

    Returns:
        ScenarioList: An instance of the ScenarioList class.

    Raises:
        ValueError: If the document id cannot be extracted from ``url``.
        requests.HTTPError: If the export request is unsuccessful.
    """
    import os
    import tempfile
    import requests
    from docx import Document  # noqa: F401  (kept from the original; presumably a fail-fast check for python-docx -- TODO confirm)

    # Both markers are required to extract the id; a missing "/d/" used to
    # surface as an IndexError instead of the documented ValueError.
    if "/edit" not in url or "/d/" not in url:
        raise ValueError("Invalid Google Doc URL format.")
    doc_id = url.split("/d/")[1].split("/edit")[0]

    export_url = f"https://docs.google.com/document/d/{doc_id}/export?format=docx"

    # Download the Google Doc as a Word file (.docx)
    response = requests.get(export_url)
    response.raise_for_status()  # Ensure the request was successful

    # Save the Word file to a temporary file
    with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as temp_file:
        temp_file.write(response.content)
        temp_filename = temp_file.name

    # delete=False means nobody else removes the file, so clean up here.
    # NOTE(review): assumes from_docx reads the file eagerly -- confirm.
    try:
        return cls.from_docx(temp_filename)
    finally:
        os.remove(temp_filename)
689
+
355
690
  @classmethod
356
691
  def from_pandas(cls, df) -> ScenarioList:
357
692
  """Create a ScenarioList from a pandas DataFrame.
@@ -365,6 +700,62 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
365
700
  """
366
701
  return cls([Scenario(row) for row in df.to_dict(orient="records")])
367
702
 
703
@classmethod
def from_wikipedia(cls, url: str, table_index: int = 0) -> Optional[ScenarioList]:
    """
    Extract a table from a Wikipedia page as a ScenarioList.

    The page is downloaded once and its HTML tables are parsed with
    ``pandas.read_html``; the selected table is converted with ``from_pandas``.

    Parameters:
    url (str): The URL of the Wikipedia page.
    table_index (int): The index of the table to extract (default is 0).

    Returns:
    ScenarioList built from the requested table, or ``None`` if the page
    could not be fetched, parsed, or ``table_index`` is out of range
    (in which case the error is printed rather than raised).

    Example usage (not run, requires network access):

        url = "https://en.wikipedia.org/wiki/List_of_countries_by_GDP_(nominal)"
        scenarios = ScenarioList.from_wikipedia(url, 0)
    """
    from io import StringIO

    import pandas as pd
    import requests
    from requests.exceptions import RequestException

    try:
        # Fetch the page; raises HTTPError for bad responses
        response = requests.get(url)
        response.raise_for_status()

        # Parse the HTML already downloaded instead of fetching the URL again
        tables = pd.read_html(StringIO(response.text))

        # Ensure the requested table index is within the range of available tables
        if table_index >= len(tables) or table_index < 0:
            raise IndexError(
                f"Table index {table_index} is out of range. This page has {len(tables)} table(s)."
            )

        return cls.from_pandas(tables[table_index])

    except RequestException as e:
        print(f"Error fetching the URL: {e}")
    except ValueError as e:
        print(f"Error parsing tables: {e}")
    except IndexError as e:
        print(e)
    except Exception as e:
        print(f"An unexpected error occurred: {e}")

    # Best-effort API: every failure above is reported and ends up here.
    return None
758
+
368
759
  def to_key_value(self, field: str, value=None) -> Union[dict, set]:
369
760
  """Return the set of values in the field.
370
761
 
@@ -380,8 +771,121 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
380
771
  return {scenario[field]: scenario[value] for scenario in self}
381
772
 
382
773
@classmethod
def from_excel(
    cls, filename: str, sheet_name: Optional[str] = None
) -> ScenarioList:
    """Create a ScenarioList from an Excel file, one Scenario per row.

    If the Excel file contains multiple sheets and no sheet_name is provided,
    the method will print the available sheets and require the user to specify one.
    A workbook with a single sheet is loaded without needing a sheet name.

    Raises:
        ValueError: If the workbook has several sheets and ``sheet_name`` is None.

    Example:

    >>> import tempfile
    >>> import os
    >>> import pandas as pd
    >>> with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as f:
    ...     df1 = pd.DataFrame({
    ...         'name': ['Alice', 'Bob'],
    ...         'age': [30, 25],
    ...         'location': ['New York', 'Los Angeles']
    ...     })
    ...     df2 = pd.DataFrame({
    ...         'name': ['Charlie', 'David'],
    ...         'age': [35, 40],
    ...         'location': ['Chicago', 'Boston']
    ...     })
    ...     with pd.ExcelWriter(f.name) as writer:
    ...         df1.to_excel(writer, sheet_name='Sheet1', index=False)
    ...         df2.to_excel(writer, sheet_name='Sheet2', index=False)
    ...     temp_filename = f.name
    >>> scenario_list = ScenarioList.from_excel(temp_filename, sheet_name='Sheet1')
    >>> len(scenario_list)
    2
    >>> scenario_list[0]['name']
    'Alice'
    >>> scenario_list = ScenarioList.from_excel(temp_filename)  # Should raise an error and list sheets
    Traceback (most recent call last):
    ...
    ValueError: Please provide a sheet name to load data from.
    """
    from edsl.scenarios.Scenario import Scenario
    import pandas as pd

    if sheet_name is None:
        # Only in this case do we need every sheet: to count and list them.
        all_sheets = pd.read_excel(filename, sheet_name=None)
        if len(all_sheets) > 1:
            print("The Excel file contains multiple sheets:")
            for name in all_sheets.keys():
                print(f"- {name}")
            raise ValueError("Please provide a sheet name to load data from.")
        # Single sheet: reuse the frame already parsed instead of re-reading.
        df = all_sheets[list(all_sheets.keys())[0]]
    else:
        # A sheet was requested: parse just that one.
        df = pd.read_excel(filename, sheet_name=sheet_name)

    observations = [Scenario(row.to_dict()) for _, row in df.iterrows()]
    return cls(observations)
837
+
838
@classmethod
def from_google_sheet(cls, url: str, sheet_name: str = None) -> ScenarioList:
    """Create a ScenarioList from a Google Sheet.

    This method downloads the Google Sheet as an Excel file, saves it to a temporary file,
    reads it using the from_excel class method, and then removes the temporary file.

    Args:
        url (str): The URL to the Google Sheet; it must contain ``/d/<id>/edit``.
        sheet_name (str, optional): The name of the sheet to load. If None, the method will behave
            the same as from_excel regarding multiple sheets.

    Returns:
        ScenarioList: An instance of the ScenarioList class.

    Raises:
        ValueError: If the sheet id cannot be extracted from ``url``.
        requests.HTTPError: If the export request is unsuccessful.
    """
    import os
    import tempfile
    import requests

    # Both markers are required to extract the id; a missing "/d/" used to
    # surface as an IndexError instead of the documented ValueError.
    if "/edit" not in url or "/d/" not in url:
        raise ValueError("Invalid Google Sheet URL format.")
    sheet_id = url.split("/d/")[1].split("/edit")[0]

    export_url = (
        f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=xlsx"
    )

    # Download the Google Sheet as an Excel file
    response = requests.get(export_url)
    response.raise_for_status()  # Ensure the request was successful

    # Save the Excel file to a temporary file
    with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as temp_file:
        temp_file.write(response.content)
        temp_filename = temp_file.name

    # delete=False means nobody else removes the file, so clean up here,
    # including when from_excel raises (e.g. several sheets, no sheet_name).
    try:
        return cls.from_excel(temp_filename, sheet_name=sheet_name)
    finally:
        os.remove(temp_filename)
878
+
879
+ @classmethod
880
+ def from_csv(cls, source: Union[str, urllib.parse.ParseResult]) -> ScenarioList:
881
+ """Create a ScenarioList from a CSV file or URL.
882
+
883
+ Args:
884
+ source: A string representing either a local file path or a URL to a CSV file,
885
+ or a urllib.parse.ParseResult object for a URL.
886
+
887
+ Returns:
888
+ ScenarioList: A ScenarioList object containing the data from the CSV.
385
889
 
386
890
  Example:
387
891
 
@@ -397,15 +901,37 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
397
901
  'Alice'
398
902
  >>> scenario_list[1]['age']
399
903
  '25'
904
+
905
+ >>> url = "https://example.com/data.csv"
906
+ >>> ## scenario_list_from_url = ScenarioList.from_csv(url)
400
907
  """
401
908
  from edsl.scenarios.Scenario import Scenario
402
909
 
403
- observations = []
404
- with open(filename, "r") as f:
405
- reader = csv.reader(f)
910
+ def is_url(source):
911
+ try:
912
+ result = urllib.parse.urlparse(source)
913
+ return all([result.scheme, result.netloc])
914
+ except ValueError:
915
+ return False
916
+
917
+ if isinstance(source, str) and is_url(source):
918
+ with urllib.request.urlopen(source) as response:
919
+ csv_content = response.read().decode("utf-8")
920
+ csv_file = StringIO(csv_content)
921
+ elif isinstance(source, urllib.parse.ParseResult):
922
+ with urllib.request.urlopen(source.geturl()) as response:
923
+ csv_content = response.read().decode("utf-8")
924
+ csv_file = StringIO(csv_content)
925
+ else:
926
+ csv_file = open(source, "r")
927
+
928
+ try:
929
+ reader = csv.reader(csv_file)
406
930
  header = next(reader)
407
- for row in reader:
408
- observations.append(Scenario(dict(zip(header, row))))
931
+ observations = [Scenario(dict(zip(header, row))) for row in reader]
932
+ finally:
933
+ csv_file.close()
934
+
409
935
  return cls(observations)
410
936
 
411
937
  def _to_dict(self, sort=False) -> dict:
@@ -449,6 +975,16 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
449
975
 
450
976
  return cls([Scenario.from_dict(s) for s in data["scenarios"]])
451
977
 
978
@classmethod
def from_nested_dict(cls, data: dict) -> ScenarioList:
    """Create a `ScenarioList` from a nested dictionary.

    Each ``key -> values`` pair of ``data`` is added with ``add_list``.

    :param data: Mapping from field name to the values for that field.
    :return: An instance of ``cls``.
    """
    s = cls()
    for key, value in data.items():
        # Keep whatever add_list returns, so this works whether add_list
        # mutates in place and returns self or builds a new list.
        s = s.add_list(key, value)
    return s
987
+
452
988
  def code(self) -> str:
453
989
  ## TODO: Refactor to only use the questions actually in the survey
454
990
  """Create the Python code representation of a survey."""
@@ -484,17 +1020,6 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
484
1020
  table.add_row(str(i), s.rich_print())
485
1021
  return table
486
1022
 
487
- # def print(
488
- # self,
489
- # format: Optional[str] = None,
490
- # max_rows: Optional[int] = None,
491
- # pretty_labels: Optional[dict] = None,
492
- # filename: str = None,
493
- # ):
494
- # from edsl.utilities.interface import print_scenario_list
495
-
496
- # print_scenario_list(self[:max_rows])
497
-
498
1023
  def __getitem__(self, key: Union[int, slice]) -> Any:
499
1024
  """Return the item at the given index.
500
1025