edsl 0.1.33.dev1__py3-none-any.whl → 0.1.33.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163)
  1. edsl/TemplateLoader.py +24 -0
  2. edsl/__init__.py +8 -4
  3. edsl/agents/Agent.py +46 -14
  4. edsl/agents/AgentList.py +43 -0
  5. edsl/agents/Invigilator.py +125 -212
  6. edsl/agents/InvigilatorBase.py +140 -32
  7. edsl/agents/PromptConstructionMixin.py +43 -66
  8. edsl/agents/__init__.py +1 -0
  9. edsl/auto/AutoStudy.py +117 -0
  10. edsl/auto/StageBase.py +230 -0
  11. edsl/auto/StageGenerateSurvey.py +178 -0
  12. edsl/auto/StageLabelQuestions.py +125 -0
  13. edsl/auto/StagePersona.py +61 -0
  14. edsl/auto/StagePersonaDimensionValueRanges.py +88 -0
  15. edsl/auto/StagePersonaDimensionValues.py +74 -0
  16. edsl/auto/StagePersonaDimensions.py +69 -0
  17. edsl/auto/StageQuestions.py +73 -0
  18. edsl/auto/SurveyCreatorPipeline.py +21 -0
  19. edsl/auto/utilities.py +224 -0
  20. edsl/config.py +38 -39
  21. edsl/coop/PriceFetcher.py +58 -0
  22. edsl/coop/coop.py +39 -5
  23. edsl/data/Cache.py +35 -1
  24. edsl/data_transfer_models.py +120 -38
  25. edsl/enums.py +2 -0
  26. edsl/exceptions/language_models.py +25 -1
  27. edsl/exceptions/questions.py +62 -5
  28. edsl/exceptions/results.py +4 -0
  29. edsl/inference_services/AnthropicService.py +13 -11
  30. edsl/inference_services/AwsBedrock.py +19 -17
  31. edsl/inference_services/AzureAI.py +37 -20
  32. edsl/inference_services/GoogleService.py +16 -12
  33. edsl/inference_services/GroqService.py +2 -0
  34. edsl/inference_services/InferenceServiceABC.py +24 -0
  35. edsl/inference_services/MistralAIService.py +120 -0
  36. edsl/inference_services/OpenAIService.py +41 -50
  37. edsl/inference_services/TestService.py +71 -0
  38. edsl/inference_services/models_available_cache.py +0 -6
  39. edsl/inference_services/registry.py +4 -0
  40. edsl/jobs/Answers.py +10 -12
  41. edsl/jobs/FailedQuestion.py +78 -0
  42. edsl/jobs/Jobs.py +18 -13
  43. edsl/jobs/buckets/TokenBucket.py +39 -14
  44. edsl/jobs/interviews/Interview.py +297 -77
  45. edsl/jobs/interviews/InterviewExceptionEntry.py +83 -19
  46. edsl/jobs/interviews/interview_exception_tracking.py +0 -70
  47. edsl/jobs/interviews/retry_management.py +3 -1
  48. edsl/jobs/runners/JobsRunnerAsyncio.py +116 -70
  49. edsl/jobs/runners/JobsRunnerStatusMixin.py +1 -1
  50. edsl/jobs/tasks/QuestionTaskCreator.py +30 -23
  51. edsl/jobs/tasks/TaskHistory.py +131 -213
  52. edsl/language_models/LanguageModel.py +239 -129
  53. edsl/language_models/ModelList.py +2 -2
  54. edsl/language_models/RegisterLanguageModelsMeta.py +14 -29
  55. edsl/language_models/fake_openai_call.py +15 -0
  56. edsl/language_models/fake_openai_service.py +61 -0
  57. edsl/language_models/registry.py +15 -2
  58. edsl/language_models/repair.py +0 -19
  59. edsl/language_models/utilities.py +61 -0
  60. edsl/prompts/Prompt.py +52 -2
  61. edsl/questions/AnswerValidatorMixin.py +23 -26
  62. edsl/questions/QuestionBase.py +273 -242
  63. edsl/questions/QuestionBaseGenMixin.py +133 -0
  64. edsl/questions/QuestionBasePromptsMixin.py +266 -0
  65. edsl/questions/QuestionBudget.py +6 -0
  66. edsl/questions/QuestionCheckBox.py +227 -35
  67. edsl/questions/QuestionExtract.py +98 -27
  68. edsl/questions/QuestionFreeText.py +46 -29
  69. edsl/questions/QuestionFunctional.py +7 -0
  70. edsl/questions/QuestionList.py +141 -22
  71. edsl/questions/QuestionMultipleChoice.py +173 -64
  72. edsl/questions/QuestionNumerical.py +87 -46
  73. edsl/questions/QuestionRank.py +182 -24
  74. edsl/questions/RegisterQuestionsMeta.py +31 -12
  75. edsl/questions/ResponseValidatorABC.py +169 -0
  76. edsl/questions/__init__.py +3 -4
  77. edsl/questions/decorators.py +21 -0
  78. edsl/questions/derived/QuestionLikertFive.py +10 -5
  79. edsl/questions/derived/QuestionLinearScale.py +11 -1
  80. edsl/questions/derived/QuestionTopK.py +6 -0
  81. edsl/questions/derived/QuestionYesNo.py +16 -1
  82. edsl/questions/descriptors.py +43 -7
  83. edsl/questions/prompt_templates/question_budget.jinja +13 -0
  84. edsl/questions/prompt_templates/question_checkbox.jinja +32 -0
  85. edsl/questions/prompt_templates/question_extract.jinja +11 -0
  86. edsl/questions/prompt_templates/question_free_text.jinja +3 -0
  87. edsl/questions/prompt_templates/question_linear_scale.jinja +11 -0
  88. edsl/questions/prompt_templates/question_list.jinja +17 -0
  89. edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -0
  90. edsl/questions/prompt_templates/question_numerical.jinja +37 -0
  91. edsl/questions/question_registry.py +6 -2
  92. edsl/questions/templates/__init__.py +0 -0
  93. edsl/questions/templates/checkbox/__init__.py +0 -0
  94. edsl/questions/templates/checkbox/answering_instructions.jinja +10 -0
  95. edsl/questions/templates/checkbox/question_presentation.jinja +22 -0
  96. edsl/questions/templates/extract/answering_instructions.jinja +7 -0
  97. edsl/questions/templates/extract/question_presentation.jinja +1 -0
  98. edsl/questions/templates/free_text/__init__.py +0 -0
  99. edsl/questions/templates/free_text/answering_instructions.jinja +0 -0
  100. edsl/questions/templates/free_text/question_presentation.jinja +1 -0
  101. edsl/questions/templates/likert_five/__init__.py +0 -0
  102. edsl/questions/templates/likert_five/answering_instructions.jinja +10 -0
  103. edsl/questions/templates/likert_five/question_presentation.jinja +12 -0
  104. edsl/questions/templates/linear_scale/__init__.py +0 -0
  105. edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -0
  106. edsl/questions/templates/linear_scale/question_presentation.jinja +5 -0
  107. edsl/questions/templates/list/__init__.py +0 -0
  108. edsl/questions/templates/list/answering_instructions.jinja +4 -0
  109. edsl/questions/templates/list/question_presentation.jinja +5 -0
  110. edsl/questions/templates/multiple_choice/__init__.py +0 -0
  111. edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -0
  112. edsl/questions/templates/multiple_choice/html.jinja +0 -0
  113. edsl/questions/templates/multiple_choice/question_presentation.jinja +12 -0
  114. edsl/questions/templates/numerical/__init__.py +0 -0
  115. edsl/questions/templates/numerical/answering_instructions.jinja +8 -0
  116. edsl/questions/templates/numerical/question_presentation.jinja +7 -0
  117. edsl/questions/templates/rank/answering_instructions.jinja +11 -0
  118. edsl/questions/templates/rank/question_presentation.jinja +15 -0
  119. edsl/questions/templates/top_k/__init__.py +0 -0
  120. edsl/questions/templates/top_k/answering_instructions.jinja +8 -0
  121. edsl/questions/templates/top_k/question_presentation.jinja +22 -0
  122. edsl/questions/templates/yes_no/__init__.py +0 -0
  123. edsl/questions/templates/yes_no/answering_instructions.jinja +6 -0
  124. edsl/questions/templates/yes_no/question_presentation.jinja +12 -0
  125. edsl/results/Dataset.py +20 -0
  126. edsl/results/DatasetExportMixin.py +41 -47
  127. edsl/results/DatasetTree.py +145 -0
  128. edsl/results/Result.py +32 -5
  129. edsl/results/Results.py +131 -45
  130. edsl/results/ResultsDBMixin.py +3 -3
  131. edsl/results/Selector.py +118 -0
  132. edsl/results/tree_explore.py +115 -0
  133. edsl/scenarios/Scenario.py +10 -4
  134. edsl/scenarios/ScenarioList.py +348 -39
  135. edsl/scenarios/ScenarioListExportMixin.py +9 -0
  136. edsl/study/SnapShot.py +8 -1
  137. edsl/surveys/RuleCollection.py +2 -2
  138. edsl/surveys/Survey.py +634 -315
  139. edsl/surveys/SurveyExportMixin.py +71 -9
  140. edsl/surveys/SurveyFlowVisualizationMixin.py +2 -1
  141. edsl/surveys/SurveyQualtricsImport.py +75 -4
  142. edsl/surveys/instructions/ChangeInstruction.py +47 -0
  143. edsl/surveys/instructions/Instruction.py +34 -0
  144. edsl/surveys/instructions/InstructionCollection.py +77 -0
  145. edsl/surveys/instructions/__init__.py +0 -0
  146. edsl/templates/error_reporting/base.html +24 -0
  147. edsl/templates/error_reporting/exceptions_by_model.html +35 -0
  148. edsl/templates/error_reporting/exceptions_by_question_name.html +17 -0
  149. edsl/templates/error_reporting/exceptions_by_type.html +17 -0
  150. edsl/templates/error_reporting/interview_details.html +111 -0
  151. edsl/templates/error_reporting/interviews.html +10 -0
  152. edsl/templates/error_reporting/overview.html +5 -0
  153. edsl/templates/error_reporting/performance_plot.html +2 -0
  154. edsl/templates/error_reporting/report.css +74 -0
  155. edsl/templates/error_reporting/report.html +118 -0
  156. edsl/templates/error_reporting/report.js +25 -0
  157. {edsl-0.1.33.dev1.dist-info → edsl-0.1.33.dev2.dist-info}/METADATA +4 -2
  158. edsl-0.1.33.dev2.dist-info/RECORD +289 -0
  159. edsl/jobs/interviews/InterviewTaskBuildingMixin.py +0 -286
  160. edsl/utilities/gcp_bucket/simple_example.py +0 -9
  161. edsl-0.1.33.dev1.dist-info/RECORD +0 -209
  162. {edsl-0.1.33.dev1.dist-info → edsl-0.1.33.dev2.dist-info}/LICENSE +0 -0
  163. {edsl-0.1.33.dev1.dist-info → edsl-0.1.33.dev2.dist-info}/WHEEL +0 -0
edsl/scenarios/ScenarioList.py CHANGED
@@ -6,6 +6,11 @@ import csv
6
6
  import random
7
7
  from collections import UserList, Counter
8
8
  from collections.abc import Iterable
9
+ import urllib.parse
10
+ import urllib.request
11
+ from io import StringIO
12
+ from collections import defaultdict
13
+ import inspect
9
14
 
10
15
  from simpleeval import EvalWithCompoundTypes
11
16
 
@@ -15,6 +20,9 @@ from edsl.scenarios.Scenario import Scenario
15
20
  from edsl.scenarios.ScenarioListPdfMixin import ScenarioListPdfMixin
16
21
  from edsl.scenarios.ScenarioListExportMixin import ScenarioListExportMixin
17
22
 
23
+ from edsl.conjure.naming_utilities import sanitize_string
24
+ from edsl.utilities.utilities import is_valid_variable_name
25
+
18
26
 
19
27
  class ScenarioListMixin(ScenarioListPdfMixin, ScenarioListExportMixin):
20
28
  pass
@@ -23,12 +31,171 @@ class ScenarioListMixin(ScenarioListPdfMixin, ScenarioListExportMixin):
23
31
  class ScenarioList(Base, UserList, ScenarioListMixin):
24
32
  """Class for creating a list of scenarios to be used in a survey."""
25
33
 
26
- def __init__(self, data: Optional[list] = None):
34
+ def __init__(self, data: Optional[list] = None, codebook: Optional[dict] = None):
27
35
  """Initialize the ScenarioList class."""
28
36
  if data is not None:
29
37
  super().__init__(data)
30
38
  else:
31
39
  super().__init__([])
40
+ self.codebook = codebook or {}
41
+
42
+ def give_valid_names(self) -> ScenarioList:
43
+ """Give valid names to the scenario keys.
44
+
45
+ >>> s = ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
46
+ >>> s.give_valid_names()
47
+ ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
48
+ >>> s = ScenarioList([Scenario({'are you there John?': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
49
+ >>> s.give_valid_names()
50
+ ScenarioList([Scenario({'john': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
51
+ """
52
+ codebook = {}
53
+ new_scenaerios = []
54
+ for scenario in self:
55
+ new_scenario = {}
56
+ for key in scenario:
57
+ if not is_valid_variable_name(key):
58
+ if key in codebook:
59
+ new_key = codebook[key]
60
+ else:
61
+ new_key = sanitize_string(key)
62
+ if not is_valid_variable_name(new_key):
63
+ new_key = f"var_{len(codebook)}"
64
+ codebook[key] = new_key
65
+ new_scenario[new_key] = scenario[key]
66
+ else:
67
+ new_scenario[key] = scenario[key]
68
+ new_scenaerios.append(Scenario(new_scenario))
69
+ return ScenarioList(new_scenaerios, codebook)
70
+
71
+ def unpivot(self, id_vars=None, value_vars=None):
72
+ """
73
+ Unpivot the ScenarioList, allowing for id variables to be specified.
74
+
75
+ Parameters:
76
+ id_vars (list): Fields to use as identifier variables (kept in each entry)
77
+ value_vars (list): Fields to unpivot. If None, all fields not in id_vars will be used.
78
+
79
+ Example:
80
+ >>> s = ScenarioList([
81
+ ... Scenario({'id': 1, 'year': 2020, 'a': 10, 'b': 20}),
82
+ ... Scenario({'id': 2, 'year': 2021, 'a': 15, 'b': 25})
83
+ ... ])
84
+ >>> s.unpivot(id_vars=['id', 'year'], value_vars=['a', 'b'])
85
+ ScenarioList([Scenario({'id': 1, 'year': 2020, 'variable': 'a', 'value': 10}), Scenario({'id': 1, 'year': 2020, 'variable': 'b', 'value': 20}), Scenario({'id': 2, 'year': 2021, 'variable': 'a', 'value': 15}), Scenario({'id': 2, 'year': 2021, 'variable': 'b', 'value': 25})])
86
+ """
87
+ if id_vars is None:
88
+ id_vars = []
89
+ if value_vars is None:
90
+ value_vars = [field for field in self[0].keys() if field not in id_vars]
91
+
92
+ new_scenarios = []
93
+ for scenario in self:
94
+ for var in value_vars:
95
+ new_scenario = {id_var: scenario[id_var] for id_var in id_vars}
96
+ new_scenario["variable"] = var
97
+ new_scenario["value"] = scenario[var]
98
+ new_scenarios.append(Scenario(new_scenario))
99
+
100
+ return ScenarioList(new_scenarios)
101
+
102
+ def pivot(self, id_vars, var_name="variable", value_name="value"):
103
+ """
104
+ Pivot the ScenarioList from long to wide format.
105
+
106
+ Parameters:
107
+ id_vars (list): Fields to use as identifier variables
108
+ var_name (str): Name of the variable column (default: 'variable')
109
+ value_name (str): Name of the value column (default: 'value')
110
+
111
+ Example:
112
+ >>> s = ScenarioList([
113
+ ... Scenario({'id': 1, 'year': 2020, 'variable': 'a', 'value': 10}),
114
+ ... Scenario({'id': 1, 'year': 2020, 'variable': 'b', 'value': 20}),
115
+ ... Scenario({'id': 2, 'year': 2021, 'variable': 'a', 'value': 15}),
116
+ ... Scenario({'id': 2, 'year': 2021, 'variable': 'b', 'value': 25})
117
+ ... ])
118
+ >>> s.pivot(id_vars=['id', 'year'])
119
+ ScenarioList([Scenario({'id': 1, 'year': 2020, 'a': 10, 'b': 20}), Scenario({'id': 2, 'year': 2021, 'a': 15, 'b': 25})])
120
+ """
121
+ pivoted_dict = {}
122
+
123
+ for scenario in self:
124
+ # Create a tuple of id values to use as a key
125
+ id_key = tuple(scenario[id_var] for id_var in id_vars)
126
+
127
+ # If this combination of id values hasn't been seen before, initialize it
128
+ if id_key not in pivoted_dict:
129
+ pivoted_dict[id_key] = {id_var: scenario[id_var] for id_var in id_vars}
130
+
131
+ # Add the variable-value pair to the dict
132
+ variable = scenario[var_name]
133
+ value = scenario[value_name]
134
+ pivoted_dict[id_key][variable] = value
135
+
136
+ # Convert the dict of dicts to a list of Scenarios
137
+ pivoted_scenarios = [
138
+ Scenario(dict(zip(id_vars, id_key), **values))
139
+ for id_key, values in pivoted_dict.items()
140
+ ]
141
+
142
+ return ScenarioList(pivoted_scenarios)
143
+
144
+ def group_by(self, id_vars, variables, func):
145
+ """
146
+ Group the ScenarioList by id_vars and apply a function to the specified variables.
147
+
148
+ Parameters:
149
+ id_vars (list): Fields to use as identifier variables for grouping
150
+ variables (list): Fields to pass to the aggregation function
151
+ func (callable): Function to apply to the grouped variables.
152
+ Should accept lists of values for each variable.
153
+
154
+ Returns:
155
+ ScenarioList: A new ScenarioList with the grouped and aggregated results
156
+
157
+ Example:
158
+ >>> def avg_sum(a, b):
159
+ ... return {'avg_a': sum(a) / len(a), 'sum_b': sum(b)}
160
+ >>> s = ScenarioList([
161
+ ... Scenario({'group': 'A', 'year': 2020, 'a': 10, 'b': 20}),
162
+ ... Scenario({'group': 'A', 'year': 2021, 'a': 15, 'b': 25}),
163
+ ... Scenario({'group': 'B', 'year': 2020, 'a': 12, 'b': 22}),
164
+ ... Scenario({'group': 'B', 'year': 2021, 'a': 17, 'b': 27})
165
+ ... ])
166
+ >>> s.group_by(id_vars=['group'], variables=['a', 'b'], func=avg_sum)
167
+ ScenarioList([Scenario({'group': 'A', 'avg_a': 12.5, 'sum_b': 45}), Scenario({'group': 'B', 'avg_a': 14.5, 'sum_b': 49})])
168
+ """
169
+ # Check if the function is compatible with the specified variables
170
+ func_params = inspect.signature(func).parameters
171
+ if len(func_params) != len(variables):
172
+ raise ValueError(
173
+ f"Function {func.__name__} expects {len(func_params)} arguments, but {len(variables)} variables were provided"
174
+ )
175
+
176
+ # Group the scenarios
177
+ grouped = defaultdict(lambda: defaultdict(list))
178
+ for scenario in self:
179
+ key = tuple(scenario[id_var] for id_var in id_vars)
180
+ for var in variables:
181
+ grouped[key][var].append(scenario[var])
182
+
183
+ # Apply the function to each group
184
+ result = []
185
+ for key, group in grouped.items():
186
+ try:
187
+ aggregated = func(*[group[var] for var in variables])
188
+ except Exception as e:
189
+ raise ValueError(f"Error applying function to group {key}: {str(e)}")
190
+
191
+ if not isinstance(aggregated, dict):
192
+ raise ValueError(f"Function {func.__name__} must return a dictionary")
193
+
194
+ new_scenario = dict(zip(id_vars, key))
195
+ new_scenario.update(aggregated)
196
+ result.append(Scenario(new_scenario))
197
+
198
+ return ScenarioList(result)
32
199
 
33
200
  @property
34
201
  def parameters(self) -> set:
@@ -153,6 +320,71 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
153
320
  new_scenarios.append(new_scenario)
154
321
  return ScenarioList(new_scenarios)
155
322
 
323
+ def concatenate(self, fields: List[str], separator: str = ";") -> "ScenarioList":
324
+ """Concatenate specified fields into a single field.
325
+
326
+ Args:
327
+ fields (List[str]): List of field names to concatenate.
328
+ separator (str, optional): Separator to use between field values. Defaults to ";".
329
+
330
+ Returns:
331
+ ScenarioList: A new ScenarioList with concatenated fields.
332
+
333
+ Example:
334
+ >>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
335
+ >>> s.concatenate(['a', 'b', 'c'])
336
+ ScenarioList([Scenario({'concat_a_b_c': '1;2;3'}), Scenario({'concat_a_b_c': '4;5;6'})])
337
+ """
338
+ new_scenarios = []
339
+ for scenario in self:
340
+ new_scenario = scenario.copy()
341
+ concat_values = []
342
+ for field in fields:
343
+ if field in new_scenario:
344
+ concat_values.append(str(new_scenario[field]))
345
+ del new_scenario[field]
346
+
347
+ new_field_name = f"concat_{'_'.join(fields)}"
348
+ new_scenario[new_field_name] = separator.join(concat_values)
349
+ new_scenarios.append(new_scenario)
350
+
351
+ return ScenarioList(new_scenarios)
352
+
353
+ def unpack_dict(
354
+ self, field: str, prefix: Optional[str] = None, drop_field: bool = False
355
+ ) -> ScenarioList:
356
+ """Unpack a dictionary field into separate fields.
357
+
358
+ Example:
359
+
360
+ >>> s = ScenarioList([Scenario({'a': 1, 'b': {'c': 2, 'd': 3}})])
361
+ >>> s.unpack_dict('b')
362
+ ScenarioList([Scenario({'a': 1, 'b': {'c': 2, 'd': 3}, 'c': 2, 'd': 3})])
363
+ """
364
+ new_scenarios = []
365
+ for scenario in self:
366
+ new_scenario = scenario.copy()
367
+ for key, value in scenario[field].items():
368
+ if prefix:
369
+ new_scenario[prefix + key] = value
370
+ else:
371
+ new_scenario[key] = value
372
+ if drop_field:
373
+ new_scenario.pop(field)
374
+ new_scenarios.append(new_scenario)
375
+ return ScenarioList(new_scenarios)
376
+
377
+ def transform(
378
+ self, field: str, func: Callable, new_name: Optional[str] = None
379
+ ) -> ScenarioList:
380
+ """Transform a field using a function."""
381
+ new_scenarios = []
382
+ for scenario in self:
383
+ new_scenario = scenario.copy()
384
+ new_scenario[new_name or field] = func(scenario[field])
385
+ new_scenarios.append(new_scenario)
386
+ return ScenarioList(new_scenarios)
387
+
156
388
  def mutate(
157
389
  self, new_var_string: str, functions_dict: Optional[dict[str, Callable]] = None
158
390
  ) -> ScenarioList:
@@ -310,6 +542,19 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
310
542
  data = [{key: [scenario[key] for scenario in self.data]} for key in keys]
311
543
  return Dataset(data)
312
544
 
545
+ def split(
546
+ self, field: str, split_on: str, index: int, new_name: Optional[str] = None
547
+ ) -> ScenarioList:
548
+ """Split a scenario fiel in multiple fields."""
549
+ if new_name is None:
550
+ new_name = field + "_split_" + str(index)
551
+ new_scenarios = []
552
+ for scenario in self:
553
+ new_scenario = scenario.copy()
554
+ new_scenario[new_name] = scenario[field].split(split_on)[index]
555
+ new_scenarios.append(new_scenario)
556
+ return ScenarioList(new_scenarios)
557
+
313
558
  def add_list(self, name, values) -> ScenarioList:
314
559
  """Add a list of values to a ScenarioList.
315
560
 
@@ -392,37 +637,6 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
392
637
 
393
638
  return ScenarioList([Scenario(entry) for entry in processed_lines])
394
639
 
395
- @classmethod
396
- def from_docx(cls, docx_file_path: str):
397
- from docx import Document
398
-
399
- doc = Document(docx_file_path)
400
- lines = []
401
-
402
- # Extract text from paragraphs, treating each paragraph as a line
403
- for para in doc.paragraphs:
404
- lines.extend(para.text.splitlines())
405
-
406
- processed_lines = []
407
- non_blank_lines = [
408
- (i, line.strip()) for i, line in enumerate(lines) if line.strip()
409
- ]
410
-
411
- for index, (line_no, text) in enumerate(non_blank_lines):
412
- entry = {
413
- "line_no": line_no + 1, # Using 1-based index for line numbers
414
- "text": text,
415
- "line_before": non_blank_lines[index - 1][1] if index > 0 else None,
416
- "line_after": (
417
- non_blank_lines[index + 1][1]
418
- if index < len(non_blank_lines) - 1
419
- else None
420
- ),
421
- }
422
- processed_lines.append(entry)
423
-
424
- return ScenarioList([Scenario(entry) for entry in processed_lines])
425
-
426
640
  @classmethod
427
641
  def from_google_doc(cls, url: str) -> ScenarioList:
428
642
  """Create a ScenarioList from a Google Doc.
@@ -473,6 +687,62 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
473
687
  """
474
688
  return cls([Scenario(row) for row in df.to_dict(orient="records")])
475
689
 
690
+ @classmethod
691
+ def from_wikipedia(cls, url: str, table_index: int = 0):
692
+ """
693
+ Extracts a table from a Wikipedia page.
694
+
695
+ Parameters:
696
+ url (str): The URL of the Wikipedia page.
697
+ table_index (int): The index of the table to extract (default is 0).
698
+
699
+ Returns:
700
+ pd.DataFrame: A DataFrame containing the extracted table.
701
+ # # Example usage
702
+ # url = "https://en.wikipedia.org/wiki/List_of_countries_by_GDP_(nominal)"
703
+ # df = from_wikipedia(url, 0)
704
+
705
+ # if not df.empty:
706
+ # print(df.head())
707
+ # else:
708
+ # print("Failed to extract table.")
709
+
710
+
711
+ """
712
+ import pandas as pd
713
+ import requests
714
+ from requests.exceptions import RequestException
715
+
716
+ try:
717
+ # Check if the URL is reachable
718
+ response = requests.get(url)
719
+ response.raise_for_status() # Raises HTTPError for bad responses
720
+
721
+ # Extract tables from the Wikipedia page
722
+ tables = pd.read_html(url)
723
+
724
+ # Ensure the requested table index is within the range of available tables
725
+ if table_index >= len(tables) or table_index < 0:
726
+ raise IndexError(
727
+ f"Table index {table_index} is out of range. This page has {len(tables)} table(s)."
728
+ )
729
+
730
+ # Return the requested table as a DataFrame
731
+ # return tables[table_index]
732
+ return cls.from_pandas(tables[table_index])
733
+
734
+ except RequestException as e:
735
+ print(f"Error fetching the URL: {e}")
736
+ except ValueError as e:
737
+ print(f"Error parsing tables: {e}")
738
+ except IndexError as e:
739
+ print(e)
740
+ except Exception as e:
741
+ print(f"An unexpected error occurred: {e}")
742
+
743
+ # Return an empty DataFrame in case of an error
744
+ # return cls.from_pandas(pd.DataFrame())
745
+
476
746
  def to_key_value(self, field: str, value=None) -> Union[dict, set]:
477
747
  """Return the set of values in the field.
478
748
 
@@ -594,8 +864,15 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
594
864
  return cls.from_excel(temp_filename, sheet_name=sheet_name)
595
865
 
596
866
  @classmethod
597
- def from_csv(cls, filename: str) -> ScenarioList:
598
- """Create a ScenarioList from a CSV file.
867
+ def from_csv(cls, source: Union[str, urllib.parse.ParseResult]) -> ScenarioList:
868
+ """Create a ScenarioList from a CSV file or URL.
869
+
870
+ Args:
871
+ source: A string representing either a local file path or a URL to a CSV file,
872
+ or a urllib.parse.ParseResult object for a URL.
873
+
874
+ Returns:
875
+ ScenarioList: A ScenarioList object containing the data from the CSV.
599
876
 
600
877
  Example:
601
878
 
@@ -611,15 +888,37 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
611
888
  'Alice'
612
889
  >>> scenario_list[1]['age']
613
890
  '25'
891
+
892
+ >>> url = "https://example.com/data.csv"
893
+ >>> ## scenario_list_from_url = ScenarioList.from_csv(url)
614
894
  """
615
895
  from edsl.scenarios.Scenario import Scenario
616
896
 
617
- observations = []
618
- with open(filename, "r") as f:
619
- reader = csv.reader(f)
897
+ def is_url(source):
898
+ try:
899
+ result = urllib.parse.urlparse(source)
900
+ return all([result.scheme, result.netloc])
901
+ except ValueError:
902
+ return False
903
+
904
+ if isinstance(source, str) and is_url(source):
905
+ with urllib.request.urlopen(source) as response:
906
+ csv_content = response.read().decode("utf-8")
907
+ csv_file = StringIO(csv_content)
908
+ elif isinstance(source, urllib.parse.ParseResult):
909
+ with urllib.request.urlopen(source.geturl()) as response:
910
+ csv_content = response.read().decode("utf-8")
911
+ csv_file = StringIO(csv_content)
912
+ else:
913
+ csv_file = open(source, "r")
914
+
915
+ try:
916
+ reader = csv.reader(csv_file)
620
917
  header = next(reader)
621
- for row in reader:
622
- observations.append(Scenario(dict(zip(header, row))))
918
+ observations = [Scenario(dict(zip(header, row))) for row in reader]
919
+ finally:
920
+ csv_file.close()
921
+
623
922
  return cls(observations)
624
923
 
625
924
  def _to_dict(self, sort=False) -> dict:
@@ -663,6 +962,16 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
663
962
 
664
963
  return cls([Scenario.from_dict(s) for s in data["scenarios"]])
665
964
 
965
+ @classmethod
966
+ def from_nested_dict(cls, data: dict) -> ScenarioList:
967
+ """Create a `ScenarioList` from a nested dictionary."""
968
+ from edsl.scenarios.Scenario import Scenario
969
+
970
+ s = ScenarioList()
971
+ for key, value in data.items():
972
+ s.add_list(key, value)
973
+ return s
974
+
666
975
  def code(self) -> str:
667
976
  ## TODO: Refactor to only use the questions actually in the survey
668
977
  """Create the Python code representation of a survey."""
edsl/scenarios/ScenarioListExportMixin.py CHANGED
@@ -41,3 +41,12 @@ class ScenarioListExportMixin(DatasetExportMixin):
41
41
  def __init_subclass__(cls, **kwargs):
42
42
  super().__init_subclass__(**kwargs)
43
43
  decorate_methods_from_mixin(cls, DatasetExportMixin)
44
+
45
+ def to_docx(self, filename: str):
46
+ """Export the ScenarioList to a .docx file."""
47
+ dataset = self.to_dataset()
48
+ from edsl.results.DatasetTree import Tree
49
+
50
+ tree = Tree(dataset)
51
+ tree.construct_tree()
52
+ tree.to_docx(filename)
edsl/study/SnapShot.py CHANGED
@@ -57,10 +57,17 @@ class SnapShot:
57
57
  from edsl.Base import Base
58
58
  from edsl.study.Study import Study
59
59
 
60
+ def is_edsl_object(obj):
61
+ package_name = "edsl"
62
+ cls = obj.__class__
63
+ module_name = cls.__module__
64
+ return module_name.startswith(package_name)
65
+
60
66
  for name, value in namespace.items():
61
67
  # TODO check this code logic (if there are other objects with to_dict method that are not from edsl)
62
68
  if (
63
- hasattr(value, "to_dict")
69
+ is_edsl_object(value)
70
+ and hasattr(value, "to_dict")
64
71
  and not inspect.isclass(value)
65
72
  and value.__class__ not in [o.__class__ for o in self.exclude]
66
73
  ):
edsl/surveys/RuleCollection.py CHANGED
@@ -1,6 +1,6 @@
1
1
  """A collection of rules for a survey."""
2
2
 
3
- from typing import List, Union, Any
3
+ from typing import List, Union, Any, Optional
4
4
  from collections import defaultdict, UserList
5
5
 
6
6
  from edsl.exceptions import (
@@ -24,7 +24,7 @@ NextQuestion = namedtuple(
24
24
  class RuleCollection(UserList):
25
25
  """A collection of rules for a particular survey."""
26
26
 
27
- def __init__(self, num_questions: int = None, rules: List[Rule] = None):
27
+ def __init__(self, num_questions: Optional[int] = None, rules: List[Rule] = None):
28
28
  """Initialize the RuleCollection object.
29
29
 
30
30
  :param num_questions: The number of questions in the survey.