edsl 0.1.33__py3-none-any.whl → 0.1.33.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. edsl/Base.py +3 -9
  2. edsl/__init__.py +3 -8
  3. edsl/__version__.py +1 -1
  4. edsl/agents/Agent.py +8 -40
  5. edsl/agents/AgentList.py +0 -43
  6. edsl/agents/Invigilator.py +219 -135
  7. edsl/agents/InvigilatorBase.py +59 -148
  8. edsl/agents/{PromptConstructor.py → PromptConstructionMixin.py} +89 -138
  9. edsl/agents/__init__.py +0 -1
  10. edsl/config.py +56 -47
  11. edsl/coop/coop.py +7 -50
  12. edsl/data/Cache.py +1 -35
  13. edsl/data_transfer_models.py +38 -73
  14. edsl/enums.py +0 -4
  15. edsl/exceptions/language_models.py +1 -25
  16. edsl/exceptions/questions.py +5 -62
  17. edsl/exceptions/results.py +0 -4
  18. edsl/inference_services/AnthropicService.py +11 -13
  19. edsl/inference_services/AwsBedrock.py +17 -19
  20. edsl/inference_services/AzureAI.py +20 -37
  21. edsl/inference_services/GoogleService.py +12 -16
  22. edsl/inference_services/GroqService.py +0 -2
  23. edsl/inference_services/InferenceServiceABC.py +3 -58
  24. edsl/inference_services/OpenAIService.py +54 -48
  25. edsl/inference_services/models_available_cache.py +6 -0
  26. edsl/inference_services/registry.py +0 -6
  27. edsl/jobs/Answers.py +12 -10
  28. edsl/jobs/Jobs.py +21 -36
  29. edsl/jobs/buckets/BucketCollection.py +15 -24
  30. edsl/jobs/buckets/TokenBucket.py +14 -93
  31. edsl/jobs/interviews/Interview.py +78 -366
  32. edsl/jobs/interviews/InterviewExceptionEntry.py +19 -85
  33. edsl/jobs/interviews/InterviewTaskBuildingMixin.py +286 -0
  34. edsl/jobs/interviews/{InterviewExceptionCollection.py → interview_exception_tracking.py} +68 -14
  35. edsl/jobs/interviews/retry_management.py +37 -0
  36. edsl/jobs/runners/JobsRunnerAsyncio.py +175 -146
  37. edsl/jobs/runners/JobsRunnerStatusMixin.py +333 -0
  38. edsl/jobs/tasks/QuestionTaskCreator.py +23 -30
  39. edsl/jobs/tasks/TaskHistory.py +213 -148
  40. edsl/language_models/LanguageModel.py +156 -261
  41. edsl/language_models/ModelList.py +2 -2
  42. edsl/language_models/RegisterLanguageModelsMeta.py +29 -14
  43. edsl/language_models/registry.py +6 -23
  44. edsl/language_models/repair.py +19 -0
  45. edsl/prompts/Prompt.py +2 -52
  46. edsl/questions/AnswerValidatorMixin.py +26 -23
  47. edsl/questions/QuestionBase.py +249 -329
  48. edsl/questions/QuestionBudget.py +41 -99
  49. edsl/questions/QuestionCheckBox.py +35 -227
  50. edsl/questions/QuestionExtract.py +27 -98
  51. edsl/questions/QuestionFreeText.py +29 -52
  52. edsl/questions/QuestionFunctional.py +0 -7
  53. edsl/questions/QuestionList.py +22 -141
  54. edsl/questions/QuestionMultipleChoice.py +65 -159
  55. edsl/questions/QuestionNumerical.py +46 -88
  56. edsl/questions/QuestionRank.py +24 -182
  57. edsl/questions/RegisterQuestionsMeta.py +12 -31
  58. edsl/questions/__init__.py +4 -3
  59. edsl/questions/derived/QuestionLikertFive.py +5 -10
  60. edsl/questions/derived/QuestionLinearScale.py +2 -15
  61. edsl/questions/derived/QuestionTopK.py +1 -10
  62. edsl/questions/derived/QuestionYesNo.py +3 -24
  63. edsl/questions/descriptors.py +7 -43
  64. edsl/questions/question_registry.py +2 -6
  65. edsl/results/Dataset.py +0 -20
  66. edsl/results/DatasetExportMixin.py +48 -46
  67. edsl/results/Result.py +5 -32
  68. edsl/results/Results.py +46 -135
  69. edsl/results/ResultsDBMixin.py +3 -3
  70. edsl/scenarios/FileStore.py +10 -71
  71. edsl/scenarios/Scenario.py +25 -96
  72. edsl/scenarios/ScenarioImageMixin.py +2 -2
  73. edsl/scenarios/ScenarioList.py +39 -361
  74. edsl/scenarios/ScenarioListExportMixin.py +0 -9
  75. edsl/scenarios/ScenarioListPdfMixin.py +4 -150
  76. edsl/study/SnapShot.py +1 -8
  77. edsl/study/Study.py +0 -32
  78. edsl/surveys/Rule.py +1 -10
  79. edsl/surveys/RuleCollection.py +5 -21
  80. edsl/surveys/Survey.py +310 -636
  81. edsl/surveys/SurveyExportMixin.py +9 -71
  82. edsl/surveys/SurveyFlowVisualizationMixin.py +1 -2
  83. edsl/surveys/SurveyQualtricsImport.py +4 -75
  84. edsl/utilities/gcp_bucket/simple_example.py +9 -0
  85. edsl/utilities/utilities.py +1 -9
  86. {edsl-0.1.33.dist-info → edsl-0.1.33.dev1.dist-info}/METADATA +2 -5
  87. edsl-0.1.33.dev1.dist-info/RECORD +209 -0
  88. edsl/TemplateLoader.py +0 -24
  89. edsl/auto/AutoStudy.py +0 -117
  90. edsl/auto/StageBase.py +0 -230
  91. edsl/auto/StageGenerateSurvey.py +0 -178
  92. edsl/auto/StageLabelQuestions.py +0 -125
  93. edsl/auto/StagePersona.py +0 -61
  94. edsl/auto/StagePersonaDimensionValueRanges.py +0 -88
  95. edsl/auto/StagePersonaDimensionValues.py +0 -74
  96. edsl/auto/StagePersonaDimensions.py +0 -69
  97. edsl/auto/StageQuestions.py +0 -73
  98. edsl/auto/SurveyCreatorPipeline.py +0 -21
  99. edsl/auto/utilities.py +0 -224
  100. edsl/coop/PriceFetcher.py +0 -58
  101. edsl/inference_services/MistralAIService.py +0 -120
  102. edsl/inference_services/TestService.py +0 -80
  103. edsl/inference_services/TogetherAIService.py +0 -170
  104. edsl/jobs/FailedQuestion.py +0 -78
  105. edsl/jobs/runners/JobsRunnerStatus.py +0 -331
  106. edsl/language_models/fake_openai_call.py +0 -15
  107. edsl/language_models/fake_openai_service.py +0 -61
  108. edsl/language_models/utilities.py +0 -61
  109. edsl/questions/QuestionBaseGenMixin.py +0 -133
  110. edsl/questions/QuestionBasePromptsMixin.py +0 -266
  111. edsl/questions/Quick.py +0 -41
  112. edsl/questions/ResponseValidatorABC.py +0 -170
  113. edsl/questions/decorators.py +0 -21
  114. edsl/questions/prompt_templates/question_budget.jinja +0 -13
  115. edsl/questions/prompt_templates/question_checkbox.jinja +0 -32
  116. edsl/questions/prompt_templates/question_extract.jinja +0 -11
  117. edsl/questions/prompt_templates/question_free_text.jinja +0 -3
  118. edsl/questions/prompt_templates/question_linear_scale.jinja +0 -11
  119. edsl/questions/prompt_templates/question_list.jinja +0 -17
  120. edsl/questions/prompt_templates/question_multiple_choice.jinja +0 -33
  121. edsl/questions/prompt_templates/question_numerical.jinja +0 -37
  122. edsl/questions/templates/__init__.py +0 -0
  123. edsl/questions/templates/budget/__init__.py +0 -0
  124. edsl/questions/templates/budget/answering_instructions.jinja +0 -7
  125. edsl/questions/templates/budget/question_presentation.jinja +0 -7
  126. edsl/questions/templates/checkbox/__init__.py +0 -0
  127. edsl/questions/templates/checkbox/answering_instructions.jinja +0 -10
  128. edsl/questions/templates/checkbox/question_presentation.jinja +0 -22
  129. edsl/questions/templates/extract/__init__.py +0 -0
  130. edsl/questions/templates/extract/answering_instructions.jinja +0 -7
  131. edsl/questions/templates/extract/question_presentation.jinja +0 -1
  132. edsl/questions/templates/free_text/__init__.py +0 -0
  133. edsl/questions/templates/free_text/answering_instructions.jinja +0 -0
  134. edsl/questions/templates/free_text/question_presentation.jinja +0 -1
  135. edsl/questions/templates/likert_five/__init__.py +0 -0
  136. edsl/questions/templates/likert_five/answering_instructions.jinja +0 -10
  137. edsl/questions/templates/likert_five/question_presentation.jinja +0 -12
  138. edsl/questions/templates/linear_scale/__init__.py +0 -0
  139. edsl/questions/templates/linear_scale/answering_instructions.jinja +0 -5
  140. edsl/questions/templates/linear_scale/question_presentation.jinja +0 -5
  141. edsl/questions/templates/list/__init__.py +0 -0
  142. edsl/questions/templates/list/answering_instructions.jinja +0 -4
  143. edsl/questions/templates/list/question_presentation.jinja +0 -5
  144. edsl/questions/templates/multiple_choice/__init__.py +0 -0
  145. edsl/questions/templates/multiple_choice/answering_instructions.jinja +0 -9
  146. edsl/questions/templates/multiple_choice/html.jinja +0 -0
  147. edsl/questions/templates/multiple_choice/question_presentation.jinja +0 -12
  148. edsl/questions/templates/numerical/__init__.py +0 -0
  149. edsl/questions/templates/numerical/answering_instructions.jinja +0 -8
  150. edsl/questions/templates/numerical/question_presentation.jinja +0 -7
  151. edsl/questions/templates/rank/__init__.py +0 -0
  152. edsl/questions/templates/rank/answering_instructions.jinja +0 -11
  153. edsl/questions/templates/rank/question_presentation.jinja +0 -15
  154. edsl/questions/templates/top_k/__init__.py +0 -0
  155. edsl/questions/templates/top_k/answering_instructions.jinja +0 -8
  156. edsl/questions/templates/top_k/question_presentation.jinja +0 -22
  157. edsl/questions/templates/yes_no/__init__.py +0 -0
  158. edsl/questions/templates/yes_no/answering_instructions.jinja +0 -6
  159. edsl/questions/templates/yes_no/question_presentation.jinja +0 -12
  160. edsl/results/DatasetTree.py +0 -145
  161. edsl/results/Selector.py +0 -118
  162. edsl/results/tree_explore.py +0 -115
  163. edsl/surveys/instructions/ChangeInstruction.py +0 -47
  164. edsl/surveys/instructions/Instruction.py +0 -34
  165. edsl/surveys/instructions/InstructionCollection.py +0 -77
  166. edsl/surveys/instructions/__init__.py +0 -0
  167. edsl/templates/error_reporting/base.html +0 -24
  168. edsl/templates/error_reporting/exceptions_by_model.html +0 -35
  169. edsl/templates/error_reporting/exceptions_by_question_name.html +0 -17
  170. edsl/templates/error_reporting/exceptions_by_type.html +0 -17
  171. edsl/templates/error_reporting/interview_details.html +0 -116
  172. edsl/templates/error_reporting/interviews.html +0 -10
  173. edsl/templates/error_reporting/overview.html +0 -5
  174. edsl/templates/error_reporting/performance_plot.html +0 -2
  175. edsl/templates/error_reporting/report.css +0 -74
  176. edsl/templates/error_reporting/report.html +0 -118
  177. edsl/templates/error_reporting/report.js +0 -25
  178. edsl-0.1.33.dist-info/RECORD +0 -295
  179. {edsl-0.1.33.dist-info → edsl-0.1.33.dev1.dist-info}/LICENSE +0 -0
  180. {edsl-0.1.33.dist-info → edsl-0.1.33.dev1.dist-info}/WHEEL +0 -0
@@ -6,11 +6,6 @@ import csv
6
6
  import random
7
7
  from collections import UserList, Counter
8
8
  from collections.abc import Iterable
9
- import urllib.parse
10
- import urllib.request
11
- from io import StringIO
12
- from collections import defaultdict
13
- import inspect
14
9
 
15
10
  from simpleeval import EvalWithCompoundTypes
16
11
 
@@ -20,9 +15,6 @@ from edsl.scenarios.Scenario import Scenario
20
15
  from edsl.scenarios.ScenarioListPdfMixin import ScenarioListPdfMixin
21
16
  from edsl.scenarios.ScenarioListExportMixin import ScenarioListExportMixin
22
17
 
23
- from edsl.conjure.naming_utilities import sanitize_string
24
- from edsl.utilities.utilities import is_valid_variable_name
25
-
26
18
 
27
19
  class ScenarioListMixin(ScenarioListPdfMixin, ScenarioListExportMixin):
28
20
  pass
@@ -31,180 +23,12 @@ class ScenarioListMixin(ScenarioListPdfMixin, ScenarioListExportMixin):
31
23
  class ScenarioList(Base, UserList, ScenarioListMixin):
32
24
  """Class for creating a list of scenarios to be used in a survey."""
33
25
 
34
- def __init__(self, data: Optional[list] = None, codebook: Optional[dict] = None):
26
+ def __init__(self, data: Optional[list] = None):
35
27
  """Initialize the ScenarioList class."""
36
28
  if data is not None:
37
29
  super().__init__(data)
38
30
  else:
39
31
  super().__init__([])
40
- self.codebook = codebook or {}
41
-
42
- @property
43
- def has_jinja_braces(self) -> bool:
44
- """Check if the ScenarioList has Jinja braces."""
45
- return any([scenario.has_jinja_braces for scenario in self])
46
-
47
- def convert_jinja_braces(self) -> ScenarioList:
48
- """Convert Jinja braces to Python braces."""
49
- return ScenarioList([scenario.convert_jinja_braces() for scenario in self])
50
-
51
- def give_valid_names(self) -> ScenarioList:
52
- """Give valid names to the scenario keys.
53
-
54
- >>> s = ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
55
- >>> s.give_valid_names()
56
- ScenarioList([Scenario({'a': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
57
- >>> s = ScenarioList([Scenario({'are you there John?': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
58
- >>> s.give_valid_names()
59
- ScenarioList([Scenario({'john': 1, 'b': 2}), Scenario({'a': 1, 'b': 1})])
60
- """
61
- codebook = {}
62
- new_scenaerios = []
63
- for scenario in self:
64
- new_scenario = {}
65
- for key in scenario:
66
- if not is_valid_variable_name(key):
67
- if key in codebook:
68
- new_key = codebook[key]
69
- else:
70
- new_key = sanitize_string(key)
71
- if not is_valid_variable_name(new_key):
72
- new_key = f"var_{len(codebook)}"
73
- codebook[key] = new_key
74
- new_scenario[new_key] = scenario[key]
75
- else:
76
- new_scenario[key] = scenario[key]
77
- new_scenaerios.append(Scenario(new_scenario))
78
- return ScenarioList(new_scenaerios, codebook)
79
-
80
- def unpivot(self, id_vars=None, value_vars=None):
81
- """
82
- Unpivot the ScenarioList, allowing for id variables to be specified.
83
-
84
- Parameters:
85
- id_vars (list): Fields to use as identifier variables (kept in each entry)
86
- value_vars (list): Fields to unpivot. If None, all fields not in id_vars will be used.
87
-
88
- Example:
89
- >>> s = ScenarioList([
90
- ... Scenario({'id': 1, 'year': 2020, 'a': 10, 'b': 20}),
91
- ... Scenario({'id': 2, 'year': 2021, 'a': 15, 'b': 25})
92
- ... ])
93
- >>> s.unpivot(id_vars=['id', 'year'], value_vars=['a', 'b'])
94
- ScenarioList([Scenario({'id': 1, 'year': 2020, 'variable': 'a', 'value': 10}), Scenario({'id': 1, 'year': 2020, 'variable': 'b', 'value': 20}), Scenario({'id': 2, 'year': 2021, 'variable': 'a', 'value': 15}), Scenario({'id': 2, 'year': 2021, 'variable': 'b', 'value': 25})])
95
- """
96
- if id_vars is None:
97
- id_vars = []
98
- if value_vars is None:
99
- value_vars = [field for field in self[0].keys() if field not in id_vars]
100
-
101
- new_scenarios = []
102
- for scenario in self:
103
- for var in value_vars:
104
- new_scenario = {id_var: scenario[id_var] for id_var in id_vars}
105
- new_scenario["variable"] = var
106
- new_scenario["value"] = scenario[var]
107
- new_scenarios.append(Scenario(new_scenario))
108
-
109
- return ScenarioList(new_scenarios)
110
-
111
- def pivot(self, id_vars, var_name="variable", value_name="value"):
112
- """
113
- Pivot the ScenarioList from long to wide format.
114
-
115
- Parameters:
116
- id_vars (list): Fields to use as identifier variables
117
- var_name (str): Name of the variable column (default: 'variable')
118
- value_name (str): Name of the value column (default: 'value')
119
-
120
- Example:
121
- >>> s = ScenarioList([
122
- ... Scenario({'id': 1, 'year': 2020, 'variable': 'a', 'value': 10}),
123
- ... Scenario({'id': 1, 'year': 2020, 'variable': 'b', 'value': 20}),
124
- ... Scenario({'id': 2, 'year': 2021, 'variable': 'a', 'value': 15}),
125
- ... Scenario({'id': 2, 'year': 2021, 'variable': 'b', 'value': 25})
126
- ... ])
127
- >>> s.pivot(id_vars=['id', 'year'])
128
- ScenarioList([Scenario({'id': 1, 'year': 2020, 'a': 10, 'b': 20}), Scenario({'id': 2, 'year': 2021, 'a': 15, 'b': 25})])
129
- """
130
- pivoted_dict = {}
131
-
132
- for scenario in self:
133
- # Create a tuple of id values to use as a key
134
- id_key = tuple(scenario[id_var] for id_var in id_vars)
135
-
136
- # If this combination of id values hasn't been seen before, initialize it
137
- if id_key not in pivoted_dict:
138
- pivoted_dict[id_key] = {id_var: scenario[id_var] for id_var in id_vars}
139
-
140
- # Add the variable-value pair to the dict
141
- variable = scenario[var_name]
142
- value = scenario[value_name]
143
- pivoted_dict[id_key][variable] = value
144
-
145
- # Convert the dict of dicts to a list of Scenarios
146
- pivoted_scenarios = [
147
- Scenario(dict(zip(id_vars, id_key), **values))
148
- for id_key, values in pivoted_dict.items()
149
- ]
150
-
151
- return ScenarioList(pivoted_scenarios)
152
-
153
- def group_by(self, id_vars, variables, func):
154
- """
155
- Group the ScenarioList by id_vars and apply a function to the specified variables.
156
-
157
- Parameters:
158
- id_vars (list): Fields to use as identifier variables for grouping
159
- variables (list): Fields to pass to the aggregation function
160
- func (callable): Function to apply to the grouped variables.
161
- Should accept lists of values for each variable.
162
-
163
- Returns:
164
- ScenarioList: A new ScenarioList with the grouped and aggregated results
165
-
166
- Example:
167
- >>> def avg_sum(a, b):
168
- ... return {'avg_a': sum(a) / len(a), 'sum_b': sum(b)}
169
- >>> s = ScenarioList([
170
- ... Scenario({'group': 'A', 'year': 2020, 'a': 10, 'b': 20}),
171
- ... Scenario({'group': 'A', 'year': 2021, 'a': 15, 'b': 25}),
172
- ... Scenario({'group': 'B', 'year': 2020, 'a': 12, 'b': 22}),
173
- ... Scenario({'group': 'B', 'year': 2021, 'a': 17, 'b': 27})
174
- ... ])
175
- >>> s.group_by(id_vars=['group'], variables=['a', 'b'], func=avg_sum)
176
- ScenarioList([Scenario({'group': 'A', 'avg_a': 12.5, 'sum_b': 45}), Scenario({'group': 'B', 'avg_a': 14.5, 'sum_b': 49})])
177
- """
178
- # Check if the function is compatible with the specified variables
179
- func_params = inspect.signature(func).parameters
180
- if len(func_params) != len(variables):
181
- raise ValueError(
182
- f"Function {func.__name__} expects {len(func_params)} arguments, but {len(variables)} variables were provided"
183
- )
184
-
185
- # Group the scenarios
186
- grouped = defaultdict(lambda: defaultdict(list))
187
- for scenario in self:
188
- key = tuple(scenario[id_var] for id_var in id_vars)
189
- for var in variables:
190
- grouped[key][var].append(scenario[var])
191
-
192
- # Apply the function to each group
193
- result = []
194
- for key, group in grouped.items():
195
- try:
196
- aggregated = func(*[group[var] for var in variables])
197
- except Exception as e:
198
- raise ValueError(f"Error applying function to group {key}: {str(e)}")
199
-
200
- if not isinstance(aggregated, dict):
201
- raise ValueError(f"Function {func.__name__} must return a dictionary")
202
-
203
- new_scenario = dict(zip(id_vars, key))
204
- new_scenario.update(aggregated)
205
- result.append(Scenario(new_scenario))
206
-
207
- return ScenarioList(result)
208
32
 
209
33
  @property
210
34
  def parameters(self) -> set:
@@ -282,10 +106,6 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
282
106
  for s in data["scenarios"]:
283
107
  _ = s.pop("edsl_version")
284
108
  _ = s.pop("edsl_class_name")
285
- for scenario in data["scenarios"]:
286
- for key, value in scenario.items():
287
- if hasattr(value, "to_dict"):
288
- data[key] = value.to_dict()
289
109
  return data_to_html(data)
290
110
 
291
111
  def tally(self, field) -> dict:
@@ -333,71 +153,6 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
333
153
  new_scenarios.append(new_scenario)
334
154
  return ScenarioList(new_scenarios)
335
155
 
336
- def concatenate(self, fields: List[str], separator: str = ";") -> "ScenarioList":
337
- """Concatenate specified fields into a single field.
338
-
339
- Args:
340
- fields (List[str]): List of field names to concatenate.
341
- separator (str, optional): Separator to use between field values. Defaults to ";".
342
-
343
- Returns:
344
- ScenarioList: A new ScenarioList with concatenated fields.
345
-
346
- Example:
347
- >>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
348
- >>> s.concatenate(['a', 'b', 'c'])
349
- ScenarioList([Scenario({'concat_a_b_c': '1;2;3'}), Scenario({'concat_a_b_c': '4;5;6'})])
350
- """
351
- new_scenarios = []
352
- for scenario in self:
353
- new_scenario = scenario.copy()
354
- concat_values = []
355
- for field in fields:
356
- if field in new_scenario:
357
- concat_values.append(str(new_scenario[field]))
358
- del new_scenario[field]
359
-
360
- new_field_name = f"concat_{'_'.join(fields)}"
361
- new_scenario[new_field_name] = separator.join(concat_values)
362
- new_scenarios.append(new_scenario)
363
-
364
- return ScenarioList(new_scenarios)
365
-
366
- def unpack_dict(
367
- self, field: str, prefix: Optional[str] = None, drop_field: bool = False
368
- ) -> ScenarioList:
369
- """Unpack a dictionary field into separate fields.
370
-
371
- Example:
372
-
373
- >>> s = ScenarioList([Scenario({'a': 1, 'b': {'c': 2, 'd': 3}})])
374
- >>> s.unpack_dict('b')
375
- ScenarioList([Scenario({'a': 1, 'b': {'c': 2, 'd': 3}, 'c': 2, 'd': 3})])
376
- """
377
- new_scenarios = []
378
- for scenario in self:
379
- new_scenario = scenario.copy()
380
- for key, value in scenario[field].items():
381
- if prefix:
382
- new_scenario[prefix + key] = value
383
- else:
384
- new_scenario[key] = value
385
- if drop_field:
386
- new_scenario.pop(field)
387
- new_scenarios.append(new_scenario)
388
- return ScenarioList(new_scenarios)
389
-
390
- def transform(
391
- self, field: str, func: Callable, new_name: Optional[str] = None
392
- ) -> ScenarioList:
393
- """Transform a field using a function."""
394
- new_scenarios = []
395
- for scenario in self:
396
- new_scenario = scenario.copy()
397
- new_scenario[new_name or field] = func(scenario[field])
398
- new_scenarios.append(new_scenario)
399
- return ScenarioList(new_scenarios)
400
-
401
156
  def mutate(
402
157
  self, new_var_string: str, functions_dict: Optional[dict[str, Callable]] = None
403
158
  ) -> ScenarioList:
@@ -555,19 +310,6 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
555
310
  data = [{key: [scenario[key] for scenario in self.data]} for key in keys]
556
311
  return Dataset(data)
557
312
 
558
- def split(
559
- self, field: str, split_on: str, index: int, new_name: Optional[str] = None
560
- ) -> ScenarioList:
561
- """Split a scenario fiel in multiple fields."""
562
- if new_name is None:
563
- new_name = field + "_split_" + str(index)
564
- new_scenarios = []
565
- for scenario in self:
566
- new_scenario = scenario.copy()
567
- new_scenario[new_name] = scenario[field].split(split_on)[index]
568
- new_scenarios.append(new_scenario)
569
- return ScenarioList(new_scenarios)
570
-
571
313
  def add_list(self, name, values) -> ScenarioList:
572
314
  """Add a list of values to a ScenarioList.
573
315
 
@@ -650,6 +392,37 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
650
392
 
651
393
  return ScenarioList([Scenario(entry) for entry in processed_lines])
652
394
 
395
+ @classmethod
396
+ def from_docx(cls, docx_file_path: str):
397
+ from docx import Document
398
+
399
+ doc = Document(docx_file_path)
400
+ lines = []
401
+
402
+ # Extract text from paragraphs, treating each paragraph as a line
403
+ for para in doc.paragraphs:
404
+ lines.extend(para.text.splitlines())
405
+
406
+ processed_lines = []
407
+ non_blank_lines = [
408
+ (i, line.strip()) for i, line in enumerate(lines) if line.strip()
409
+ ]
410
+
411
+ for index, (line_no, text) in enumerate(non_blank_lines):
412
+ entry = {
413
+ "line_no": line_no + 1, # Using 1-based index for line numbers
414
+ "text": text,
415
+ "line_before": non_blank_lines[index - 1][1] if index > 0 else None,
416
+ "line_after": (
417
+ non_blank_lines[index + 1][1]
418
+ if index < len(non_blank_lines) - 1
419
+ else None
420
+ ),
421
+ }
422
+ processed_lines.append(entry)
423
+
424
+ return ScenarioList([Scenario(entry) for entry in processed_lines])
425
+
653
426
  @classmethod
654
427
  def from_google_doc(cls, url: str) -> ScenarioList:
655
428
  """Create a ScenarioList from a Google Doc.
@@ -700,62 +473,6 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
700
473
  """
701
474
  return cls([Scenario(row) for row in df.to_dict(orient="records")])
702
475
 
703
- @classmethod
704
- def from_wikipedia(cls, url: str, table_index: int = 0):
705
- """
706
- Extracts a table from a Wikipedia page.
707
-
708
- Parameters:
709
- url (str): The URL of the Wikipedia page.
710
- table_index (int): The index of the table to extract (default is 0).
711
-
712
- Returns:
713
- pd.DataFrame: A DataFrame containing the extracted table.
714
- # # Example usage
715
- # url = "https://en.wikipedia.org/wiki/List_of_countries_by_GDP_(nominal)"
716
- # df = from_wikipedia(url, 0)
717
-
718
- # if not df.empty:
719
- # print(df.head())
720
- # else:
721
- # print("Failed to extract table.")
722
-
723
-
724
- """
725
- import pandas as pd
726
- import requests
727
- from requests.exceptions import RequestException
728
-
729
- try:
730
- # Check if the URL is reachable
731
- response = requests.get(url)
732
- response.raise_for_status() # Raises HTTPError for bad responses
733
-
734
- # Extract tables from the Wikipedia page
735
- tables = pd.read_html(url)
736
-
737
- # Ensure the requested table index is within the range of available tables
738
- if table_index >= len(tables) or table_index < 0:
739
- raise IndexError(
740
- f"Table index {table_index} is out of range. This page has {len(tables)} table(s)."
741
- )
742
-
743
- # Return the requested table as a DataFrame
744
- # return tables[table_index]
745
- return cls.from_pandas(tables[table_index])
746
-
747
- except RequestException as e:
748
- print(f"Error fetching the URL: {e}")
749
- except ValueError as e:
750
- print(f"Error parsing tables: {e}")
751
- except IndexError as e:
752
- print(e)
753
- except Exception as e:
754
- print(f"An unexpected error occurred: {e}")
755
-
756
- # Return an empty DataFrame in case of an error
757
- # return cls.from_pandas(pd.DataFrame())
758
-
759
476
  def to_key_value(self, field: str, value=None) -> Union[dict, set]:
760
477
  """Return the set of values in the field.
761
478
 
@@ -877,15 +594,8 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
877
594
  return cls.from_excel(temp_filename, sheet_name=sheet_name)
878
595
 
879
596
  @classmethod
880
- def from_csv(cls, source: Union[str, urllib.parse.ParseResult]) -> ScenarioList:
881
- """Create a ScenarioList from a CSV file or URL.
882
-
883
- Args:
884
- source: A string representing either a local file path or a URL to a CSV file,
885
- or a urllib.parse.ParseResult object for a URL.
886
-
887
- Returns:
888
- ScenarioList: A ScenarioList object containing the data from the CSV.
597
+ def from_csv(cls, filename: str) -> ScenarioList:
598
+ """Create a ScenarioList from a CSV file.
889
599
 
890
600
  Example:
891
601
 
@@ -901,37 +611,15 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
901
611
  'Alice'
902
612
  >>> scenario_list[1]['age']
903
613
  '25'
904
-
905
- >>> url = "https://example.com/data.csv"
906
- >>> ## scenario_list_from_url = ScenarioList.from_csv(url)
907
614
  """
908
615
  from edsl.scenarios.Scenario import Scenario
909
616
 
910
- def is_url(source):
911
- try:
912
- result = urllib.parse.urlparse(source)
913
- return all([result.scheme, result.netloc])
914
- except ValueError:
915
- return False
916
-
917
- if isinstance(source, str) and is_url(source):
918
- with urllib.request.urlopen(source) as response:
919
- csv_content = response.read().decode("utf-8")
920
- csv_file = StringIO(csv_content)
921
- elif isinstance(source, urllib.parse.ParseResult):
922
- with urllib.request.urlopen(source.geturl()) as response:
923
- csv_content = response.read().decode("utf-8")
924
- csv_file = StringIO(csv_content)
925
- else:
926
- csv_file = open(source, "r")
927
-
928
- try:
929
- reader = csv.reader(csv_file)
617
+ observations = []
618
+ with open(filename, "r") as f:
619
+ reader = csv.reader(f)
930
620
  header = next(reader)
931
- observations = [Scenario(dict(zip(header, row))) for row in reader]
932
- finally:
933
- csv_file.close()
934
-
621
+ for row in reader:
622
+ observations.append(Scenario(dict(zip(header, row))))
935
623
  return cls(observations)
936
624
 
937
625
  def _to_dict(self, sort=False) -> dict:
@@ -975,16 +663,6 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
975
663
 
976
664
  return cls([Scenario.from_dict(s) for s in data["scenarios"]])
977
665
 
978
- @classmethod
979
- def from_nested_dict(cls, data: dict) -> ScenarioList:
980
- """Create a `ScenarioList` from a nested dictionary."""
981
- from edsl.scenarios.Scenario import Scenario
982
-
983
- s = ScenarioList()
984
- for key, value in data.items():
985
- s.add_list(key, value)
986
- return s
987
-
988
666
  def code(self) -> str:
989
667
  ## TODO: Refactor to only use the questions actually in the survey
990
668
  """Create the Python code representation of a survey."""
@@ -41,12 +41,3 @@ class ScenarioListExportMixin(DatasetExportMixin):
41
41
  def __init_subclass__(cls, **kwargs):
42
42
  super().__init_subclass__(**kwargs)
43
43
  decorate_methods_from_mixin(cls, DatasetExportMixin)
44
-
45
- def to_docx(self, filename: str):
46
- """Export the ScenarioList to a .docx file."""
47
- dataset = self.to_dataset()
48
- from edsl.results.DatasetTree import Tree
49
-
50
- tree = Tree(dataset)
51
- tree.construct_tree()
52
- tree.to_docx(filename)
@@ -1,161 +1,15 @@
1
1
  import fitz # PyMuPDF
2
2
  import os
3
- import copy
4
3
  import subprocess
5
- import requests
6
- import tempfile
7
- import os
8
-
9
- # import urllib.parse as urlparse
10
- from urllib.parse import urlparse
11
4
 
12
5
  # from edsl import Scenario
13
6
 
14
- import requests
15
- import re
16
- import tempfile
17
- import os
18
- import atexit
19
- from urllib.parse import urlparse, parse_qs
20
-
21
-
22
- class GoogleDriveDownloader:
23
- _temp_dir = None
24
- _temp_file_path = None
25
-
26
- @classmethod
27
- def fetch_from_drive(cls, url, filename=None):
28
- # Extract file ID from the URL
29
- file_id = cls._extract_file_id(url)
30
- if not file_id:
31
- raise ValueError("Invalid Google Drive URL")
32
-
33
- # Construct the download URL
34
- download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
35
-
36
- # Send a GET request to the URL
37
- session = requests.Session()
38
- response = session.get(download_url, stream=True)
39
- response.raise_for_status()
40
-
41
- # Check for large file download prompt
42
- for key, value in response.cookies.items():
43
- if key.startswith("download_warning"):
44
- params = {"id": file_id, "confirm": value}
45
- response = session.get(download_url, params=params, stream=True)
46
- break
47
-
48
- # Create a temporary file to save the download
49
- if not filename:
50
- filename = "downloaded_file"
51
-
52
- if cls._temp_dir is None:
53
- cls._temp_dir = tempfile.TemporaryDirectory()
54
- atexit.register(cls._cleanup)
55
-
56
- cls._temp_file_path = os.path.join(cls._temp_dir.name, filename)
57
-
58
- # Write the content to the temporary file
59
- with open(cls._temp_file_path, "wb") as f:
60
- for chunk in response.iter_content(32768):
61
- if chunk:
62
- f.write(chunk)
63
-
64
- print(f"File saved to: {cls._temp_file_path}")
65
-
66
- return cls._temp_file_path
67
-
68
- @staticmethod
69
- def _extract_file_id(url):
70
- # Try to extract file ID from '/file/d/' format
71
- file_id_match = re.search(r"/d/([a-zA-Z0-9-_]+)", url)
72
- if file_id_match:
73
- return file_id_match.group(1)
74
-
75
- # If not found, try to extract from 'open?id=' format
76
- parsed_url = urlparse(url)
77
- query_params = parse_qs(parsed_url.query)
78
- if "id" in query_params:
79
- return query_params["id"][0]
80
-
81
- return None
82
-
83
- @classmethod
84
- def _cleanup(cls):
85
- if cls._temp_dir:
86
- cls._temp_dir.cleanup()
87
-
88
- @classmethod
89
- def get_temp_file_path(cls):
90
- return cls._temp_file_path
91
-
92
-
93
- def fetch_and_save_pdf(url, filename):
94
- # Send a GET request to the URL
95
- response = requests.get(url)
96
-
97
- # Check if the request was successful
98
- response.raise_for_status()
99
-
100
- # Create a temporary directory
101
- with tempfile.TemporaryDirectory() as temp_dir:
102
- # Construct the full path for the file
103
- temp_file_path = os.path.join(temp_dir, filename)
104
-
105
- # Write the content to the temporary file
106
- with open(temp_file_path, "wb") as file:
107
- file.write(response.content)
108
-
109
- print(f"PDF saved to: {temp_file_path}")
110
-
111
- # Here you can perform operations with the file
112
- # The file will be automatically deleted when you exit this block
113
-
114
- return temp_file_path
115
-
116
-
117
- # Example usage:
118
- # url = "https://example.com/sample.pdf"
119
- # fetch_and_save_pdf(url, "sample.pdf")
120
-
121
7
 
122
8
  class ScenarioListPdfMixin:
123
9
  @classmethod
124
- def from_pdf(cls, filename_or_url, collapse_pages=False):
125
- # Check if the input is a URL
126
- if cls.is_url(filename_or_url):
127
- # Check if it's a Google Drive URL
128
- if "drive.google.com" in filename_or_url:
129
- temp_filename = GoogleDriveDownloader.fetch_from_drive(
130
- filename_or_url, "temp_pdf.pdf"
131
- )
132
- else:
133
- # For other URLs, use the previous fetch_and_save_pdf function
134
- temp_filename = fetch_and_save_pdf(filename_or_url, "temp_pdf.pdf")
135
-
136
- scenarios = list(cls.extract_text_from_pdf(temp_filename))
137
- else:
138
- # If it's not a URL, assume it's a local file path
139
- scenarios = list(cls.extract_text_from_pdf(filename_or_url))
140
- if not collapse_pages:
141
- return cls(scenarios)
142
- else:
143
- txt = ""
144
- for scenario in scenarios:
145
- txt += scenario["text"]
146
- from edsl.scenarios import Scenario
147
-
148
- base_scenario = copy.copy(scenarios[0])
149
- base_scenario["text"] = txt
150
- return base_scenario
151
-
152
- @staticmethod
153
- def is_url(string):
154
- try:
155
- result = urlparse(string)
156
- return all([result.scheme, result.netloc])
157
- except ValueError:
158
- return False
10
+ def from_pdf(cls, filename):
11
+ scenarios = list(cls.extract_text_from_pdf(filename))
12
+ return cls(scenarios)
159
13
 
160
14
  @classmethod
161
15
  def _from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
@@ -184,7 +38,7 @@ class ScenarioListPdfMixin:
184
38
  scenario = Scenario._from_filepath_image(image_path)
185
39
  scenarios.append(scenario)
186
40
 
187
- # print(f"Saved {len(images)} pages as images in {output_folder}")
41
+ print(f"Saved {len(images)} pages as images in {output_folder}")
188
42
  return cls(scenarios)
189
43
 
190
44
  @staticmethod
edsl/study/SnapShot.py CHANGED
@@ -57,17 +57,10 @@ class SnapShot:
57
57
  from edsl.Base import Base
58
58
  from edsl.study.Study import Study
59
59
 
60
- def is_edsl_object(obj):
61
- package_name = "edsl"
62
- cls = obj.__class__
63
- module_name = cls.__module__
64
- return module_name.startswith(package_name)
65
-
66
60
  for name, value in namespace.items():
67
61
  # TODO check this code logic (if there are other objects with to_dict method that are not from edsl)
68
62
  if (
69
- is_edsl_object(value)
70
- and hasattr(value, "to_dict")
63
+ hasattr(value, "to_dict")
71
64
  and not inspect.isclass(value)
72
65
  and value.__class__ not in [o.__class__ for o in self.exclude]
73
66
  ):