edsl 0.1.39.dev1__py3-none-any.whl → 0.1.39.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194) hide show
  1. edsl/Base.py +169 -116
  2. edsl/__init__.py +14 -6
  3. edsl/__version__.py +1 -1
  4. edsl/agents/Agent.py +358 -146
  5. edsl/agents/AgentList.py +211 -73
  6. edsl/agents/Invigilator.py +88 -36
  7. edsl/agents/InvigilatorBase.py +59 -70
  8. edsl/agents/PromptConstructor.py +117 -219
  9. edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
  10. edsl/agents/QuestionOptionProcessor.py +172 -0
  11. edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
  12. edsl/agents/__init__.py +0 -1
  13. edsl/agents/prompt_helpers.py +3 -3
  14. edsl/config.py +22 -2
  15. edsl/conversation/car_buying.py +2 -1
  16. edsl/coop/CoopFunctionsMixin.py +15 -0
  17. edsl/coop/ExpectedParrotKeyHandler.py +125 -0
  18. edsl/coop/PriceFetcher.py +1 -1
  19. edsl/coop/coop.py +104 -42
  20. edsl/coop/utils.py +14 -14
  21. edsl/data/Cache.py +21 -14
  22. edsl/data/CacheEntry.py +12 -15
  23. edsl/data/CacheHandler.py +33 -12
  24. edsl/data/__init__.py +4 -3
  25. edsl/data_transfer_models.py +2 -1
  26. edsl/enums.py +20 -0
  27. edsl/exceptions/__init__.py +50 -50
  28. edsl/exceptions/agents.py +12 -0
  29. edsl/exceptions/inference_services.py +5 -0
  30. edsl/exceptions/questions.py +24 -6
  31. edsl/exceptions/scenarios.py +7 -0
  32. edsl/inference_services/AnthropicService.py +0 -3
  33. edsl/inference_services/AvailableModelCacheHandler.py +184 -0
  34. edsl/inference_services/AvailableModelFetcher.py +209 -0
  35. edsl/inference_services/AwsBedrock.py +0 -2
  36. edsl/inference_services/AzureAI.py +0 -2
  37. edsl/inference_services/GoogleService.py +2 -11
  38. edsl/inference_services/InferenceServiceABC.py +18 -85
  39. edsl/inference_services/InferenceServicesCollection.py +105 -80
  40. edsl/inference_services/MistralAIService.py +0 -3
  41. edsl/inference_services/OpenAIService.py +1 -4
  42. edsl/inference_services/PerplexityService.py +0 -3
  43. edsl/inference_services/ServiceAvailability.py +135 -0
  44. edsl/inference_services/TestService.py +11 -8
  45. edsl/inference_services/data_structures.py +62 -0
  46. edsl/jobs/AnswerQuestionFunctionConstructor.py +188 -0
  47. edsl/jobs/Answers.py +1 -14
  48. edsl/jobs/FetchInvigilator.py +40 -0
  49. edsl/jobs/InterviewTaskManager.py +98 -0
  50. edsl/jobs/InterviewsConstructor.py +48 -0
  51. edsl/jobs/Jobs.py +102 -243
  52. edsl/jobs/JobsChecks.py +35 -10
  53. edsl/jobs/JobsComponentConstructor.py +189 -0
  54. edsl/jobs/JobsPrompts.py +5 -3
  55. edsl/jobs/JobsRemoteInferenceHandler.py +128 -80
  56. edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
  57. edsl/jobs/RequestTokenEstimator.py +30 -0
  58. edsl/jobs/buckets/BucketCollection.py +44 -3
  59. edsl/jobs/buckets/TokenBucket.py +53 -21
  60. edsl/jobs/buckets/TokenBucketAPI.py +211 -0
  61. edsl/jobs/buckets/TokenBucketClient.py +191 -0
  62. edsl/jobs/decorators.py +35 -0
  63. edsl/jobs/interviews/Interview.py +77 -380
  64. edsl/jobs/jobs_status_enums.py +9 -0
  65. edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
  66. edsl/jobs/runners/JobsRunnerAsyncio.py +4 -49
  67. edsl/jobs/tasks/QuestionTaskCreator.py +21 -19
  68. edsl/jobs/tasks/TaskHistory.py +14 -15
  69. edsl/jobs/tasks/task_status_enum.py +0 -2
  70. edsl/language_models/ComputeCost.py +63 -0
  71. edsl/language_models/LanguageModel.py +137 -234
  72. edsl/language_models/ModelList.py +11 -13
  73. edsl/language_models/PriceManager.py +127 -0
  74. edsl/language_models/RawResponseHandler.py +106 -0
  75. edsl/language_models/ServiceDataSources.py +0 -0
  76. edsl/language_models/__init__.py +0 -1
  77. edsl/language_models/key_management/KeyLookup.py +63 -0
  78. edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
  79. edsl/language_models/key_management/KeyLookupCollection.py +38 -0
  80. edsl/language_models/key_management/__init__.py +0 -0
  81. edsl/language_models/key_management/models.py +131 -0
  82. edsl/language_models/registry.py +49 -59
  83. edsl/language_models/repair.py +2 -2
  84. edsl/language_models/utilities.py +5 -4
  85. edsl/notebooks/Notebook.py +19 -14
  86. edsl/notebooks/NotebookToLaTeX.py +142 -0
  87. edsl/prompts/Prompt.py +29 -39
  88. edsl/questions/AnswerValidatorMixin.py +47 -2
  89. edsl/questions/ExceptionExplainer.py +77 -0
  90. edsl/questions/HTMLQuestion.py +103 -0
  91. edsl/questions/LoopProcessor.py +149 -0
  92. edsl/questions/QuestionBase.py +37 -192
  93. edsl/questions/QuestionBaseGenMixin.py +52 -48
  94. edsl/questions/QuestionBasePromptsMixin.py +7 -3
  95. edsl/questions/QuestionCheckBox.py +1 -1
  96. edsl/questions/QuestionExtract.py +1 -1
  97. edsl/questions/QuestionFreeText.py +1 -2
  98. edsl/questions/QuestionList.py +3 -5
  99. edsl/questions/QuestionMatrix.py +265 -0
  100. edsl/questions/QuestionMultipleChoice.py +66 -22
  101. edsl/questions/QuestionNumerical.py +1 -3
  102. edsl/questions/QuestionRank.py +6 -16
  103. edsl/questions/ResponseValidatorABC.py +37 -11
  104. edsl/questions/ResponseValidatorFactory.py +28 -0
  105. edsl/questions/SimpleAskMixin.py +4 -3
  106. edsl/questions/__init__.py +1 -0
  107. edsl/questions/derived/QuestionLinearScale.py +6 -3
  108. edsl/questions/derived/QuestionTopK.py +1 -1
  109. edsl/questions/descriptors.py +17 -3
  110. edsl/questions/question_registry.py +1 -1
  111. edsl/questions/templates/matrix/__init__.py +1 -0
  112. edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
  113. edsl/questions/templates/matrix/question_presentation.jinja +20 -0
  114. edsl/results/CSSParameterizer.py +1 -1
  115. edsl/results/Dataset.py +170 -7
  116. edsl/results/DatasetExportMixin.py +224 -302
  117. edsl/results/DatasetTree.py +28 -8
  118. edsl/results/MarkdownToDocx.py +122 -0
  119. edsl/results/MarkdownToPDF.py +111 -0
  120. edsl/results/Result.py +192 -206
  121. edsl/results/Results.py +120 -113
  122. edsl/results/ResultsExportMixin.py +2 -0
  123. edsl/results/Selector.py +23 -13
  124. edsl/results/TableDisplay.py +98 -171
  125. edsl/results/TextEditor.py +50 -0
  126. edsl/results/__init__.py +1 -1
  127. edsl/results/smart_objects.py +96 -0
  128. edsl/results/table_data_class.py +12 -0
  129. edsl/results/table_renderers.py +118 -0
  130. edsl/scenarios/ConstructDownloadLink.py +109 -0
  131. edsl/scenarios/DirectoryScanner.py +96 -0
  132. edsl/scenarios/DocumentChunker.py +102 -0
  133. edsl/scenarios/DocxScenario.py +16 -0
  134. edsl/scenarios/FileStore.py +118 -239
  135. edsl/scenarios/PdfExtractor.py +40 -0
  136. edsl/scenarios/Scenario.py +90 -193
  137. edsl/scenarios/ScenarioHtmlMixin.py +4 -3
  138. edsl/scenarios/ScenarioJoin.py +10 -6
  139. edsl/scenarios/ScenarioList.py +383 -240
  140. edsl/scenarios/ScenarioListExportMixin.py +0 -7
  141. edsl/scenarios/ScenarioListPdfMixin.py +15 -37
  142. edsl/scenarios/ScenarioSelector.py +156 -0
  143. edsl/scenarios/__init__.py +1 -2
  144. edsl/scenarios/file_methods.py +85 -0
  145. edsl/scenarios/handlers/__init__.py +13 -0
  146. edsl/scenarios/handlers/csv.py +38 -0
  147. edsl/scenarios/handlers/docx.py +76 -0
  148. edsl/scenarios/handlers/html.py +37 -0
  149. edsl/scenarios/handlers/json.py +111 -0
  150. edsl/scenarios/handlers/latex.py +5 -0
  151. edsl/scenarios/handlers/md.py +51 -0
  152. edsl/scenarios/handlers/pdf.py +68 -0
  153. edsl/scenarios/handlers/png.py +39 -0
  154. edsl/scenarios/handlers/pptx.py +105 -0
  155. edsl/scenarios/handlers/py.py +294 -0
  156. edsl/scenarios/handlers/sql.py +313 -0
  157. edsl/scenarios/handlers/sqlite.py +149 -0
  158. edsl/scenarios/handlers/txt.py +33 -0
  159. edsl/study/ObjectEntry.py +1 -1
  160. edsl/study/SnapShot.py +1 -1
  161. edsl/study/Study.py +5 -12
  162. edsl/surveys/ConstructDAG.py +92 -0
  163. edsl/surveys/EditSurvey.py +221 -0
  164. edsl/surveys/InstructionHandler.py +100 -0
  165. edsl/surveys/MemoryManagement.py +72 -0
  166. edsl/surveys/Rule.py +5 -4
  167. edsl/surveys/RuleCollection.py +25 -27
  168. edsl/surveys/RuleManager.py +172 -0
  169. edsl/surveys/Simulator.py +75 -0
  170. edsl/surveys/Survey.py +199 -771
  171. edsl/surveys/SurveyCSS.py +20 -8
  172. edsl/surveys/{SurveyFlowVisualizationMixin.py → SurveyFlowVisualization.py} +11 -9
  173. edsl/surveys/SurveyToApp.py +141 -0
  174. edsl/surveys/__init__.py +4 -2
  175. edsl/surveys/descriptors.py +6 -2
  176. edsl/surveys/instructions/ChangeInstruction.py +1 -2
  177. edsl/surveys/instructions/Instruction.py +4 -13
  178. edsl/surveys/instructions/InstructionCollection.py +11 -6
  179. edsl/templates/error_reporting/interview_details.html +1 -1
  180. edsl/templates/error_reporting/report.html +1 -1
  181. edsl/tools/plotting.py +1 -1
  182. edsl/utilities/PrettyList.py +56 -0
  183. edsl/utilities/is_notebook.py +18 -0
  184. edsl/utilities/is_valid_variable_name.py +11 -0
  185. edsl/utilities/remove_edsl_version.py +24 -0
  186. edsl/utilities/utilities.py +35 -23
  187. {edsl-0.1.39.dev1.dist-info → edsl-0.1.39.dev2.dist-info}/METADATA +12 -10
  188. edsl-0.1.39.dev2.dist-info/RECORD +352 -0
  189. edsl/language_models/KeyLookup.py +0 -30
  190. edsl/language_models/unused/ReplicateBase.py +0 -83
  191. edsl/results/ResultsDBMixin.py +0 -238
  192. edsl-0.1.39.dev1.dist-info/RECORD +0 -277
  193. {edsl-0.1.39.dev1.dist-info → edsl-0.1.39.dev2.dist-info}/LICENSE +0 -0
  194. {edsl-0.1.39.dev1.dist-info → edsl-0.1.39.dev2.dist-info}/WHEEL +0 -0
@@ -27,13 +27,6 @@ def decorate_methods_from_mixin(cls, mixin_cls):
27
27
  return cls
28
28
 
29
29
 
30
- # def decorate_all_methods(cls):
31
- # for attr_name, attr_value in cls.__dict__.items():
32
- # if callable(attr_value):
33
- # setattr(cls, attr_name, to_dataset(attr_value))
34
- # return cls
35
-
36
-
37
30
  # @decorate_all_methods
38
31
  class ScenarioListExportMixin(DatasetExportMixin):
39
32
  """Mixin class for exporting Results objects."""
@@ -1,22 +1,9 @@
1
- import fitz # PyMuPDF
2
1
  import os
3
- import copy
4
- import subprocess
5
- import requests
6
- import tempfile
7
- import os
8
-
9
- # import urllib.parse as urlparse
10
- from urllib.parse import urlparse
11
-
12
- # from edsl import Scenario
13
-
14
- import requests
15
2
  import re
16
- import tempfile
17
- import os
3
+ import copy
18
4
  import atexit
19
- from urllib.parse import urlparse, parse_qs
5
+ import tempfile
6
+ import subprocess
20
7
 
21
8
 
22
9
  class GoogleDriveDownloader:
@@ -25,6 +12,8 @@ class GoogleDriveDownloader:
25
12
 
26
13
  @classmethod
27
14
  def fetch_from_drive(cls, url, filename=None):
15
+ import requests
16
+
28
17
  # Extract file ID from the URL
29
18
  file_id = cls._extract_file_id(url)
30
19
  if not file_id:
@@ -67,6 +56,8 @@ class GoogleDriveDownloader:
67
56
 
68
57
  @staticmethod
69
58
  def _extract_file_id(url):
59
+ from urllib.parse import urlparse, parse_qs
60
+
70
61
  # Try to extract file ID from '/file/d/' format
71
62
  file_id_match = re.search(r"/d/([a-zA-Z0-9-_]+)", url)
72
63
  if file_id_match:
@@ -92,6 +83,8 @@ class GoogleDriveDownloader:
92
83
 
93
84
  def fetch_and_save_pdf(url, filename):
94
85
  # Send a GET request to the URL
86
+ import requests
87
+
95
88
  response = requests.get(url)
96
89
 
97
90
  # Check if the request was successful
@@ -114,11 +107,6 @@ def fetch_and_save_pdf(url, filename):
114
107
  return temp_file_path
115
108
 
116
109
 
117
- # Example usage:
118
- # url = "https://example.com/sample.pdf"
119
- # fetch_and_save_pdf(url, "sample.pdf")
120
-
121
-
122
110
  class ScenarioListPdfMixin:
123
111
  @classmethod
124
112
  def from_pdf(cls, filename_or_url, collapse_pages=False):
@@ -151,6 +139,8 @@ class ScenarioListPdfMixin:
151
139
 
152
140
  @staticmethod
153
141
  def is_url(string):
142
+ from urllib.parse import urlparse
143
+
154
144
  try:
155
145
  result = urlparse(string)
156
146
  return all([result.scheme, result.netloc])
@@ -189,7 +179,8 @@ class ScenarioListPdfMixin:
189
179
 
190
180
  @staticmethod
191
181
  def extract_text_from_pdf(pdf_path):
192
- from edsl import Scenario
182
+ from edsl.scenarios.Scenario import Scenario
183
+ import fitz # PyMuPDF
193
184
 
194
185
  # TODO: Add test case
195
186
  # Ensure the file exists
@@ -243,19 +234,6 @@ class ScenarioListPdfMixin:
243
234
 
244
235
 
245
236
  if __name__ == "__main__":
246
- pass
247
-
248
- # from edsl import ScenarioList
249
-
250
- # class ScenarioListNew(ScenarioList, ScenaroListPdfMixin):
251
- # pass
252
-
253
- # #ScenarioListNew.create_hello_world_pdf('hello_world')
254
- # #scenarios = ScenarioListNew.from_pdf('hello_world.pdf')
255
- # #print(scenarios)
237
+ import doctest
256
238
 
257
- # from edsl import ScenarioList, QuestionFreeText
258
- # homo_silicus = ScenarioList.from_pdf('w31122.pdf')
259
- # q = QuestionFreeText(question_text = "What is the key point of the text in {{ text }}?", question_name = "key_point")
260
- # results = q.by(homo_silicus).run(progress_bar = True)
261
- # results.select('scenario.page', 'answer.key_point').order_by('page').print()
239
+ doctest.testmod()
@@ -0,0 +1,156 @@
1
+ from typing import TYPE_CHECKING
2
+
3
+
4
class ScenarioSelector:
    """
    Perform field selection on ScenarioList objects, with wildcard support.

    A pattern without ``*`` must equal a field name exactly. A pattern may
    contain any number of ``*`` wildcards (start, end, or middle); each one
    matches zero or more characters.

    Args:
        scenario_list: The ScenarioList object to perform selections on

    Examples:
        >>> from edsl import Scenario, ScenarioList
        >>> scenarios = ScenarioList([Scenario({'test_1': 1, 'test_2': 2, 'other': 3}), Scenario({'test_1': 4, 'test_2': 5, 'other': 6})])
        >>> selector = ScenarioSelector(scenarios)
        >>> selector.select('test*')
        ScenarioList([Scenario({'test_1': 1, 'test_2': 2}), Scenario({'test_1': 4, 'test_2': 5})])
    """

    def __init__(self, scenario_list: "ScenarioList"):
        """Initialize with a ScenarioList object.

        The available fields are taken from the keys of the *first* scenario
        only; an empty list yields no available fields.
        """
        self.scenario_list = scenario_list
        self.available_fields = (
            list(scenario_list.data[0].keys()) if scenario_list.data else []
        )

    def _match_field_pattern(self, pattern: str, field: str) -> bool:
        """
        Check whether a field name matches a pattern with ``*`` wildcards.

        Args:
            pattern: The pattern to match against; ``*`` may appear anywhere
                and matches zero or more characters. All other characters
                are matched literally.
            field: The field name to check

        Returns:
            True if the whole field name matches the pattern.

        Examples:
            >>> from edsl.scenarios import ScenarioList, Scenario
            >>> selector = ScenarioSelector(ScenarioList([]))
            >>> selector._match_field_pattern('test*', 'test_field')
            True
            >>> selector._match_field_pattern('*field', 'test_field')
            True
            >>> selector._match_field_pattern('test', 'test')
            True
            >>> selector._match_field_pattern('*test*', 'my_test_field')
            True
            >>> selector._match_field_pattern('q*_answer', 'q1_answer')
            True
        """
        import re

        if "*" not in pattern:
            return pattern == field

        # Escape every literal segment so only '*' acts as a wildcard, then
        # require the whole field to match (prefix/suffix/infix all follow).
        regex = ".*".join(re.escape(segment) for segment in pattern.split("*"))
        return re.fullmatch(regex, field, flags=re.DOTALL) is not None

    def _get_matching_fields(self, patterns: list[str]) -> list[str]:
        """
        Get all available fields that match any of the given patterns.

        Args:
            patterns: List of field patterns, may contain wildcards

        Returns:
            Alphabetically sorted list of the field names that match at
            least one pattern (duplicates removed).

        Examples:
            >>> from edsl import Scenario, ScenarioList
            >>> scenarios = ScenarioList([
            ...     Scenario({'test_1': 1, 'test_2': 2, 'other': 3})
            ... ])
            >>> selector = ScenarioSelector(scenarios)
            >>> selector._get_matching_fields(['test*'])
            ['test_1', 'test_2']
        """
        matching_fields = {
            field
            for pattern in patterns
            for field in self.available_fields
            if self._match_field_pattern(pattern, field)
        }
        return sorted(matching_fields)

    def select(self, *fields) -> "ScenarioList":
        """
        Select scenarios with only the referenced fields.

        Supports wildcard patterns using '*' anywhere in a field name.

        Args:
            *fields: Field names or patterns to select. Patterns may include '*' for wildcards.

        Returns:
            A new ScenarioList containing only the matched fields. An empty
            ScenarioList is returned unchanged in type (and empty).

        Raises:
            ValueError: If no fields match the given patterns.

        Examples:
            >>> from edsl import Scenario, ScenarioList
            >>> scenarios = ScenarioList([
            ...     Scenario({'test_1': 1, 'test_2': 2, 'other': 3}),
            ...     Scenario({'test_1': 4, 'test_2': 5, 'other': 6})
            ... ])
            >>> selector = ScenarioSelector(scenarios)
            >>> selector.select('test*')  # Selects all fields starting with 'test'
            ScenarioList([Scenario({'test_1': 1, 'test_2': 2}), Scenario({'test_1': 4, 'test_2': 5})])
            >>> selector.select('*_1')  # Selects all fields ending with '_1'
            ScenarioList([Scenario({'test_1': 1}), Scenario({'test_1': 4})])
            >>> selector.select('test_1', '*_2')  # Multiple patterns
            ScenarioList([Scenario({'test_1': 1, 'test_2': 2}), Scenario({'test_1': 4, 'test_2': 5})])
        """
        if not self.scenario_list.data:
            return self.scenario_list.__class__([])

        # *fields arrives as a tuple; use a list for processing and messages
        patterns = list(fields)

        # Get all fields that match the patterns
        fields_to_select = self._get_matching_fields(patterns)

        # If no fields match, raise an informative error
        if not fields_to_select:
            raise ValueError(
                f"No fields matched the given patterns: {patterns}. "
                f"Available fields are: {self.available_fields}"
            )

        return self.scenario_list.__class__(
            [scenario.select(fields_to_select) for scenario in self.scenario_list.data]
        )

    def get_available_fields(self) -> list[str]:
        """
        Return a sorted list of all available fields in the ScenarioList.

        Returns:
            List of field names available for selection.

        Examples:
            >>> from edsl import Scenario, ScenarioList
            >>> scenarios = ScenarioList([Scenario({'test_1': 1, 'test_2': 2, 'other': 3})])
            >>> selector = ScenarioSelector(scenarios)
            >>> selector.get_available_fields()
            ['other', 'test_1', 'test_2']
        """
        return sorted(self.available_fields)
151
+
152
+
153
if __name__ == "__main__":
    # Run this module's docstring examples; ELLIPSIS lets "..." in expected
    # output stand for arbitrary text.
    from doctest import ELLIPSIS, testmod

    testmod(optionflags=ELLIPSIS)
@@ -1,4 +1,3 @@
1
1
  from edsl.scenarios.Scenario import Scenario
2
2
  from edsl.scenarios.ScenarioList import ScenarioList
3
-
4
- # from edsl.scenarios.FileStore import FileStore
3
+ from edsl.scenarios.FileStore import FileStore
@@ -0,0 +1,85 @@
1
+ from typing import Optional, Dict, Type
2
+ from abc import ABC, abstractmethod
3
+ import importlib.metadata
4
+ import importlib.util
5
+
6
+ from edsl.utilities.is_notebook import is_notebook
7
+
8
+
9
class FileMethods(ABC):
    """Abstract base for per-file-type handlers, with a suffix registry.

    Subclasses that expose a ``suffix`` attribute are registered in
    ``_handlers`` when they are defined, keyed by that suffix.
    """

    # Maps a file suffix (without the dot) to the handler class for it.
    _handlers: Dict[str, Type["FileMethods"]] = {}

    def __init__(self, path: Optional[str] = None):
        """Store the path of the file this handler operates on."""
        self.path = path

    def __init_subclass__(cls) -> None:
        """Register subclasses automatically when they're defined.

        ``hasattr`` also sees an inherited ``suffix``, so a subclass that
        does not declare its own suffix replaces its parent's registration.
        """
        super().__init_subclass__()
        if hasattr(cls, "suffix"):
            FileMethods._handlers[cls.suffix] = cls

    @classmethod
    def get_handler(cls, suffix: str) -> Optional[Type["FileMethods"]]:
        """Get the handler class for a given suffix, or None if unknown."""
        # Load plugins if no handler has been registered yet
        if not cls._handlers:
            cls.load_plugins()
        return cls._handlers.get(suffix.lower())

    @classmethod
    def load_plugins(cls):
        """Load all file handler plugins including built-ins and external plugins."""

        # Imported for its side effect: defining the built-in handler
        # classes registers them via __init_subclass__.
        from edsl.scenarios import handlers  # noqa: F401

        # Then load any external plugins
        try:
            entries = importlib.metadata.entry_points(group="file_handlers")
        except TypeError:
            # Python < 3.10: entry_points() accepts no selection keywords and
            # returns a mapping of group name -> entry points.
            entries = importlib.metadata.entry_points().get("file_handlers", [])

        for ep in entries:
            try:
                # Loading is enough; registration happens automatically via
                # __init_subclass__ when the handler class is defined.
                ep.load()
            except Exception as e:
                # Best effort: one broken plugin must not block the others.
                print(f"Failed to load external handler {ep.name}: {e}")

    @classmethod
    def get_handler_for_path(cls, path: str) -> Optional[Type["FileMethods"]]:
        """Get the handler class for a file path, based on the text after its last dot."""
        suffix = path.split(".")[-1].lower() if "." in path else ""
        return cls.get_handler(suffix)

    @classmethod
    def create(cls, path: str) -> Optional["FileMethods"]:
        """Create a handler instance for the given path, or None if no handler matches."""
        handler_class = cls.get_handler_for_path(path)
        if handler_class:
            return handler_class(path)
        return None

    @classmethod
    def supported_file_types(cls):
        """Return the list of registered suffixes, loading plugins first if none are registered."""
        if not cls._handlers:
            cls.load_plugins()
        return list(cls._handlers.keys())

    @abstractmethod
    def view_system(self):
        """Show the file outside a notebook."""
        ...

    @abstractmethod
    def view_notebook(self):
        """Show the file inside a notebook."""
        ...

    def view(self):
        """Dispatch to view_notebook() or view_system() depending on is_notebook()."""
        if is_notebook():
            self.view_notebook()
        else:
            self.view_system()

    @abstractmethod
    def example(self):
        """Produce an example for this file type."""
        ...
@@ -0,0 +1,13 @@
1
+ from .pdf import PdfMethods
2
+ from .docx import DocxMethods
3
+ from .png import PngMethods
4
+ from .txt import TxtMethods
5
+ from .html import HtmlMethods
6
+ from .md import MarkdownMethods
7
+ from .csv import CsvMethods
8
+ from .json import JsonMethods
9
+ from .sql import SqlMethods
10
+ from .pptx import PptxMethods
11
+ from .latex import LaTeXMethods
12
+ from .py import PyMethods
13
+ from .sqlite import SQLiteMethods
@@ -0,0 +1,38 @@
1
+ import tempfile
2
+ from edsl.scenarios.file_methods import FileMethods
3
+
4
+
5
class CsvMethods(FileMethods):
    """File handler for ``.csv`` files."""

    suffix = "csv"

    def view_system(self):
        """Open the CSV file with the operating system's default application.

        Prints a message instead of raising when the file is missing or the
        opener fails.
        """
        import os
        import subprocess
        import sys

        if not os.path.exists(self.path):
            print("CSV file was not found.")
            return

        try:
            # os.name is "posix" on both macOS and Linux, so the platform
            # must be distinguished with sys.platform.
            if sys.platform == "darwin":
                subprocess.run(["open", self.path], check=True)  # macOS
            elif os.name == "nt":
                os.startfile(self.path)  # Windows
            else:
                subprocess.run(["xdg-open", self.path], check=True)  # Linux
        except Exception as e:
            print(f"Error opening CSV: {e}")

    def view_notebook(self):
        """Read the CSV into a pandas DataFrame and display it."""
        import pandas as pd
        from IPython.display import display

        df = pd.read_csv(self.path)
        display(df)

    def example(self):
        """Write a small sample CSV to a temporary file and return its path.

        The temporary file is not deleted automatically.
        """
        import pandas as pd

        df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
        # Reserve a unique name, then close the handle before writing by
        # name: an open NamedTemporaryFile cannot be reopened on Windows.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as f:
            csv_path = f.name
        df.to_csv(csv_path, index=False)
        return csv_path
@@ -0,0 +1,76 @@
1
+ from edsl.scenarios.file_methods import FileMethods
2
+ import os
3
+ import tempfile
4
+
5
+
6
class DocxMethods(FileMethods):
    """File handler for ``.docx`` files."""

    suffix = "docx"

    def extract_text(self):
        """Return the text of all paragraphs, joined with newlines.

        The loaded document is also kept on ``self.doc``.
        """
        from docx import Document

        self.doc = Document(self.path)
        return "\n".join(para.text for para in self.doc.paragraphs)

    def view_system(self):
        """Open the DOCX file with the operating system's default application.

        Prints a message instead of raising when the file is missing or the
        opener fails.
        """
        import os
        import subprocess
        import sys

        if not os.path.exists(self.path):
            print("DOCX file was not found.")
            return

        try:
            # os.name is "posix" on both macOS and Linux, so the platform
            # must be distinguished with sys.platform.
            if sys.platform == "darwin":
                subprocess.run(["open", self.path], check=True)  # macOS
            elif os.name == "nt":
                os.startfile(self.path)  # Windows
            else:
                subprocess.run(["xdg-open", self.path], check=True)  # Linux
        except Exception as e:
            print(f"Error opening DOCX: {e}")

    def view_notebook(self):
        """Convert the document to HTML with mammoth and display it in a scrollable box."""
        import mammoth
        from IPython.display import HTML, display

        with open(self.path, "rb") as docx_file:
            result = mammoth.convert_to_html(docx_file)
            html = f"""
            <div style="width: 800px; height: 800px; padding: 20px; 
                       border: 1px solid #ccc; overflow-y: auto;">
                {result.value}
            </div>
            """
            display(HTML(html))

    def example(self):
        """Create a sample DOCX in a temporary file and return its path.

        The temporary file is not deleted automatically.
        """
        from docx import Document

        # NOTE(review): this also writes test_dir/test1.docx into the current
        # working directory; kept because other code may rely on it -- confirm
        # whether this side effect is still wanted.
        os.makedirs("test_dir", exist_ok=True)
        doc1 = Document()
        _ = doc1.add_heading("First Survey")
        doc1.save("test_dir/test1.docx")

        doc2 = Document()
        _ = doc2.add_heading("Second Survey")

        # Reserve a unique name, then close the handle before saving by
        # name: an open NamedTemporaryFile cannot be reopened on Windows.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as tmp:
            docx_path = tmp.name
        doc2.save(docx_path)

        return docx_path
70
+
71
+
72
if __name__ == "__main__":
    # example() is an instance method, so it needs a handler instance;
    # calling it on the class raises TypeError for the missing `self`.
    docx_temp = DocxMethods().example()
    from edsl.scenarios.FileStore import FileStore

    fs = FileStore(docx_temp)
@@ -0,0 +1,37 @@
1
+ from edsl.scenarios.file_methods import FileMethods
2
+ import tempfile
3
+
4
+
5
class HtmlMethods(FileMethods):
    """File handler for ``.html`` files."""

    suffix = "html"

    def view_system(self):
        """Open the HTML file in the default web browser."""
        import webbrowser
        from pathlib import Path

        # Build a proper file URI: a relative path or one containing spaces
        # does not form a valid URL when simply appended to "file://".
        webbrowser.open(Path(self.path).resolve().as_uri())

    def view_notebook(self):
        """Display the HTML file in an 800x800 IFrame."""
        from IPython.display import IFrame, display

        display(IFrame(self.path, width=800, height=800))

    def example(self):
        """Write a minimal sample HTML page to a temporary file and return its path.

        The temporary file is not deleted automatically.
        """
        html_string = b"""
        <html>
            <head>
                <title>Test</title>
            </head>
            <body>
                <h1>Hello, World!</h1>
            </body>
        </html>
        """

        with tempfile.NamedTemporaryFile(delete=False, suffix=".html") as f:
            f.write(html_string)
        return f.name
@@ -0,0 +1,111 @@
1
+ from edsl.scenarios.file_methods import FileMethods
2
+ import tempfile
3
+ import json
4
+ from typing import Optional, Dict, Any
5
+
6
+
7
class JsonMethods(FileMethods):
    """File handler for ``.json`` files: viewing, validating and formatting."""

    suffix = "json"

    def view_system(self):
        """Open the JSON file with the operating system's default application.

        Prints a message instead of raising when the file is missing or the
        opener fails.
        """
        import os
        import subprocess
        import sys

        if not os.path.exists(self.path):
            print("JSON file was not found.")
            return

        try:
            # os.name is "posix" on both macOS and Linux, so the platform
            # must be distinguished with sys.platform.
            if sys.platform == "darwin":
                subprocess.run(["open", self.path], check=True)  # macOS
            elif os.name == "nt":
                os.startfile(self.path)  # Windows
            else:
                subprocess.run(["xdg-open", self.path], check=True)  # Linux
        except Exception as e:
            print(f"Error opening JSON: {e}")

    def view_notebook(self):
        """Display the parsed JSON and a link to the file; print errors instead of raising."""
        from IPython.display import FileLink, JSON, display

        # Read and parse the JSON file
        try:
            with open(self.path, "r", encoding="utf-8") as f:
                content = json.load(f)

            # Display formatted JSON
            display(JSON(content))

            # Provide download link
            display(FileLink(self.path))
        except json.JSONDecodeError as e:
            print(f"Error parsing JSON: {e}")
        except Exception as e:
            print(f"Error reading file: {e}")

    def validate_json(self, schema: Optional[Dict[str, Any]] = None) -> bool:
        """
        Validate the JSON file against a schema if provided,
        or check if it's valid JSON if no schema is provided.

        Returns:
            True when the file parses (and satisfies ``schema`` if given);
            False otherwise, after printing the reason.
        """
        try:
            with open(self.path, "r", encoding="utf-8") as f:
                content = json.load(f)

            if schema is not None:
                from jsonschema import validate

                validate(instance=content, schema=schema)

            return True
        except json.JSONDecodeError as e:
            print(f"Invalid JSON format: {e}")
            return False
        except Exception as e:
            print(f"Validation error: {e}")
            return False

    def pretty_print(self):
        """Pretty print the JSON content with 2-space indentation and sorted keys."""
        try:
            with open(self.path, "r", encoding="utf-8") as f:
                content = json.load(f)

            pretty_json = json.dumps(content, indent=2, sort_keys=True)
            print(pretty_json)
        except Exception as e:
            print(f"Error pretty printing JSON: {e}")

    def example(self):
        """Write a sample JSON document to a temporary file and return its path.

        The temporary file is not deleted automatically.
        """
        sample_json = {
            "person": {
                "name": "John Doe",
                "age": 30,
                "contact": {"email": "john@example.com", "phone": "+1-555-555-5555"},
                "interests": ["programming", "data science", "machine learning"],
                "active": True,
                "metadata": {"last_updated": "2024-01-01", "version": 1.0},
            }
        }

        with tempfile.NamedTemporaryFile(
            delete=False, suffix=".json", mode="w", encoding="utf-8"
        ) as f:
            json.dump(sample_json, f, indent=2)
        return f.name

    def format_file(self):
        """Read, format, and write back the JSON with consistent formatting.

        Returns:
            True on success; False (after printing the error) on failure.
        """
        try:
            # Read the current content
            with open(self.path, "r", encoding="utf-8") as f:
                content = json.load(f)

            # Write back with consistent formatting
            with open(self.path, "w", encoding="utf-8") as f:
                json.dump(content, f, indent=2, sort_keys=True)

            return True
        except Exception as e:
            print(f"Error formatting JSON file: {e}")
            return False
@@ -0,0 +1,5 @@
1
+ from edsl.scenarios.handlers.txt import TxtMethods
2
+
3
+
4
class LaTeXMethods(TxtMethods):
    """File handler for LaTeX sources; reuses the plain-text handler's behavior."""

    # Declare an own suffix: without it the class inherits TxtMethods's
    # suffix and would be registered under that key, replacing the
    # plain-text handler while leaving ".tex" files without one.
    suffix = "tex"