edsl 0.1.38.dev4__py3-none-any.whl → 0.1.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212) hide show
  1. edsl/Base.py +197 -116
  2. edsl/__init__.py +15 -7
  3. edsl/__version__.py +1 -1
  4. edsl/agents/Agent.py +351 -147
  5. edsl/agents/AgentList.py +211 -73
  6. edsl/agents/Invigilator.py +101 -50
  7. edsl/agents/InvigilatorBase.py +62 -70
  8. edsl/agents/PromptConstructor.py +143 -225
  9. edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
  10. edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
  11. edsl/agents/__init__.py +0 -1
  12. edsl/agents/prompt_helpers.py +3 -3
  13. edsl/agents/question_option_processor.py +172 -0
  14. edsl/auto/AutoStudy.py +18 -5
  15. edsl/auto/StageBase.py +53 -40
  16. edsl/auto/StageQuestions.py +2 -1
  17. edsl/auto/utilities.py +0 -6
  18. edsl/config.py +22 -2
  19. edsl/conversation/car_buying.py +2 -1
  20. edsl/coop/CoopFunctionsMixin.py +15 -0
  21. edsl/coop/ExpectedParrotKeyHandler.py +125 -0
  22. edsl/coop/PriceFetcher.py +1 -1
  23. edsl/coop/coop.py +125 -47
  24. edsl/coop/utils.py +14 -14
  25. edsl/data/Cache.py +45 -27
  26. edsl/data/CacheEntry.py +12 -15
  27. edsl/data/CacheHandler.py +31 -12
  28. edsl/data/RemoteCacheSync.py +154 -46
  29. edsl/data/__init__.py +4 -3
  30. edsl/data_transfer_models.py +2 -1
  31. edsl/enums.py +27 -0
  32. edsl/exceptions/__init__.py +50 -50
  33. edsl/exceptions/agents.py +12 -0
  34. edsl/exceptions/inference_services.py +5 -0
  35. edsl/exceptions/questions.py +24 -6
  36. edsl/exceptions/scenarios.py +7 -0
  37. edsl/inference_services/AnthropicService.py +38 -19
  38. edsl/inference_services/AvailableModelCacheHandler.py +184 -0
  39. edsl/inference_services/AvailableModelFetcher.py +215 -0
  40. edsl/inference_services/AwsBedrock.py +0 -2
  41. edsl/inference_services/AzureAI.py +0 -2
  42. edsl/inference_services/GoogleService.py +7 -12
  43. edsl/inference_services/InferenceServiceABC.py +18 -85
  44. edsl/inference_services/InferenceServicesCollection.py +120 -79
  45. edsl/inference_services/MistralAIService.py +0 -3
  46. edsl/inference_services/OpenAIService.py +47 -35
  47. edsl/inference_services/PerplexityService.py +0 -3
  48. edsl/inference_services/ServiceAvailability.py +135 -0
  49. edsl/inference_services/TestService.py +11 -10
  50. edsl/inference_services/TogetherAIService.py +5 -3
  51. edsl/inference_services/data_structures.py +134 -0
  52. edsl/jobs/AnswerQuestionFunctionConstructor.py +223 -0
  53. edsl/jobs/Answers.py +1 -14
  54. edsl/jobs/FetchInvigilator.py +47 -0
  55. edsl/jobs/InterviewTaskManager.py +98 -0
  56. edsl/jobs/InterviewsConstructor.py +50 -0
  57. edsl/jobs/Jobs.py +356 -431
  58. edsl/jobs/JobsChecks.py +35 -10
  59. edsl/jobs/JobsComponentConstructor.py +189 -0
  60. edsl/jobs/JobsPrompts.py +6 -4
  61. edsl/jobs/JobsRemoteInferenceHandler.py +205 -133
  62. edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
  63. edsl/jobs/RequestTokenEstimator.py +30 -0
  64. edsl/jobs/async_interview_runner.py +138 -0
  65. edsl/jobs/buckets/BucketCollection.py +44 -3
  66. edsl/jobs/buckets/TokenBucket.py +53 -21
  67. edsl/jobs/buckets/TokenBucketAPI.py +211 -0
  68. edsl/jobs/buckets/TokenBucketClient.py +191 -0
  69. edsl/jobs/check_survey_scenario_compatibility.py +85 -0
  70. edsl/jobs/data_structures.py +120 -0
  71. edsl/jobs/decorators.py +35 -0
  72. edsl/jobs/interviews/Interview.py +143 -408
  73. edsl/jobs/jobs_status_enums.py +9 -0
  74. edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
  75. edsl/jobs/results_exceptions_handler.py +98 -0
  76. edsl/jobs/runners/JobsRunnerAsyncio.py +88 -403
  77. edsl/jobs/runners/JobsRunnerStatus.py +133 -165
  78. edsl/jobs/tasks/QuestionTaskCreator.py +21 -19
  79. edsl/jobs/tasks/TaskHistory.py +38 -18
  80. edsl/jobs/tasks/task_status_enum.py +0 -2
  81. edsl/language_models/ComputeCost.py +63 -0
  82. edsl/language_models/LanguageModel.py +194 -236
  83. edsl/language_models/ModelList.py +28 -19
  84. edsl/language_models/PriceManager.py +127 -0
  85. edsl/language_models/RawResponseHandler.py +106 -0
  86. edsl/language_models/ServiceDataSources.py +0 -0
  87. edsl/language_models/__init__.py +1 -2
  88. edsl/language_models/key_management/KeyLookup.py +63 -0
  89. edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
  90. edsl/language_models/key_management/KeyLookupCollection.py +38 -0
  91. edsl/language_models/key_management/__init__.py +0 -0
  92. edsl/language_models/key_management/models.py +131 -0
  93. edsl/language_models/model.py +256 -0
  94. edsl/language_models/repair.py +2 -2
  95. edsl/language_models/utilities.py +5 -4
  96. edsl/notebooks/Notebook.py +19 -14
  97. edsl/notebooks/NotebookToLaTeX.py +142 -0
  98. edsl/prompts/Prompt.py +29 -39
  99. edsl/questions/ExceptionExplainer.py +77 -0
  100. edsl/questions/HTMLQuestion.py +103 -0
  101. edsl/questions/QuestionBase.py +68 -214
  102. edsl/questions/QuestionBasePromptsMixin.py +7 -3
  103. edsl/questions/QuestionBudget.py +1 -1
  104. edsl/questions/QuestionCheckBox.py +3 -3
  105. edsl/questions/QuestionExtract.py +5 -7
  106. edsl/questions/QuestionFreeText.py +2 -3
  107. edsl/questions/QuestionList.py +10 -18
  108. edsl/questions/QuestionMatrix.py +265 -0
  109. edsl/questions/QuestionMultipleChoice.py +67 -23
  110. edsl/questions/QuestionNumerical.py +2 -4
  111. edsl/questions/QuestionRank.py +7 -17
  112. edsl/questions/SimpleAskMixin.py +4 -3
  113. edsl/questions/__init__.py +2 -1
  114. edsl/questions/{AnswerValidatorMixin.py → answer_validator_mixin.py} +47 -2
  115. edsl/questions/data_structures.py +20 -0
  116. edsl/questions/derived/QuestionLinearScale.py +6 -3
  117. edsl/questions/derived/QuestionTopK.py +1 -1
  118. edsl/questions/descriptors.py +17 -3
  119. edsl/questions/loop_processor.py +149 -0
  120. edsl/questions/{QuestionBaseGenMixin.py → question_base_gen_mixin.py} +57 -50
  121. edsl/questions/question_registry.py +1 -1
  122. edsl/questions/{ResponseValidatorABC.py → response_validator_abc.py} +40 -26
  123. edsl/questions/response_validator_factory.py +34 -0
  124. edsl/questions/templates/matrix/__init__.py +1 -0
  125. edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
  126. edsl/questions/templates/matrix/question_presentation.jinja +20 -0
  127. edsl/results/CSSParameterizer.py +1 -1
  128. edsl/results/Dataset.py +170 -7
  129. edsl/results/DatasetExportMixin.py +168 -305
  130. edsl/results/DatasetTree.py +28 -8
  131. edsl/results/MarkdownToDocx.py +122 -0
  132. edsl/results/MarkdownToPDF.py +111 -0
  133. edsl/results/Result.py +298 -206
  134. edsl/results/Results.py +149 -131
  135. edsl/results/ResultsExportMixin.py +2 -0
  136. edsl/results/TableDisplay.py +98 -171
  137. edsl/results/TextEditor.py +50 -0
  138. edsl/results/__init__.py +1 -1
  139. edsl/results/file_exports.py +252 -0
  140. edsl/results/{Selector.py → results_selector.py} +23 -13
  141. edsl/results/smart_objects.py +96 -0
  142. edsl/results/table_data_class.py +12 -0
  143. edsl/results/table_renderers.py +118 -0
  144. edsl/scenarios/ConstructDownloadLink.py +109 -0
  145. edsl/scenarios/DocumentChunker.py +102 -0
  146. edsl/scenarios/DocxScenario.py +16 -0
  147. edsl/scenarios/FileStore.py +150 -239
  148. edsl/scenarios/PdfExtractor.py +40 -0
  149. edsl/scenarios/Scenario.py +90 -193
  150. edsl/scenarios/ScenarioHtmlMixin.py +4 -3
  151. edsl/scenarios/ScenarioList.py +415 -244
  152. edsl/scenarios/ScenarioListExportMixin.py +0 -7
  153. edsl/scenarios/ScenarioListPdfMixin.py +15 -37
  154. edsl/scenarios/__init__.py +1 -2
  155. edsl/scenarios/directory_scanner.py +96 -0
  156. edsl/scenarios/file_methods.py +85 -0
  157. edsl/scenarios/handlers/__init__.py +13 -0
  158. edsl/scenarios/handlers/csv.py +49 -0
  159. edsl/scenarios/handlers/docx.py +76 -0
  160. edsl/scenarios/handlers/html.py +37 -0
  161. edsl/scenarios/handlers/json.py +111 -0
  162. edsl/scenarios/handlers/latex.py +5 -0
  163. edsl/scenarios/handlers/md.py +51 -0
  164. edsl/scenarios/handlers/pdf.py +68 -0
  165. edsl/scenarios/handlers/png.py +39 -0
  166. edsl/scenarios/handlers/pptx.py +105 -0
  167. edsl/scenarios/handlers/py.py +294 -0
  168. edsl/scenarios/handlers/sql.py +313 -0
  169. edsl/scenarios/handlers/sqlite.py +149 -0
  170. edsl/scenarios/handlers/txt.py +33 -0
  171. edsl/scenarios/{ScenarioJoin.py → scenario_join.py} +10 -6
  172. edsl/scenarios/scenario_selector.py +156 -0
  173. edsl/study/ObjectEntry.py +1 -1
  174. edsl/study/SnapShot.py +1 -1
  175. edsl/study/Study.py +5 -12
  176. edsl/surveys/ConstructDAG.py +92 -0
  177. edsl/surveys/EditSurvey.py +221 -0
  178. edsl/surveys/InstructionHandler.py +100 -0
  179. edsl/surveys/MemoryManagement.py +72 -0
  180. edsl/surveys/Rule.py +5 -4
  181. edsl/surveys/RuleCollection.py +25 -27
  182. edsl/surveys/RuleManager.py +172 -0
  183. edsl/surveys/Simulator.py +75 -0
  184. edsl/surveys/Survey.py +270 -791
  185. edsl/surveys/SurveyCSS.py +20 -8
  186. edsl/surveys/{SurveyFlowVisualizationMixin.py → SurveyFlowVisualization.py} +11 -9
  187. edsl/surveys/SurveyToApp.py +141 -0
  188. edsl/surveys/__init__.py +4 -2
  189. edsl/surveys/descriptors.py +6 -2
  190. edsl/surveys/instructions/ChangeInstruction.py +1 -2
  191. edsl/surveys/instructions/Instruction.py +4 -13
  192. edsl/surveys/instructions/InstructionCollection.py +11 -6
  193. edsl/templates/error_reporting/interview_details.html +1 -1
  194. edsl/templates/error_reporting/report.html +1 -1
  195. edsl/tools/plotting.py +1 -1
  196. edsl/utilities/PrettyList.py +56 -0
  197. edsl/utilities/is_notebook.py +18 -0
  198. edsl/utilities/is_valid_variable_name.py +11 -0
  199. edsl/utilities/remove_edsl_version.py +24 -0
  200. edsl/utilities/utilities.py +35 -23
  201. {edsl-0.1.38.dev4.dist-info → edsl-0.1.39.dist-info}/METADATA +12 -10
  202. edsl-0.1.39.dist-info/RECORD +358 -0
  203. {edsl-0.1.38.dev4.dist-info → edsl-0.1.39.dist-info}/WHEEL +1 -1
  204. edsl/language_models/KeyLookup.py +0 -30
  205. edsl/language_models/registry.py +0 -190
  206. edsl/language_models/unused/ReplicateBase.py +0 -83
  207. edsl/results/ResultsDBMixin.py +0 -238
  208. edsl-0.1.38.dev4.dist-info/RECORD +0 -277
  209. /edsl/questions/{RegisterQuestionsMeta.py → register_questions_meta.py} +0 -0
  210. /edsl/results/{ResultsFetchMixin.py → results_fetch_mixin.py} +0 -0
  211. /edsl/results/{ResultsToolsMixin.py → results_tools_mixin.py} +0 -0
  212. {edsl-0.1.38.dev4.dist-info → edsl-0.1.39.dist-info}/LICENSE +0 -0
@@ -27,13 +27,6 @@ def decorate_methods_from_mixin(cls, mixin_cls):
27
27
  return cls
28
28
 
29
29
 
30
- # def decorate_all_methods(cls):
31
- # for attr_name, attr_value in cls.__dict__.items():
32
- # if callable(attr_value):
33
- # setattr(cls, attr_name, to_dataset(attr_value))
34
- # return cls
35
-
36
-
37
30
  # @decorate_all_methods
38
31
  class ScenarioListExportMixin(DatasetExportMixin):
39
32
  """Mixin class for exporting Results objects."""
@@ -1,22 +1,9 @@
1
- import fitz # PyMuPDF
2
1
  import os
3
- import copy
4
- import subprocess
5
- import requests
6
- import tempfile
7
- import os
8
-
9
- # import urllib.parse as urlparse
10
- from urllib.parse import urlparse
11
-
12
- # from edsl import Scenario
13
-
14
- import requests
15
2
  import re
16
- import tempfile
17
- import os
3
+ import copy
18
4
  import atexit
19
- from urllib.parse import urlparse, parse_qs
5
+ import tempfile
6
+ import subprocess
20
7
 
21
8
 
22
9
  class GoogleDriveDownloader:
@@ -25,6 +12,8 @@ class GoogleDriveDownloader:
25
12
 
26
13
  @classmethod
27
14
  def fetch_from_drive(cls, url, filename=None):
15
+ import requests
16
+
28
17
  # Extract file ID from the URL
29
18
  file_id = cls._extract_file_id(url)
30
19
  if not file_id:
@@ -67,6 +56,8 @@ class GoogleDriveDownloader:
67
56
 
68
57
  @staticmethod
69
58
  def _extract_file_id(url):
59
+ from urllib.parse import urlparse, parse_qs
60
+
70
61
  # Try to extract file ID from '/file/d/' format
71
62
  file_id_match = re.search(r"/d/([a-zA-Z0-9-_]+)", url)
72
63
  if file_id_match:
@@ -92,6 +83,8 @@ class GoogleDriveDownloader:
92
83
 
93
84
  def fetch_and_save_pdf(url, filename):
94
85
  # Send a GET request to the URL
86
+ import requests
87
+
95
88
  response = requests.get(url)
96
89
 
97
90
  # Check if the request was successful
@@ -114,11 +107,6 @@ def fetch_and_save_pdf(url, filename):
114
107
  return temp_file_path
115
108
 
116
109
 
117
- # Example usage:
118
- # url = "https://example.com/sample.pdf"
119
- # fetch_and_save_pdf(url, "sample.pdf")
120
-
121
-
122
110
  class ScenarioListPdfMixin:
123
111
  @classmethod
124
112
  def from_pdf(cls, filename_or_url, collapse_pages=False):
@@ -151,6 +139,8 @@ class ScenarioListPdfMixin:
151
139
 
152
140
  @staticmethod
153
141
  def is_url(string):
142
+ from urllib.parse import urlparse
143
+
154
144
  try:
155
145
  result = urlparse(string)
156
146
  return all([result.scheme, result.netloc])
@@ -189,7 +179,8 @@ class ScenarioListPdfMixin:
189
179
 
190
180
  @staticmethod
191
181
  def extract_text_from_pdf(pdf_path):
192
- from edsl import Scenario
182
+ from edsl.scenarios.Scenario import Scenario
183
+ import fitz # PyMuPDF
193
184
 
194
185
  # TODO: Add test case
195
186
  # Ensure the file exists
@@ -243,19 +234,6 @@ class ScenarioListPdfMixin:
243
234
 
244
235
 
245
236
  if __name__ == "__main__":
246
- pass
247
-
248
- # from edsl import ScenarioList
249
-
250
- # class ScenarioListNew(ScenarioList, ScenaroListPdfMixin):
251
- # pass
252
-
253
- # #ScenarioListNew.create_hello_world_pdf('hello_world')
254
- # #scenarios = ScenarioListNew.from_pdf('hello_world.pdf')
255
- # #print(scenarios)
237
+ import doctest
256
238
 
257
- # from edsl import ScenarioList, QuestionFreeText
258
- # homo_silicus = ScenarioList.from_pdf('w31122.pdf')
259
- # q = QuestionFreeText(question_text = "What is the key point of the text in {{ text }}?", question_name = "key_point")
260
- # results = q.by(homo_silicus).run(progress_bar = True)
261
- # results.select('scenario.page', 'answer.key_point').order_by('page').print()
239
+ doctest.testmod()
@@ -1,4 +1,3 @@
1
1
  from edsl.scenarios.Scenario import Scenario
2
2
  from edsl.scenarios.ScenarioList import ScenarioList
3
-
4
- # from edsl.scenarios.FileStore import FileStore
3
+ from edsl.scenarios.FileStore import FileStore
@@ -0,0 +1,96 @@
1
+ # directory_scanner.py
2
+ from dataclasses import dataclass
3
+ from typing import Optional, List, Iterator, TypeVar, Generic, Callable, Any
4
+ import os
5
+
6
+ T = TypeVar("T")
7
+
8
+
9
@dataclass
class DirectoryScanner:
    """Find files in a directory and turn each matching path into an object.

    Attributes:
        directory_path: Directory whose files are scanned.
    """

    directory_path: str

    @staticmethod
    def _normalize_suffixes(suffixes: Optional[List[str]]) -> frozenset:
        """Return suffixes as a lower-cased, dot-free set for O(1) lookups.

        ``None`` and empty input give an empty set, which callers treat as
        "no filter". A bare string is accepted as a single suffix.
        """
        if not suffixes:
            return frozenset()
        if isinstance(suffixes, str):
            suffixes = [suffixes]
        return frozenset(s.lower().lstrip(".") for s in suffixes)

    def scan(
        self,
        factory: Callable[[str], T],
        recursive: bool = False,
        suffix_allow_list: Optional[List[str]] = None,
        suffix_exclude_list: Optional[List[str]] = None,
        example_suffix: Optional[str] = None,
        include_no_extension: bool = True,
    ) -> List[T]:
        """Eagerly scan the directory and return the objects built by ``factory``.

        Args:
            factory: Callable that creates an object from a file path.
            recursive: If True, also traverse subdirectories.
            suffix_allow_list: Extensions to keep. Matching ignores case and a
                leading dot. Empty or None means "allow everything".
            suffix_exclude_list: Extensions to drop; takes precedence over the
                allow list. Matching ignores case and a leading dot.
            example_suffix: If given, only paths ending with this string are kept.
            include_no_extension: Whether files without an extension are kept.
                Such files bypass every other filter.

        Returns:
            A list with one ``factory`` result per matching file.
        """
        return list(
            self.iter_scan(
                factory,
                recursive=recursive,
                suffix_allow_list=suffix_allow_list,
                suffix_exclude_list=suffix_exclude_list,
                example_suffix=example_suffix,
                include_no_extension=include_no_extension,
            )
        )

    def iter_scan(
        self,
        factory: Callable[[str], T],
        recursive: bool = False,
        suffix_allow_list: Optional[List[str]] = None,
        suffix_exclude_list: Optional[List[str]] = None,
        example_suffix: Optional[str] = None,
        include_no_extension: bool = True,
    ) -> Iterator[T]:
        """Lazily scan the directory and yield the objects built by ``factory``.

        Takes the same arguments as :meth:`scan`. Nothing is read from disk
        until the returned iterator is advanced.
        """
        # Normalise once per scan instead of once per file.
        allowed = self._normalize_suffixes(suffix_allow_list)
        excluded = self._normalize_suffixes(suffix_exclude_list)

        def should_include_file(filepath: str) -> bool:
            ext = os.path.splitext(filepath)[1][1:].lower()

            # Extension-less files are governed by their own switch only.
            if not ext:
                return include_no_extension

            # Exclusions win over every other rule.
            if ext in excluded:
                return False

            if example_suffix and not filepath.endswith(example_suffix):
                return False

            # An empty allow list means "no restriction".
            if allowed and ext not in allowed:
                return False

            return True

        def iter_files() -> Iterator[str]:
            if recursive:
                for root, _, files in os.walk(self.directory_path):
                    for file in files:
                        yield os.path.join(root, file)
            else:
                for file in os.listdir(self.directory_path):
                    file_path = os.path.join(self.directory_path, file)
                    # listdir also returns directories; keep regular files only.
                    if os.path.isfile(file_path):
                        yield file_path

        for file_path in iter_files():
            if should_include_file(file_path):
                yield factory(file_path)
@@ -0,0 +1,85 @@
1
+ from typing import Optional, Dict, Type
2
+ from abc import ABC, abstractmethod
3
+ import importlib.metadata
4
+ import importlib.util
5
+
6
+ from edsl.utilities.is_notebook import is_notebook
7
+
8
+
9
class FileMethods(ABC):
    """Abstract base class and registry for per-file-type helpers.

    A concrete subclass declares a class attribute ``suffix`` (the extension
    without the dot) and is then registered automatically, so callers can look
    a handler up by suffix or by file path.
    """

    # Maps a lower-cased suffix (no dot) to the handler class registered for it.
    _handlers: Dict[str, Type["FileMethods"]] = {}

    def __init__(self, path: Optional[str] = None):
        self.path = path

    def __init_subclass__(cls) -> None:
        """Register subclasses automatically when they're defined.

        Only a ``suffix`` declared on the class itself counts. A subclass that
        merely inherits its parent's suffix would otherwise overwrite the
        parent's registry entry.
        """
        super().__init_subclass__()
        suffix = cls.__dict__.get("suffix")
        if isinstance(suffix, str):
            # Lookups lower-case the suffix, so store the key the same way.
            FileMethods._handlers[suffix.lower()] = cls

    @classmethod
    def get_handler(cls, suffix: str) -> Optional[Type["FileMethods"]]:
        """Return the handler class for ``suffix``, or None if none is registered."""
        # Load plugins if they haven't been loaded yet
        if not cls._handlers:
            cls.load_plugins()
        return cls._handlers.get(suffix.lower())

    @classmethod
    def load_plugins(cls):
        """Load all file handler plugins including built-ins and external plugins."""
        # Imported for its side effect: importing the package defines the
        # built-in handlers, which registers them via __init_subclass__.
        from edsl.scenarios import handlers  # noqa: F401

        # Then load any external plugins
        try:
            entries = importlib.metadata.entry_points(group="file_handlers")
        except TypeError:
            # Python < 3.10: entry_points() accepts no selection keywords and
            # returns a mapping of group name -> entry points.
            entries = importlib.metadata.entry_points().get("file_handlers", [])

        for ep in entries:
            try:
                # Registration happens automatically via __init_subclass__
                ep.load()
            except Exception as e:
                # Best effort: one broken plugin must not block the others.
                print(f"Failed to load external handler {ep.name}: {e}")

    @classmethod
    def get_handler_for_path(cls, path: str) -> Optional[Type["FileMethods"]]:
        """Return the handler class for a file path, based on its extension."""
        import os

        # Look at the file name only, so a dot in a directory name
        # (e.g. "/data/v1.2/report") is not taken for an extension.
        name = os.path.basename(path)
        suffix = name.rsplit(".", 1)[-1].lower() if "." in name else ""
        return cls.get_handler(suffix)

    @classmethod
    def create(cls, path: str) -> Optional["FileMethods"]:
        """Create a handler instance for ``path``, or None if no handler matches."""
        handler_class = cls.get_handler_for_path(path)
        if handler_class:
            return handler_class(path)
        return None

    @classmethod
    def supported_file_types(cls):
        """Return the list of suffixes that currently have a registered handler."""
        if not cls._handlers:
            cls.load_plugins()
        return list(cls._handlers.keys())

    @abstractmethod
    def view_system(self):
        """Show the file outside of a notebook."""
        ...

    @abstractmethod
    def view_notebook(self):
        """Show the file inside a notebook."""
        ...

    def view(self):
        """Show the file, choosing the notebook or the system viewer."""
        if is_notebook():
            self.view_notebook()
        else:
            self.view_system()

    @abstractmethod
    def example(self):
        """Produce an example of this file type."""
        ...
@@ -0,0 +1,13 @@
1
+ from .pdf import PdfMethods
2
+ from .docx import DocxMethods
3
+ from .png import PngMethods
4
+ from .txt import TxtMethods
5
+ from .html import HtmlMethods
6
+ from .md import MarkdownMethods
7
+ from .csv import CsvMethods
8
+ from .json import JsonMethods
9
+ from .sql import SqlMethods
10
+ from .pptx import PptxMethods
11
+ from .latex import LaTeXMethods
12
+ from .py import PyMethods
13
+ from .sqlite import SQLiteMethods
@@ -0,0 +1,49 @@
1
+ import tempfile
2
+ from edsl.scenarios.file_methods import FileMethods
3
+
4
+
5
class CsvMethods(FileMethods):
    """File helpers for CSV files."""

    suffix = "csv"

    def view_system(self):
        """Open the CSV with the operating system's default application."""
        import os
        import subprocess
        import sys

        if not os.path.exists(self.path):
            print("CSV file was not found.")
            return

        try:
            # os.name is "posix" on both macOS and Linux, so the platform has
            # to be told apart with sys.platform: "open" exists only on macOS.
            if sys.platform == "darwin":
                subprocess.run(["open", self.path], check=True)  # macOS
            elif os.name == "nt":
                os.startfile(self.path)  # Windows
            else:
                subprocess.run(["xdg-open", self.path], check=True)  # Linux
        except Exception as e:
            print(f"Error opening CSV: {e}")

    def view_notebook(self):
        """Render the CSV as a DataFrame in the notebook output."""
        import pandas as pd
        from IPython.display import display

        df = pd.read_csv(self.path)
        display(df)

    def example(self):
        """Write a small sample CSV to a temporary file and return its path.

        The file is created with ``delete=False``, so the caller owns cleanup.
        """
        import pandas as pd

        df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as f:
            df.to_csv(f.name, index=False)
        return f.name

    def to_pandas(self):
        """
        Convert the CSV file to a pandas DataFrame.

        Returns:
            pandas.DataFrame: The data from the CSV as a DataFrame
        """
        import pandas as pd

        return pd.read_csv(self.path)
@@ -0,0 +1,76 @@
1
+ from edsl.scenarios.file_methods import FileMethods
2
+ import os
3
+ import tempfile
4
+
5
+
6
class DocxMethods(FileMethods):
    """File helpers for Word (.docx) documents."""

    suffix = "docx"

    def extract_text(self):
        """Return the text of all paragraphs, joined by newlines.

        The parsed document is also kept on ``self.doc``.
        """
        from docx import Document

        self.doc = Document(self.path)
        return "\n".join(para.text for para in self.doc.paragraphs)

    def view_system(self):
        """Open the document with the operating system's default application."""
        import os
        import subprocess
        import sys

        if not os.path.exists(self.path):
            print("DOCX file was not found.")
            return

        try:
            # os.name is "posix" on both macOS and Linux, so the platform has
            # to be told apart with sys.platform: "open" exists only on macOS.
            if sys.platform == "darwin":
                subprocess.run(["open", self.path], check=True)  # macOS
            elif os.name == "nt":
                os.startfile(self.path)  # Windows
            else:
                subprocess.run(["xdg-open", self.path], check=True)  # Linux
        except Exception as e:
            print(f"Error opening DOCX: {e}")

    def view_notebook(self):
        """Convert the document to HTML and show it in a scrollable box."""
        import mammoth
        from IPython.display import HTML, display

        with open(self.path, "rb") as docx_file:
            result = mammoth.convert_to_html(docx_file)
            html = f"""
            <div style="width: 800px; height: 800px; padding: 20px;
                       border: 1px solid #ccc; overflow-y: auto;">
                {result.value}
            </div>
            """
            display(HTML(html))

    def example(self):
        """Create a sample .docx in a temporary file and return its path.

        The file is created with ``delete=False``, so the caller owns cleanup.
        """
        from docx import Document

        # NOTE(review): this also writes test_dir/test1.docx into the current
        # working directory. It looks like leftover fixture setup; kept because
        # other code may rely on it. Confirm before removing.
        os.makedirs("test_dir", exist_ok=True)
        doc1 = Document()
        _ = doc1.add_heading("First Survey")
        doc1.save("test_dir/test1.docx")

        doc2 = Document()
        _ = doc2.add_heading("Second Survey")

        # Reserve a name, close the handle, then save: the document is written
        # through its path, not through the open temporary-file handle.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as tmp:
            target = tmp.name
        doc2.save(target)

        return target
70
+
71
+
72
if __name__ == "__main__":
    from edsl.scenarios.FileStore import FileStore

    # example() is an instance method; calling it on the class raises
    # TypeError (missing ``self``), so build an instance first.
    docx_temp = DocxMethods().example()

    fs = FileStore(docx_temp)
@@ -0,0 +1,37 @@
1
+ from edsl.scenarios.file_methods import FileMethods
2
+ import tempfile
3
+
4
+
5
class HtmlMethods(FileMethods):
    """File helpers for HTML files."""

    suffix = "html"

    def view_system(self):
        """Open the HTML file in the default web browser."""
        import webbrowser
        from pathlib import Path

        # Build a proper file URI: plain "file://" + path breaks for relative
        # paths, Windows paths, and paths with characters that need escaping.
        webbrowser.open(Path(self.path).resolve().as_uri())

    def view_notebook(self):
        """Embed the HTML file in the notebook output as an 800x800 iframe."""
        from IPython.display import IFrame, display

        display(IFrame(self.path, width=800, height=800))

    def example(self):
        """Write a minimal HTML page to a temporary file and return its path.

        The file is created with ``delete=False``, so the caller owns cleanup.
        """
        html_string = b"""
        <html>
        <head>
        <title>Test</title>
        </head>
        <body>
        <h1>Hello, World!</h1>
        </body>
        </html>
        """

        with tempfile.NamedTemporaryFile(delete=False, suffix=".html") as f:
            f.write(html_string)
        return f.name
@@ -0,0 +1,111 @@
1
+ from edsl.scenarios.file_methods import FileMethods
2
+ import tempfile
3
+ import json
4
+ from typing import Optional, Dict, Any
5
+
6
+
7
class JsonMethods(FileMethods):
    """File helpers for JSON files."""

    suffix = "json"

    def _read_json(self) -> Any:
        """Parse and return the content of the JSON file at ``self.path``."""
        with open(self.path, "r", encoding="utf-8") as f:
            return json.load(f)

    def view_system(self):
        """Open the JSON file with the operating system's default application."""
        import os
        import subprocess
        import sys

        if not os.path.exists(self.path):
            print("JSON file was not found.")
            return

        try:
            # os.name is "posix" on both macOS and Linux, so the platform has
            # to be told apart with sys.platform: "open" exists only on macOS.
            if sys.platform == "darwin":
                subprocess.run(["open", self.path], check=True)  # macOS
            elif os.name == "nt":
                os.startfile(self.path)  # Windows
            else:
                subprocess.run(["xdg-open", self.path], check=True)  # Linux
        except Exception as e:
            print(f"Error opening JSON: {e}")

    def view_notebook(self):
        """Show the parsed JSON and a download link in the notebook output."""
        from IPython.display import FileLink, JSON, display

        try:
            content = self._read_json()

            # Display formatted JSON
            display(JSON(content))

            # Provide download link
            display(FileLink(self.path))
        except json.JSONDecodeError as e:
            print(f"Error parsing JSON: {e}")
        except Exception as e:
            print(f"Error reading file: {e}")

    def validate_json(self, schema: Optional[Dict[str, Any]] = None) -> bool:
        """
        Validate the JSON file against a schema if provided,
        or check if it's valid JSON if no schema is provided.

        Returns:
            True if the file parses (and matches ``schema`` when given).
            On any failure a message is printed and False is returned.
        """
        try:
            content = self._read_json()

            if schema is not None:
                from jsonschema import validate

                validate(instance=content, schema=schema)

            return True
        except json.JSONDecodeError as e:
            print(f"Invalid JSON format: {e}")
            return False
        except Exception as e:
            print(f"Validation error: {e}")
            return False

    def pretty_print(self):
        """Pretty print the JSON content with proper indentation."""
        try:
            print(json.dumps(self._read_json(), indent=2, sort_keys=True))
        except Exception as e:
            print(f"Error pretty printing JSON: {e}")

    def example(self):
        """Write a sample JSON document to a temporary file and return its path.

        The file is created with ``delete=False``, so the caller owns cleanup.
        """
        sample_json = {
            "person": {
                "name": "John Doe",
                "age": 30,
                "contact": {"email": "john@example.com", "phone": "+1-555-555-5555"},
                "interests": ["programming", "data science", "machine learning"],
                "active": True,
                "metadata": {"last_updated": "2024-01-01", "version": 1.0},
            }
        }

        with tempfile.NamedTemporaryFile(
            delete=False, suffix=".json", mode="w", encoding="utf-8"
        ) as f:
            json.dump(sample_json, f, indent=2)
        return f.name

    def format_file(self):
        """Read, format, and write back the JSON with consistent formatting.

        Returns:
            True on success. On failure a message is printed and False is
            returned.
        """
        try:
            # Serialise before opening for writing: opening with "w" truncates
            # the file, so a failure during dumping must happen first or the
            # original content would be lost.
            formatted = json.dumps(self._read_json(), indent=2, sort_keys=True)

            with open(self.path, "w", encoding="utf-8") as f:
                f.write(formatted)

            return True
        except Exception as e:
            print(f"Error formatting JSON file: {e}")
            return False
@@ -0,0 +1,5 @@
1
+ from edsl.scenarios.handlers.txt import TxtMethods
2
+
3
+
4
class LaTeXMethods(TxtMethods):
    """Handler for LaTeX sources; reuses the behaviour of ``TxtMethods`` unchanged."""

    # Declared explicitly so this handler is looked up for ".tex" files.
    # Without its own suffix the class only carries whatever suffix it
    # inherits and would register under that one instead.
    # NOTE(review): TxtMethods is defined outside this file; this assumes it
    # declares its own suffix and is not meant to serve ".tex". Confirm.
    suffix = "tex"
@@ -0,0 +1,51 @@
1
+ from edsl.scenarios.file_methods import FileMethods
2
+ import tempfile
3
+
4
+
5
class MarkdownMethods(FileMethods):
    """File helpers for Markdown files."""

    suffix = "md"

    def view_system(self):
        """Open the Markdown file with the operating system's default application."""
        import os
        import subprocess
        import sys

        if not os.path.exists(self.path):
            print("Markdown file was not found.")
            return

        try:
            # os.name is "posix" on both macOS and Linux, so the platform has
            # to be told apart with sys.platform: "open" exists only on macOS.
            if sys.platform == "darwin":
                subprocess.run(["open", self.path], check=True)  # macOS
            elif os.name == "nt":
                os.startfile(self.path)  # Windows
            else:
                subprocess.run(["xdg-open", self.path], check=True)  # Linux
        except Exception as e:
            print(f"Error opening Markdown: {e}")

    def view_notebook(self):
        """Render the Markdown and a download link in the notebook output."""
        from IPython.display import FileLink, Markdown, display

        # First display the content of the markdown file
        with open(self.path, "r", encoding="utf-8") as f:
            content = f.read()
        display(Markdown(content))

        # Then provide a download link
        display(FileLink(self.path))

    def example(self):
        """Write a sample Markdown document to a temporary file and return its path.

        The file is created with ``delete=False``, so the caller owns cleanup.
        """
        markdown_content = """# Sample Markdown

## Features
- **Bold text** demonstration
- *Italic text* demonstration
- Code block example:
```python
print("Hello, World!")
```
"""
        with tempfile.NamedTemporaryFile(
            delete=False, suffix=".md", mode="w", encoding="utf-8"
        ) as f:
            f.write(markdown_content)
        return f.name