edsl 0.1.39.dev1__py3-none-any.whl → 0.1.39.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +169 -116
- edsl/__init__.py +14 -6
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +358 -146
- edsl/agents/AgentList.py +211 -73
- edsl/agents/Invigilator.py +88 -36
- edsl/agents/InvigilatorBase.py +59 -70
- edsl/agents/PromptConstructor.py +117 -219
- edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
- edsl/agents/QuestionOptionProcessor.py +172 -0
- edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
- edsl/agents/__init__.py +0 -1
- edsl/agents/prompt_helpers.py +3 -3
- edsl/config.py +22 -2
- edsl/conversation/car_buying.py +2 -1
- edsl/coop/CoopFunctionsMixin.py +15 -0
- edsl/coop/ExpectedParrotKeyHandler.py +125 -0
- edsl/coop/PriceFetcher.py +1 -1
- edsl/coop/coop.py +104 -42
- edsl/coop/utils.py +14 -14
- edsl/data/Cache.py +21 -14
- edsl/data/CacheEntry.py +12 -15
- edsl/data/CacheHandler.py +33 -12
- edsl/data/__init__.py +4 -3
- edsl/data_transfer_models.py +2 -1
- edsl/enums.py +20 -0
- edsl/exceptions/__init__.py +50 -50
- edsl/exceptions/agents.py +12 -0
- edsl/exceptions/inference_services.py +5 -0
- edsl/exceptions/questions.py +24 -6
- edsl/exceptions/scenarios.py +7 -0
- edsl/inference_services/AnthropicService.py +0 -3
- edsl/inference_services/AvailableModelCacheHandler.py +184 -0
- edsl/inference_services/AvailableModelFetcher.py +209 -0
- edsl/inference_services/AwsBedrock.py +0 -2
- edsl/inference_services/AzureAI.py +0 -2
- edsl/inference_services/GoogleService.py +2 -11
- edsl/inference_services/InferenceServiceABC.py +18 -85
- edsl/inference_services/InferenceServicesCollection.py +105 -80
- edsl/inference_services/MistralAIService.py +0 -3
- edsl/inference_services/OpenAIService.py +1 -4
- edsl/inference_services/PerplexityService.py +0 -3
- edsl/inference_services/ServiceAvailability.py +135 -0
- edsl/inference_services/TestService.py +11 -8
- edsl/inference_services/data_structures.py +62 -0
- edsl/jobs/AnswerQuestionFunctionConstructor.py +188 -0
- edsl/jobs/Answers.py +1 -14
- edsl/jobs/FetchInvigilator.py +40 -0
- edsl/jobs/InterviewTaskManager.py +98 -0
- edsl/jobs/InterviewsConstructor.py +48 -0
- edsl/jobs/Jobs.py +102 -243
- edsl/jobs/JobsChecks.py +35 -10
- edsl/jobs/JobsComponentConstructor.py +189 -0
- edsl/jobs/JobsPrompts.py +5 -3
- edsl/jobs/JobsRemoteInferenceHandler.py +128 -80
- edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
- edsl/jobs/RequestTokenEstimator.py +30 -0
- edsl/jobs/buckets/BucketCollection.py +44 -3
- edsl/jobs/buckets/TokenBucket.py +53 -21
- edsl/jobs/buckets/TokenBucketAPI.py +211 -0
- edsl/jobs/buckets/TokenBucketClient.py +191 -0
- edsl/jobs/decorators.py +35 -0
- edsl/jobs/interviews/Interview.py +77 -380
- edsl/jobs/jobs_status_enums.py +9 -0
- edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
- edsl/jobs/runners/JobsRunnerAsyncio.py +4 -49
- edsl/jobs/tasks/QuestionTaskCreator.py +21 -19
- edsl/jobs/tasks/TaskHistory.py +14 -15
- edsl/jobs/tasks/task_status_enum.py +0 -2
- edsl/language_models/ComputeCost.py +63 -0
- edsl/language_models/LanguageModel.py +137 -234
- edsl/language_models/ModelList.py +11 -13
- edsl/language_models/PriceManager.py +127 -0
- edsl/language_models/RawResponseHandler.py +106 -0
- edsl/language_models/ServiceDataSources.py +0 -0
- edsl/language_models/__init__.py +0 -1
- edsl/language_models/key_management/KeyLookup.py +63 -0
- edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
- edsl/language_models/key_management/KeyLookupCollection.py +38 -0
- edsl/language_models/key_management/__init__.py +0 -0
- edsl/language_models/key_management/models.py +131 -0
- edsl/language_models/registry.py +49 -59
- edsl/language_models/repair.py +2 -2
- edsl/language_models/utilities.py +5 -4
- edsl/notebooks/Notebook.py +19 -14
- edsl/notebooks/NotebookToLaTeX.py +142 -0
- edsl/prompts/Prompt.py +29 -39
- edsl/questions/AnswerValidatorMixin.py +47 -2
- edsl/questions/ExceptionExplainer.py +77 -0
- edsl/questions/HTMLQuestion.py +103 -0
- edsl/questions/LoopProcessor.py +149 -0
- edsl/questions/QuestionBase.py +37 -192
- edsl/questions/QuestionBaseGenMixin.py +52 -48
- edsl/questions/QuestionBasePromptsMixin.py +7 -3
- edsl/questions/QuestionCheckBox.py +1 -1
- edsl/questions/QuestionExtract.py +1 -1
- edsl/questions/QuestionFreeText.py +1 -2
- edsl/questions/QuestionList.py +3 -5
- edsl/questions/QuestionMatrix.py +265 -0
- edsl/questions/QuestionMultipleChoice.py +66 -22
- edsl/questions/QuestionNumerical.py +1 -3
- edsl/questions/QuestionRank.py +6 -16
- edsl/questions/ResponseValidatorABC.py +37 -11
- edsl/questions/ResponseValidatorFactory.py +28 -0
- edsl/questions/SimpleAskMixin.py +4 -3
- edsl/questions/__init__.py +1 -0
- edsl/questions/derived/QuestionLinearScale.py +6 -3
- edsl/questions/derived/QuestionTopK.py +1 -1
- edsl/questions/descriptors.py +17 -3
- edsl/questions/question_registry.py +1 -1
- edsl/questions/templates/matrix/__init__.py +1 -0
- edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
- edsl/questions/templates/matrix/question_presentation.jinja +20 -0
- edsl/results/CSSParameterizer.py +1 -1
- edsl/results/Dataset.py +170 -7
- edsl/results/DatasetExportMixin.py +224 -302
- edsl/results/DatasetTree.py +28 -8
- edsl/results/MarkdownToDocx.py +122 -0
- edsl/results/MarkdownToPDF.py +111 -0
- edsl/results/Result.py +192 -206
- edsl/results/Results.py +120 -113
- edsl/results/ResultsExportMixin.py +2 -0
- edsl/results/Selector.py +23 -13
- edsl/results/TableDisplay.py +98 -171
- edsl/results/TextEditor.py +50 -0
- edsl/results/__init__.py +1 -1
- edsl/results/smart_objects.py +96 -0
- edsl/results/table_data_class.py +12 -0
- edsl/results/table_renderers.py +118 -0
- edsl/scenarios/ConstructDownloadLink.py +109 -0
- edsl/scenarios/DirectoryScanner.py +96 -0
- edsl/scenarios/DocumentChunker.py +102 -0
- edsl/scenarios/DocxScenario.py +16 -0
- edsl/scenarios/FileStore.py +118 -239
- edsl/scenarios/PdfExtractor.py +40 -0
- edsl/scenarios/Scenario.py +90 -193
- edsl/scenarios/ScenarioHtmlMixin.py +4 -3
- edsl/scenarios/ScenarioJoin.py +10 -6
- edsl/scenarios/ScenarioList.py +383 -240
- edsl/scenarios/ScenarioListExportMixin.py +0 -7
- edsl/scenarios/ScenarioListPdfMixin.py +15 -37
- edsl/scenarios/ScenarioSelector.py +156 -0
- edsl/scenarios/__init__.py +1 -2
- edsl/scenarios/file_methods.py +85 -0
- edsl/scenarios/handlers/__init__.py +13 -0
- edsl/scenarios/handlers/csv.py +38 -0
- edsl/scenarios/handlers/docx.py +76 -0
- edsl/scenarios/handlers/html.py +37 -0
- edsl/scenarios/handlers/json.py +111 -0
- edsl/scenarios/handlers/latex.py +5 -0
- edsl/scenarios/handlers/md.py +51 -0
- edsl/scenarios/handlers/pdf.py +68 -0
- edsl/scenarios/handlers/png.py +39 -0
- edsl/scenarios/handlers/pptx.py +105 -0
- edsl/scenarios/handlers/py.py +294 -0
- edsl/scenarios/handlers/sql.py +313 -0
- edsl/scenarios/handlers/sqlite.py +149 -0
- edsl/scenarios/handlers/txt.py +33 -0
- edsl/study/ObjectEntry.py +1 -1
- edsl/study/SnapShot.py +1 -1
- edsl/study/Study.py +5 -12
- edsl/surveys/ConstructDAG.py +92 -0
- edsl/surveys/EditSurvey.py +221 -0
- edsl/surveys/InstructionHandler.py +100 -0
- edsl/surveys/MemoryManagement.py +72 -0
- edsl/surveys/Rule.py +5 -4
- edsl/surveys/RuleCollection.py +25 -27
- edsl/surveys/RuleManager.py +172 -0
- edsl/surveys/Simulator.py +75 -0
- edsl/surveys/Survey.py +199 -771
- edsl/surveys/SurveyCSS.py +20 -8
- edsl/surveys/{SurveyFlowVisualizationMixin.py → SurveyFlowVisualization.py} +11 -9
- edsl/surveys/SurveyToApp.py +141 -0
- edsl/surveys/__init__.py +4 -2
- edsl/surveys/descriptors.py +6 -2
- edsl/surveys/instructions/ChangeInstruction.py +1 -2
- edsl/surveys/instructions/Instruction.py +4 -13
- edsl/surveys/instructions/InstructionCollection.py +11 -6
- edsl/templates/error_reporting/interview_details.html +1 -1
- edsl/templates/error_reporting/report.html +1 -1
- edsl/tools/plotting.py +1 -1
- edsl/utilities/PrettyList.py +56 -0
- edsl/utilities/is_notebook.py +18 -0
- edsl/utilities/is_valid_variable_name.py +11 -0
- edsl/utilities/remove_edsl_version.py +24 -0
- edsl/utilities/utilities.py +35 -23
- {edsl-0.1.39.dev1.dist-info → edsl-0.1.39.dev2.dist-info}/METADATA +12 -10
- edsl-0.1.39.dev2.dist-info/RECORD +352 -0
- edsl/language_models/KeyLookup.py +0 -30
- edsl/language_models/unused/ReplicateBase.py +0 -83
- edsl/results/ResultsDBMixin.py +0 -238
- edsl-0.1.39.dev1.dist-info/RECORD +0 -277
- {edsl-0.1.39.dev1.dist-info → edsl-0.1.39.dev2.dist-info}/LICENSE +0 -0
- {edsl-0.1.39.dev1.dist-info → edsl-0.1.39.dev2.dist-info}/WHEEL +0 -0
@@ -27,13 +27,6 @@ def decorate_methods_from_mixin(cls, mixin_cls):
|
|
27
27
|
return cls
|
28
28
|
|
29
29
|
|
30
|
-
# def decorate_all_methods(cls):
|
31
|
-
# for attr_name, attr_value in cls.__dict__.items():
|
32
|
-
# if callable(attr_value):
|
33
|
-
# setattr(cls, attr_name, to_dataset(attr_value))
|
34
|
-
# return cls
|
35
|
-
|
36
|
-
|
37
30
|
# @decorate_all_methods
|
38
31
|
class ScenarioListExportMixin(DatasetExportMixin):
|
39
32
|
"""Mixin class for exporting Results objects."""
|
@@ -1,22 +1,9 @@
|
|
1
|
-
import fitz # PyMuPDF
|
2
1
|
import os
|
3
|
-
import copy
|
4
|
-
import subprocess
|
5
|
-
import requests
|
6
|
-
import tempfile
|
7
|
-
import os
|
8
|
-
|
9
|
-
# import urllib.parse as urlparse
|
10
|
-
from urllib.parse import urlparse
|
11
|
-
|
12
|
-
# from edsl import Scenario
|
13
|
-
|
14
|
-
import requests
|
15
2
|
import re
|
16
|
-
import
|
17
|
-
import os
|
3
|
+
import copy
|
18
4
|
import atexit
|
19
|
-
|
5
|
+
import tempfile
|
6
|
+
import subprocess
|
20
7
|
|
21
8
|
|
22
9
|
class GoogleDriveDownloader:
|
@@ -25,6 +12,8 @@ class GoogleDriveDownloader:
|
|
25
12
|
|
26
13
|
@classmethod
|
27
14
|
def fetch_from_drive(cls, url, filename=None):
|
15
|
+
import requests
|
16
|
+
|
28
17
|
# Extract file ID from the URL
|
29
18
|
file_id = cls._extract_file_id(url)
|
30
19
|
if not file_id:
|
@@ -67,6 +56,8 @@ class GoogleDriveDownloader:
|
|
67
56
|
|
68
57
|
@staticmethod
|
69
58
|
def _extract_file_id(url):
|
59
|
+
from urllib.parse import urlparse, parse_qs
|
60
|
+
|
70
61
|
# Try to extract file ID from '/file/d/' format
|
71
62
|
file_id_match = re.search(r"/d/([a-zA-Z0-9-_]+)", url)
|
72
63
|
if file_id_match:
|
@@ -92,6 +83,8 @@ class GoogleDriveDownloader:
|
|
92
83
|
|
93
84
|
def fetch_and_save_pdf(url, filename):
|
94
85
|
# Send a GET request to the URL
|
86
|
+
import requests
|
87
|
+
|
95
88
|
response = requests.get(url)
|
96
89
|
|
97
90
|
# Check if the request was successful
|
@@ -114,11 +107,6 @@ def fetch_and_save_pdf(url, filename):
|
|
114
107
|
return temp_file_path
|
115
108
|
|
116
109
|
|
117
|
-
# Example usage:
|
118
|
-
# url = "https://example.com/sample.pdf"
|
119
|
-
# fetch_and_save_pdf(url, "sample.pdf")
|
120
|
-
|
121
|
-
|
122
110
|
class ScenarioListPdfMixin:
|
123
111
|
@classmethod
|
124
112
|
def from_pdf(cls, filename_or_url, collapse_pages=False):
|
@@ -151,6 +139,8 @@ class ScenarioListPdfMixin:
|
|
151
139
|
|
152
140
|
@staticmethod
|
153
141
|
def is_url(string):
|
142
|
+
from urllib.parse import urlparse
|
143
|
+
|
154
144
|
try:
|
155
145
|
result = urlparse(string)
|
156
146
|
return all([result.scheme, result.netloc])
|
@@ -189,7 +179,8 @@ class ScenarioListPdfMixin:
|
|
189
179
|
|
190
180
|
@staticmethod
|
191
181
|
def extract_text_from_pdf(pdf_path):
|
192
|
-
from edsl import Scenario
|
182
|
+
from edsl.scenarios.Scenario import Scenario
|
183
|
+
import fitz # PyMuPDF
|
193
184
|
|
194
185
|
# TODO: Add test case
|
195
186
|
# Ensure the file exists
|
@@ -243,19 +234,6 @@ class ScenarioListPdfMixin:
|
|
243
234
|
|
244
235
|
|
245
236
|
if __name__ == "__main__":
|
246
|
-
|
247
|
-
|
248
|
-
# from edsl import ScenarioList
|
249
|
-
|
250
|
-
# class ScenarioListNew(ScenarioList, ScenaroListPdfMixin):
|
251
|
-
# pass
|
252
|
-
|
253
|
-
# #ScenarioListNew.create_hello_world_pdf('hello_world')
|
254
|
-
# #scenarios = ScenarioListNew.from_pdf('hello_world.pdf')
|
255
|
-
# #print(scenarios)
|
237
|
+
import doctest
|
256
238
|
|
257
|
-
|
258
|
-
# homo_silicus = ScenarioList.from_pdf('w31122.pdf')
|
259
|
-
# q = QuestionFreeText(question_text = "What is the key point of the text in {{ text }}?", question_name = "key_point")
|
260
|
-
# results = q.by(homo_silicus).run(progress_bar = True)
|
261
|
-
# results.select('scenario.page', 'answer.key_point').order_by('page').print()
|
239
|
+
doctest.testmod()
|
@@ -0,0 +1,156 @@
|
|
1
|
+
from typing import TYPE_CHECKING
|
2
|
+
|
3
|
+
|
4
|
+
class ScenarioSelector:
    """
    Perform field selection on ScenarioList objects, with wildcard support.

    The set of selectable fields is taken from the keys of the *first*
    scenario in the list; an empty list has no selectable fields.

    Args:
        scenario_list: The ScenarioList object to perform selections on

    Examples:
        >>> from edsl import Scenario, ScenarioList
        >>> scenarios = ScenarioList([Scenario({'test_1': 1, 'test_2': 2, 'other': 3}), Scenario({'test_1': 4, 'test_2': 5, 'other': 6})])
        >>> selector = ScenarioSelector(scenarios)
        >>> selector.select('test*')
        ScenarioList([Scenario({'test_1': 1, 'test_2': 2}), Scenario({'test_1': 4, 'test_2': 5})])
    """

    def __init__(self, scenario_list: "ScenarioList"):
        """Initialize with a ScenarioList object."""
        self.scenario_list = scenario_list
        # Only the first scenario is inspected to decide which fields exist.
        self.available_fields = (
            list(scenario_list.data[0].keys()) if scenario_list.data else []
        )

    def _match_field_pattern(self, pattern: str, field: str) -> bool:
        """
        Check whether a field name matches a pattern containing wildcards.

        '*' matches any run of characters (including none) and may appear
        anywhere in the pattern: at the start, at the end, or in the middle.
        Every other character is matched literally. A pattern without '*'
        must equal the field name exactly.

        Args:
            pattern: The pattern to match against, may contain '*'
            field: The field name to check

        Returns:
            True if the whole field name matches the pattern.

        Examples:
            >>> from edsl.scenarios import ScenarioList, Scenario
            >>> selector = ScenarioSelector(ScenarioList([]))
            >>> selector._match_field_pattern('test*', 'test_field')
            True
            >>> selector._match_field_pattern('*field', 'test_field')
            True
            >>> selector._match_field_pattern('test', 'test')
            True
            >>> selector._match_field_pattern('*test*', 'my_test_field')
            True
            >>> selector._match_field_pattern('my*field', 'my_test_field')
            True
        """
        import re

        if "*" not in pattern:
            return pattern == field

        # Escape each literal chunk so characters such as '.' or '[' carry no
        # regex meaning, then let every '*' stand for "anything".
        regex = ".*".join(re.escape(chunk) for chunk in pattern.split("*"))
        # DOTALL keeps '*' matching newlines, as the substring/prefix/suffix
        # checks this replaces did.
        return re.fullmatch(regex, field, flags=re.DOTALL) is not None

    def _get_matching_fields(self, patterns: list[str]) -> list[str]:
        """
        Get all fields that match any of the given patterns.

        Args:
            patterns: List of field patterns, may contain wildcards

        Returns:
            Alphabetically sorted list of field names that match at least
            one pattern (each field appears once).

        Examples:
            >>> from edsl import Scenario, ScenarioList
            >>> scenarios = ScenarioList([
            ...     Scenario({'test_1': 1, 'test_2': 2, 'other': 3})
            ... ])
            >>> selector = ScenarioSelector(scenarios)
            >>> selector._get_matching_fields(['test*'])
            ['test_1', 'test_2']
        """
        matching_fields = {
            field
            for pattern in patterns
            for field in self.available_fields
            if self._match_field_pattern(pattern, field)
        }
        return sorted(matching_fields)

    def select(self, *fields) -> "ScenarioList":
        """
        Select scenarios with only the referenced fields.

        Supports wildcard patterns using '*' in field names.

        Args:
            *fields: Field names or patterns to select. Patterns may include '*' for wildcards.

        Returns:
            A new ScenarioList containing only the matched fields. An empty
            ScenarioList is returned unchanged in type when there is no data.

        Raises:
            ValueError: If no fields match the given patterns.

        Examples:
            >>> from edsl import Scenario, ScenarioList
            >>> scenarios = ScenarioList([
            ...     Scenario({'test_1': 1, 'test_2': 2, 'other': 3}),
            ...     Scenario({'test_1': 4, 'test_2': 5, 'other': 6})
            ... ])
            >>> selector = ScenarioSelector(scenarios)
            >>> selector.select('test*')  # Selects all fields starting with 'test'
            ScenarioList([Scenario({'test_1': 1, 'test_2': 2}), Scenario({'test_1': 4, 'test_2': 5})])
            >>> selector.select('*_1')  # Selects all fields ending with '_1'
            ScenarioList([Scenario({'test_1': 1}), Scenario({'test_1': 4})])
            >>> selector.select('test_1', '*_2')  # Multiple patterns
            ScenarioList([Scenario({'test_1': 1, 'test_2': 2}), Scenario({'test_1': 4, 'test_2': 5})])
        """
        # Build the result with the same class as the input so subclasses
        # of ScenarioList round-trip.
        list_cls = self.scenario_list.__class__

        if not self.scenario_list.data:
            return list_cls([])

        patterns = list(fields)
        fields_to_select = self._get_matching_fields(patterns)

        # If no fields match, raise an informative error
        if not fields_to_select:
            raise ValueError(
                f"No fields matched the given patterns: {patterns}. "
                f"Available fields are: {self.available_fields}"
            )

        return list_cls(
            [scenario.select(fields_to_select) for scenario in self.scenario_list.data]
        )

    def get_available_fields(self) -> list[str]:
        """
        Return a sorted list of all available fields in the ScenarioList.

        Returns:
            List of field names available for selection.

        Examples:
            >>> from edsl import Scenario, ScenarioList
            >>> scenarios = ScenarioList([Scenario({'test_1': 1, 'test_2': 2, 'other': 3})])
            >>> selector = ScenarioSelector(scenarios)
            >>> selector.get_available_fields()
            ['other', 'test_1', 'test_2']
        """
        return sorted(self.available_fields)
|
151
|
+
|
152
|
+
|
153
|
+
if __name__ == "__main__":
    # Run the doctests embedded in this module; ELLIPSIS lets expected
    # output use "..." as a placeholder.
    from doctest import ELLIPSIS, testmod

    testmod(optionflags=ELLIPSIS)
|
edsl/scenarios/__init__.py
CHANGED
@@ -0,0 +1,85 @@
|
|
1
|
+
from typing import Optional, Dict, Type
|
2
|
+
from abc import ABC, abstractmethod
|
3
|
+
import importlib.metadata
|
4
|
+
import importlib.util
|
5
|
+
|
6
|
+
from edsl.utilities.is_notebook import is_notebook
|
7
|
+
|
8
|
+
|
9
|
+
class FileMethods(ABC):
    """
    Abstract base for per-file-type handlers, with a suffix-keyed registry.

    Any subclass that defines a ``suffix`` class attribute is registered
    automatically (case-insensitively) when the subclass is created, and can
    then be looked up through ``get_handler`` / ``get_handler_for_path``.
    """

    # Maps lower-cased file suffix (without the dot) -> handler class.
    _handlers: Dict[str, Type["FileMethods"]] = {}

    def __init__(self, path: Optional[str] = None):
        """Store the path of the file this handler operates on."""
        self.path = path

    def __init_subclass__(cls) -> None:
        """Register subclasses automatically when they're defined."""
        super().__init_subclass__()
        if hasattr(cls, "suffix"):
            # Lookups lower-case the suffix, so the key must be lower-cased
            # too or a handler declaring e.g. "PDF" could never be found.
            FileMethods._handlers[cls.suffix.lower()] = cls

    @classmethod
    def get_handler(cls, suffix: str) -> Optional[Type["FileMethods"]]:
        """Get the handler class for a given suffix, or None if unknown."""
        # Load plugins if they haven't been loaded yet
        if not cls._handlers:
            cls.load_plugins()
        return cls._handlers.get(suffix.lower())

    @classmethod
    def load_plugins(cls):
        """Load all file handler plugins including built-ins and external plugins."""
        # Importing the package defines the built-in handler subclasses,
        # which registers them via __init_subclass__.
        from edsl.scenarios import handlers  # noqa: F401

        # Then load any external plugins
        try:
            entries = importlib.metadata.entry_points(group="file_handlers")
        except TypeError:
            # Older Pythons (3.8/3.9): entry_points() accepts no selection
            # arguments and returns a mapping of group name -> entry points.
            entries = importlib.metadata.entry_points().get("file_handlers", [])

        for ep in entries:
            try:
                # Registration happens automatically via __init_subclass__
                ep.load()
            except Exception as e:
                # Best effort: one broken plugin must not block the others.
                print(f"Failed to load external handler {ep.name}: {e}")

    @classmethod
    def get_handler_for_path(cls, path: str) -> Optional[Type["FileMethods"]]:
        """Get the handler class for a file path, based on its last suffix."""
        suffix = path.split(".")[-1].lower() if "." in path else ""
        return cls.get_handler(suffix)

    @classmethod
    def create(cls, path: str) -> Optional["FileMethods"]:
        """Create a handler instance for the given path, or None if unsupported."""
        handler_class = cls.get_handler_for_path(path)
        if handler_class:
            return handler_class(path)
        return None

    @classmethod
    def supported_file_types(cls):
        """Return the list of registered (lower-cased) file suffixes."""
        if not cls._handlers:
            cls.load_plugins()
        return list(cls._handlers.keys())

    @abstractmethod
    def view_system(self):
        """Show the file outside a notebook; implemented by each handler."""
        ...

    @abstractmethod
    def view_notebook(self):
        """Show the file inside a notebook; implemented by each handler."""
        ...

    def view(self):
        """Dispatch to view_notebook() or view_system() depending on is_notebook()."""
        if is_notebook():
            self.view_notebook()
        else:
            self.view_system()

    @abstractmethod
    def example(self):
        """Produce an example for this file type; implemented by each handler."""
        ...
|
@@ -0,0 +1,13 @@
|
|
1
|
+
from .pdf import PdfMethods
|
2
|
+
from .docx import DocxMethods
|
3
|
+
from .png import PngMethods
|
4
|
+
from .txt import TxtMethods
|
5
|
+
from .html import HtmlMethods
|
6
|
+
from .md import MarkdownMethods
|
7
|
+
from .csv import CsvMethods
|
8
|
+
from .json import JsonMethods
|
9
|
+
from .sql import SqlMethods
|
10
|
+
from .pptx import PptxMethods
|
11
|
+
from .latex import LaTeXMethods
|
12
|
+
from .py import PyMethods
|
13
|
+
from .sqlite import SQLiteMethods
|
@@ -0,0 +1,38 @@
|
|
1
|
+
import tempfile
|
2
|
+
from edsl.scenarios.file_methods import FileMethods
|
3
|
+
|
4
|
+
|
5
|
+
class CsvMethods(FileMethods):
    """File handler for ``.csv`` files."""

    suffix = "csv"

    def view_system(self):
        """Open the CSV with the operating system's default application.

        Prints a message instead of raising if the file is missing or the
        opener fails.
        """
        import os
        import subprocess
        import sys

        if not os.path.exists(self.path):
            print("CSV file was not found.")
            return

        try:
            # os.name is "posix" on both macOS and Linux, so it cannot tell
            # them apart; sys.platform can.
            if sys.platform == "darwin":
                subprocess.run(["open", self.path], check=True)  # macOS
            elif os.name == "nt":
                os.startfile(self.path)  # Windows
            else:
                subprocess.run(["xdg-open", self.path], check=True)  # Linux
        except Exception as e:
            print(f"Error opening CSV: {e}")

    def view_notebook(self):
        """Load the CSV into a pandas DataFrame and display it."""
        import pandas as pd
        from IPython.display import display

        df = pd.read_csv(self.path)
        display(df)

    def example(self):
        """Write a small example CSV to a temporary file and return its path.

        The file is created with ``delete=False``; it is not removed here.
        """
        import pandas as pd

        df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
        # Reserve a unique name, then close the handle before pandas opens
        # the path itself so the file is never held open twice.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as f:
            csv_path = f.name
        df.to_csv(csv_path, index=False)
        return csv_path
|
@@ -0,0 +1,76 @@
|
|
1
|
+
from edsl.scenarios.file_methods import FileMethods
|
2
|
+
import os
|
3
|
+
import tempfile
|
4
|
+
|
5
|
+
|
6
|
+
class DocxMethods(FileMethods):
    """File handler for ``.docx`` files."""

    suffix = "docx"

    def extract_text(self):
        """Return the text of every paragraph in the document, newline-joined.

        The parsed document is also kept on ``self.doc``.
        """
        from docx import Document

        self.doc = Document(self.path)
        return "\n".join(para.text for para in self.doc.paragraphs)

    def view_system(self):
        """Open the DOCX with the operating system's default application.

        Prints a message instead of raising if the file is missing or the
        opener fails.
        """
        import os
        import subprocess
        import sys

        if not os.path.exists(self.path):
            print("DOCX file was not found.")
            return

        try:
            # os.name is "posix" on both macOS and Linux, so it cannot tell
            # them apart; sys.platform can.
            if sys.platform == "darwin":
                subprocess.run(["open", self.path], check=True)  # macOS
            elif os.name == "nt":
                os.startfile(self.path)  # Windows
            else:
                subprocess.run(["xdg-open", self.path], check=True)  # Linux
        except Exception as e:
            print(f"Error opening DOCX: {e}")

    def view_notebook(self):
        """Convert the DOCX to HTML with mammoth and display it in a scrollable box."""
        import mammoth
        from IPython.display import HTML, display

        with open(self.path, "rb") as docx_file:
            result = mammoth.convert_to_html(docx_file)
            html = f"""
            <div style="width: 800px; height: 800px; padding: 20px;
                       border: 1px solid #ccc; overflow-y: auto;">
                {result.value}
            </div>
            """
            display(HTML(html))

    def example(self):
        """Write a one-heading example DOCX to a temporary file and return its path.

        The file is created with ``delete=False``; it is not removed here.
        Nothing is written to the current working directory.
        """
        from docx import Document

        doc = Document()
        doc.add_heading("Second Survey")

        # Reserve a unique name, then close the handle before python-docx
        # opens the path itself so the file is never held open twice.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as tmp:
            docx_path = tmp.name
        doc.save(docx_path)

        return docx_path
|
70
|
+
|
71
|
+
|
72
|
+
if __name__ == "__main__":
    from edsl.scenarios.FileStore import FileStore

    # example() is an instance method, so it needs a handler instance;
    # calling it on the class raises TypeError (missing 'self').
    docx_temp = DocxMethods().example()

    fs = FileStore(docx_temp)
|
@@ -0,0 +1,37 @@
|
|
1
|
+
from edsl.scenarios.file_methods import FileMethods
|
2
|
+
import tempfile
|
3
|
+
|
4
|
+
|
5
|
+
class HtmlMethods(FileMethods):
    """File handler for ``.html`` files."""

    suffix = "html"

    def view_system(self):
        """Open the HTML file in the default web browser."""
        import webbrowser
        from pathlib import Path

        # Build a proper file:// URL: resolve() makes the path absolute
        # (as_uri() requires that) and as_uri() handles percent-encoding
        # and Windows drive letters, which plain string concatenation
        # with "file://" does not.
        webbrowser.open(Path(self.path).resolve().as_uri())

    def view_notebook(self):
        """Display the HTML file in an 800x800 IFrame."""
        from IPython.display import IFrame, display

        display(IFrame(self.path, width=800, height=800))

    def example(self):
        """Write a minimal example HTML page to a temporary file and return its path.

        The file is created with ``delete=False``; it is not removed here.
        """
        html_string = b"""
        <html>
        <head>
        <title>Test</title>
        </head>
        <body>
        <h1>Hello, World!</h1>
        </body>
        </html>
        """

        with tempfile.NamedTemporaryFile(delete=False, suffix=".html") as f:
            f.write(html_string)
        return f.name
|
@@ -0,0 +1,111 @@
|
|
1
|
+
from edsl.scenarios.file_methods import FileMethods
|
2
|
+
import tempfile
|
3
|
+
import json
|
4
|
+
from typing import Optional, Dict, Any
|
5
|
+
|
6
|
+
|
7
|
+
class JsonMethods(FileMethods):
    """File handler for ``.json`` files."""

    suffix = "json"

    def view_system(self):
        """Open the JSON file with the operating system's default application.

        Prints a message instead of raising if the file is missing or the
        opener fails.
        """
        import os
        import subprocess
        import sys

        if not os.path.exists(self.path):
            print("JSON file was not found.")
            return

        try:
            # os.name is "posix" on both macOS and Linux, so it cannot tell
            # them apart; sys.platform can.
            if sys.platform == "darwin":
                subprocess.run(["open", self.path], check=True)  # macOS
            elif os.name == "nt":
                os.startfile(self.path)  # Windows
            else:
                subprocess.run(["xdg-open", self.path], check=True)  # Linux
        except Exception as e:
            print(f"Error opening JSON: {e}")

    def view_notebook(self):
        """Display the parsed JSON and a download link in a notebook.

        Parse and read errors are printed rather than raised.
        """
        from IPython.display import FileLink, JSON, display

        # Read and parse the JSON file
        try:
            with open(self.path, "r", encoding="utf-8") as f:
                content = json.load(f)

            # Display formatted JSON
            display(JSON(content))

            # Provide download link
            display(FileLink(self.path))
        except json.JSONDecodeError as e:
            print(f"Error parsing JSON: {e}")
        except Exception as e:
            print(f"Error reading file: {e}")

    def validate_json(self, schema: Optional[Dict[str, Any]] = None) -> bool:
        """
        Validate the JSON file against a schema if provided,
        or check if it's valid JSON if no schema is provided.

        Returns:
            True if the file parses (and satisfies ``schema`` when given),
            False otherwise. Failures are printed, not raised.
        """
        try:
            with open(self.path, "r", encoding="utf-8") as f:
                content = json.load(f)

            if schema is not None:
                # Imported lazily so jsonschema is only needed when a
                # schema is actually supplied.
                from jsonschema import validate

                validate(instance=content, schema=schema)

            return True
        except json.JSONDecodeError as e:
            print(f"Invalid JSON format: {e}")
            return False
        except Exception as e:
            print(f"Validation error: {e}")
            return False

    def pretty_print(self):
        """Pretty print the JSON content with proper indentation."""
        try:
            with open(self.path, "r", encoding="utf-8") as f:
                content = json.load(f)

            pretty_json = json.dumps(content, indent=2, sort_keys=True)
            print(pretty_json)
        except Exception as e:
            print(f"Error pretty printing JSON: {e}")

    def example(self):
        """Write a sample JSON document to a temporary file and return its path.

        The file is created with ``delete=False``; it is not removed here.
        """
        sample_json = {
            "person": {
                "name": "John Doe",
                "age": 30,
                "contact": {"email": "john@example.com", "phone": "+1-555-555-5555"},
                "interests": ["programming", "data science", "machine learning"],
                "active": True,
                "metadata": {"last_updated": "2024-01-01", "version": 1.0},
            }
        }

        with tempfile.NamedTemporaryFile(
            delete=False, suffix=".json", mode="w", encoding="utf-8"
        ) as f:
            json.dump(sample_json, f, indent=2)
        return f.name

    def format_file(self):
        """Read, format, and write back the JSON with consistent formatting.

        Returns:
            True on success, False if reading, serialising or writing failed
            (the error is printed).
        """
        try:
            # Read the current content
            with open(self.path, "r", encoding="utf-8") as f:
                content = json.load(f)

            # Serialise fully before reopening the file for writing, so a
            # serialisation failure cannot leave a truncated file behind.
            formatted = json.dumps(content, indent=2, sort_keys=True)

            # Write back with consistent formatting
            with open(self.path, "w", encoding="utf-8") as f:
                f.write(formatted)

            return True
        except Exception as e:
            print(f"Error formatting JSON file: {e}")
            return False
|