edsl 0.1.39__py3-none-any.whl → 0.1.39.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +116 -197
- edsl/__init__.py +7 -15
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +147 -351
- edsl/agents/AgentList.py +73 -211
- edsl/agents/Invigilator.py +50 -101
- edsl/agents/InvigilatorBase.py +70 -62
- edsl/agents/PromptConstructor.py +225 -143
- edsl/agents/__init__.py +1 -0
- edsl/agents/prompt_helpers.py +3 -3
- edsl/auto/AutoStudy.py +5 -18
- edsl/auto/StageBase.py +40 -53
- edsl/auto/StageQuestions.py +1 -2
- edsl/auto/utilities.py +6 -0
- edsl/config.py +2 -22
- edsl/conversation/car_buying.py +1 -2
- edsl/coop/PriceFetcher.py +1 -1
- edsl/coop/coop.py +47 -125
- edsl/coop/utils.py +14 -14
- edsl/data/Cache.py +27 -45
- edsl/data/CacheEntry.py +15 -12
- edsl/data/CacheHandler.py +12 -31
- edsl/data/RemoteCacheSync.py +46 -154
- edsl/data/__init__.py +3 -4
- edsl/data_transfer_models.py +1 -2
- edsl/enums.py +0 -27
- edsl/exceptions/__init__.py +50 -50
- edsl/exceptions/agents.py +0 -12
- edsl/exceptions/questions.py +6 -24
- edsl/exceptions/scenarios.py +0 -7
- edsl/inference_services/AnthropicService.py +19 -38
- edsl/inference_services/AwsBedrock.py +2 -0
- edsl/inference_services/AzureAI.py +2 -0
- edsl/inference_services/GoogleService.py +12 -7
- edsl/inference_services/InferenceServiceABC.py +85 -18
- edsl/inference_services/InferenceServicesCollection.py +79 -120
- edsl/inference_services/MistralAIService.py +3 -0
- edsl/inference_services/OpenAIService.py +35 -47
- edsl/inference_services/PerplexityService.py +3 -0
- edsl/inference_services/TestService.py +10 -11
- edsl/inference_services/TogetherAIService.py +3 -5
- edsl/jobs/Answers.py +14 -1
- edsl/jobs/Jobs.py +431 -356
- edsl/jobs/JobsChecks.py +10 -35
- edsl/jobs/JobsPrompts.py +4 -6
- edsl/jobs/JobsRemoteInferenceHandler.py +133 -205
- edsl/jobs/buckets/BucketCollection.py +3 -44
- edsl/jobs/buckets/TokenBucket.py +21 -53
- edsl/jobs/interviews/Interview.py +408 -143
- edsl/jobs/runners/JobsRunnerAsyncio.py +403 -88
- edsl/jobs/runners/JobsRunnerStatus.py +165 -133
- edsl/jobs/tasks/QuestionTaskCreator.py +19 -21
- edsl/jobs/tasks/TaskHistory.py +18 -38
- edsl/jobs/tasks/task_status_enum.py +2 -0
- edsl/language_models/KeyLookup.py +30 -0
- edsl/language_models/LanguageModel.py +236 -194
- edsl/language_models/ModelList.py +19 -28
- edsl/language_models/__init__.py +2 -1
- edsl/language_models/registry.py +190 -0
- edsl/language_models/repair.py +2 -2
- edsl/language_models/unused/ReplicateBase.py +83 -0
- edsl/language_models/utilities.py +4 -5
- edsl/notebooks/Notebook.py +14 -19
- edsl/prompts/Prompt.py +39 -29
- edsl/questions/{answer_validator_mixin.py → AnswerValidatorMixin.py} +2 -47
- edsl/questions/QuestionBase.py +214 -68
- edsl/questions/{question_base_gen_mixin.py → QuestionBaseGenMixin.py} +50 -57
- edsl/questions/QuestionBasePromptsMixin.py +3 -7
- edsl/questions/QuestionBudget.py +1 -1
- edsl/questions/QuestionCheckBox.py +3 -3
- edsl/questions/QuestionExtract.py +7 -5
- edsl/questions/QuestionFreeText.py +3 -2
- edsl/questions/QuestionList.py +18 -10
- edsl/questions/QuestionMultipleChoice.py +23 -67
- edsl/questions/QuestionNumerical.py +4 -2
- edsl/questions/QuestionRank.py +17 -7
- edsl/questions/{response_validator_abc.py → ResponseValidatorABC.py} +26 -40
- edsl/questions/SimpleAskMixin.py +3 -4
- edsl/questions/__init__.py +1 -2
- edsl/questions/derived/QuestionLinearScale.py +3 -6
- edsl/questions/derived/QuestionTopK.py +1 -1
- edsl/questions/descriptors.py +3 -17
- edsl/questions/question_registry.py +1 -1
- edsl/results/CSSParameterizer.py +1 -1
- edsl/results/Dataset.py +7 -170
- edsl/results/DatasetExportMixin.py +305 -168
- edsl/results/DatasetTree.py +8 -28
- edsl/results/Result.py +206 -298
- edsl/results/Results.py +131 -149
- edsl/results/ResultsDBMixin.py +238 -0
- edsl/results/ResultsExportMixin.py +0 -2
- edsl/results/{results_selector.py → Selector.py} +13 -23
- edsl/results/TableDisplay.py +171 -98
- edsl/results/__init__.py +1 -1
- edsl/scenarios/FileStore.py +239 -150
- edsl/scenarios/Scenario.py +193 -90
- edsl/scenarios/ScenarioHtmlMixin.py +3 -4
- edsl/scenarios/{scenario_join.py → ScenarioJoin.py} +6 -10
- edsl/scenarios/ScenarioList.py +244 -415
- edsl/scenarios/ScenarioListExportMixin.py +7 -0
- edsl/scenarios/ScenarioListPdfMixin.py +37 -15
- edsl/scenarios/__init__.py +2 -1
- edsl/study/ObjectEntry.py +1 -1
- edsl/study/SnapShot.py +1 -1
- edsl/study/Study.py +12 -5
- edsl/surveys/Rule.py +4 -5
- edsl/surveys/RuleCollection.py +27 -25
- edsl/surveys/Survey.py +791 -270
- edsl/surveys/SurveyCSS.py +8 -20
- edsl/surveys/{SurveyFlowVisualization.py → SurveyFlowVisualizationMixin.py} +9 -11
- edsl/surveys/__init__.py +2 -4
- edsl/surveys/descriptors.py +2 -6
- edsl/surveys/instructions/ChangeInstruction.py +2 -1
- edsl/surveys/instructions/Instruction.py +13 -4
- edsl/surveys/instructions/InstructionCollection.py +6 -11
- edsl/templates/error_reporting/interview_details.html +1 -1
- edsl/templates/error_reporting/report.html +1 -1
- edsl/tools/plotting.py +1 -1
- edsl/utilities/utilities.py +23 -35
- {edsl-0.1.39.dist-info → edsl-0.1.39.dev1.dist-info}/METADATA +10 -12
- edsl-0.1.39.dev1.dist-info/RECORD +277 -0
- {edsl-0.1.39.dist-info → edsl-0.1.39.dev1.dist-info}/WHEEL +1 -1
- edsl/agents/QuestionInstructionPromptBuilder.py +0 -128
- edsl/agents/QuestionTemplateReplacementsBuilder.py +0 -137
- edsl/agents/question_option_processor.py +0 -172
- edsl/coop/CoopFunctionsMixin.py +0 -15
- edsl/coop/ExpectedParrotKeyHandler.py +0 -125
- edsl/exceptions/inference_services.py +0 -5
- edsl/inference_services/AvailableModelCacheHandler.py +0 -184
- edsl/inference_services/AvailableModelFetcher.py +0 -215
- edsl/inference_services/ServiceAvailability.py +0 -135
- edsl/inference_services/data_structures.py +0 -134
- edsl/jobs/AnswerQuestionFunctionConstructor.py +0 -223
- edsl/jobs/FetchInvigilator.py +0 -47
- edsl/jobs/InterviewTaskManager.py +0 -98
- edsl/jobs/InterviewsConstructor.py +0 -50
- edsl/jobs/JobsComponentConstructor.py +0 -189
- edsl/jobs/JobsRemoteInferenceLogger.py +0 -239
- edsl/jobs/RequestTokenEstimator.py +0 -30
- edsl/jobs/async_interview_runner.py +0 -138
- edsl/jobs/buckets/TokenBucketAPI.py +0 -211
- edsl/jobs/buckets/TokenBucketClient.py +0 -191
- edsl/jobs/check_survey_scenario_compatibility.py +0 -85
- edsl/jobs/data_structures.py +0 -120
- edsl/jobs/decorators.py +0 -35
- edsl/jobs/jobs_status_enums.py +0 -9
- edsl/jobs/loggers/HTMLTableJobLogger.py +0 -304
- edsl/jobs/results_exceptions_handler.py +0 -98
- edsl/language_models/ComputeCost.py +0 -63
- edsl/language_models/PriceManager.py +0 -127
- edsl/language_models/RawResponseHandler.py +0 -106
- edsl/language_models/ServiceDataSources.py +0 -0
- edsl/language_models/key_management/KeyLookup.py +0 -63
- edsl/language_models/key_management/KeyLookupBuilder.py +0 -273
- edsl/language_models/key_management/KeyLookupCollection.py +0 -38
- edsl/language_models/key_management/__init__.py +0 -0
- edsl/language_models/key_management/models.py +0 -131
- edsl/language_models/model.py +0 -256
- edsl/notebooks/NotebookToLaTeX.py +0 -142
- edsl/questions/ExceptionExplainer.py +0 -77
- edsl/questions/HTMLQuestion.py +0 -103
- edsl/questions/QuestionMatrix.py +0 -265
- edsl/questions/data_structures.py +0 -20
- edsl/questions/loop_processor.py +0 -149
- edsl/questions/response_validator_factory.py +0 -34
- edsl/questions/templates/matrix/__init__.py +0 -1
- edsl/questions/templates/matrix/answering_instructions.jinja +0 -5
- edsl/questions/templates/matrix/question_presentation.jinja +0 -20
- edsl/results/MarkdownToDocx.py +0 -122
- edsl/results/MarkdownToPDF.py +0 -111
- edsl/results/TextEditor.py +0 -50
- edsl/results/file_exports.py +0 -252
- edsl/results/smart_objects.py +0 -96
- edsl/results/table_data_class.py +0 -12
- edsl/results/table_renderers.py +0 -118
- edsl/scenarios/ConstructDownloadLink.py +0 -109
- edsl/scenarios/DocumentChunker.py +0 -102
- edsl/scenarios/DocxScenario.py +0 -16
- edsl/scenarios/PdfExtractor.py +0 -40
- edsl/scenarios/directory_scanner.py +0 -96
- edsl/scenarios/file_methods.py +0 -85
- edsl/scenarios/handlers/__init__.py +0 -13
- edsl/scenarios/handlers/csv.py +0 -49
- edsl/scenarios/handlers/docx.py +0 -76
- edsl/scenarios/handlers/html.py +0 -37
- edsl/scenarios/handlers/json.py +0 -111
- edsl/scenarios/handlers/latex.py +0 -5
- edsl/scenarios/handlers/md.py +0 -51
- edsl/scenarios/handlers/pdf.py +0 -68
- edsl/scenarios/handlers/png.py +0 -39
- edsl/scenarios/handlers/pptx.py +0 -105
- edsl/scenarios/handlers/py.py +0 -294
- edsl/scenarios/handlers/sql.py +0 -313
- edsl/scenarios/handlers/sqlite.py +0 -149
- edsl/scenarios/handlers/txt.py +0 -33
- edsl/scenarios/scenario_selector.py +0 -156
- edsl/surveys/ConstructDAG.py +0 -92
- edsl/surveys/EditSurvey.py +0 -221
- edsl/surveys/InstructionHandler.py +0 -100
- edsl/surveys/MemoryManagement.py +0 -72
- edsl/surveys/RuleManager.py +0 -172
- edsl/surveys/Simulator.py +0 -75
- edsl/surveys/SurveyToApp.py +0 -141
- edsl/utilities/PrettyList.py +0 -56
- edsl/utilities/is_notebook.py +0 -18
- edsl/utilities/is_valid_variable_name.py +0 -11
- edsl/utilities/remove_edsl_version.py +0 -24
- edsl-0.1.39.dist-info/RECORD +0 -358
- /edsl/questions/{register_questions_meta.py → RegisterQuestionsMeta.py} +0 -0
- /edsl/results/{results_fetch_mixin.py → ResultsFetchMixin.py} +0 -0
- /edsl/results/{results_tools_mixin.py → ResultsToolsMixin.py} +0 -0
- {edsl-0.1.39.dist-info → edsl-0.1.39.dev1.dist-info}/LICENSE +0 -0
edsl/scenarios/FileStore.py
CHANGED
@@ -4,11 +4,111 @@ import tempfile
|
|
4
4
|
import mimetypes
|
5
5
|
import os
|
6
6
|
from typing import Dict, Any, IO, Optional
|
7
|
+
import requests
|
8
|
+
from urllib.parse import urlparse
|
7
9
|
|
8
|
-
|
9
|
-
from edsl.utilities.remove_edsl_version import remove_edsl_version
|
10
|
+
import google.generativeai as genai
|
10
11
|
|
11
|
-
from edsl
|
12
|
+
from edsl import Scenario
|
13
|
+
from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
|
14
|
+
from edsl.utilities.utilities import is_notebook
|
15
|
+
|
16
|
+
|
17
|
+
def view_csv(csv_path):
|
18
|
+
import pandas as pd
|
19
|
+
|
20
|
+
df = pd.read_csv(csv_path)
|
21
|
+
return df
|
22
|
+
|
23
|
+
|
24
|
+
def view_html(html_path):
|
25
|
+
import os
|
26
|
+
import subprocess
|
27
|
+
from IPython.display import IFrame, display, HTML
|
28
|
+
|
29
|
+
if os.path.exists(html_path):
|
30
|
+
if is_notebook():
|
31
|
+
# Display the HTML inline in Jupyter Notebook
|
32
|
+
display(IFrame(src=html_path, width=700, height=600))
|
33
|
+
display(
|
34
|
+
HTML(
|
35
|
+
f'<a href="{html_path}" target="_blank">Open HTML in a new tab</a>'
|
36
|
+
)
|
37
|
+
)
|
38
|
+
else:
|
39
|
+
try:
|
40
|
+
if (os_name := os.name) == "posix":
|
41
|
+
# Open with the default browser on macOS
|
42
|
+
subprocess.run(["open", html_path], check=True)
|
43
|
+
elif os_name == "nt":
|
44
|
+
# Open with the default browser on Windows
|
45
|
+
os.startfile(html_path)
|
46
|
+
else:
|
47
|
+
# Open with the default browser on Linux
|
48
|
+
subprocess.run(["xdg-open", html_path], check=True)
|
49
|
+
except Exception as e:
|
50
|
+
print(f"Error opening HTML file: {e}")
|
51
|
+
else:
|
52
|
+
print("HTML file was not found.")
|
53
|
+
|
54
|
+
|
55
|
+
def view_html(html_path):
|
56
|
+
import os
|
57
|
+
from IPython.display import display, HTML
|
58
|
+
|
59
|
+
if is_notebook():
|
60
|
+
with open(html_path, "r") as f:
|
61
|
+
html_content = f.read()
|
62
|
+
display(HTML(html_content))
|
63
|
+
else:
|
64
|
+
if os.path.exists(html_path):
|
65
|
+
try:
|
66
|
+
if (os_name := os.name) == "posix":
|
67
|
+
subprocess.run(["open", html_path], check=True)
|
68
|
+
elif os_name == "nt":
|
69
|
+
os.startfile(html_path)
|
70
|
+
else:
|
71
|
+
subprocess.run(["xdg-open", html_path], check=True)
|
72
|
+
except Exception as e:
|
73
|
+
print(f"Error opening file: {e}")
|
74
|
+
else:
|
75
|
+
print("File was not created successfully.")
|
76
|
+
|
77
|
+
|
78
|
+
def view_pdf(pdf_path):
|
79
|
+
import os
|
80
|
+
import subprocess
|
81
|
+
import os
|
82
|
+
from IPython.display import HTML, display
|
83
|
+
|
84
|
+
if is_notebook():
|
85
|
+
# Convert to absolute path if needed
|
86
|
+
with open(pdf_path, "rb") as f:
|
87
|
+
base64_pdf = base64.b64encode(f.read()).decode("utf-8")
|
88
|
+
|
89
|
+
html = f"""
|
90
|
+
<iframe
|
91
|
+
src="data:application/pdf;base64,{base64_pdf}"
|
92
|
+
width="800px"
|
93
|
+
height="800px"
|
94
|
+
type="application/pdf"
|
95
|
+
></iframe>
|
96
|
+
"""
|
97
|
+
display(HTML(html))
|
98
|
+
|
99
|
+
if os.path.exists(pdf_path):
|
100
|
+
try:
|
101
|
+
if (os_name := os.name) == "posix":
|
102
|
+
# for cool kids
|
103
|
+
subprocess.run(["open", pdf_path], check=True) # macOS
|
104
|
+
elif os_name == "nt":
|
105
|
+
os.startfile(pdf_path) # Windows
|
106
|
+
else:
|
107
|
+
subprocess.run(["xdg-open", pdf_path], check=True) # Linux
|
108
|
+
except Exception as e:
|
109
|
+
print(f"Error opening PDF: {e}")
|
110
|
+
else:
|
111
|
+
print("PDF file was not created successfully.")
|
12
112
|
|
13
113
|
|
14
114
|
class FileStore(Scenario):
|
@@ -22,7 +122,6 @@ class FileStore(Scenario):
|
|
22
122
|
suffix: Optional[str] = None,
|
23
123
|
base64_string: Optional[str] = None,
|
24
124
|
external_locations: Optional[Dict[str, str]] = None,
|
25
|
-
extracted_text: Optional[str] = None,
|
26
125
|
**kwargs,
|
27
126
|
):
|
28
127
|
if path is None and "filename" in kwargs:
|
@@ -38,11 +137,6 @@ class FileStore(Scenario):
|
|
38
137
|
)
|
39
138
|
self.base64_string = base64_string or self.encode_file_to_base64_string(path)
|
40
139
|
self.external_locations = external_locations or {}
|
41
|
-
|
42
|
-
self.extracted_text = (
|
43
|
-
self.extract_text() if extracted_text is None else extracted_text
|
44
|
-
)
|
45
|
-
|
46
140
|
super().__init__(
|
47
141
|
{
|
48
142
|
"path": path,
|
@@ -51,7 +145,6 @@ class FileStore(Scenario):
|
|
51
145
|
"suffix": self.suffix,
|
52
146
|
"mime_type": self.mime_type,
|
53
147
|
"external_locations": self.external_locations,
|
54
|
-
"extracted_text": self.extracted_text,
|
55
148
|
}
|
56
149
|
)
|
57
150
|
|
@@ -77,12 +170,88 @@ class FileStore(Scenario):
|
|
77
170
|
return "FileStore: self.path"
|
78
171
|
|
79
172
|
@classmethod
|
80
|
-
def example(cls, example_type="
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
173
|
+
def example(cls, example_type="text"):
|
174
|
+
import textwrap
|
175
|
+
import tempfile
|
176
|
+
|
177
|
+
if example_type == "png" or example_type == "image":
|
178
|
+
import importlib.resources
|
179
|
+
from pathlib import Path
|
180
|
+
|
181
|
+
# Get package root directory
|
182
|
+
package_root = Path(__file__).parent.parent.parent
|
183
|
+
logo_path = package_root / "static" / "logo.png"
|
184
|
+
return cls(str(logo_path))
|
185
|
+
|
186
|
+
if example_type == "text":
|
187
|
+
with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as f:
|
188
|
+
f.write(b"Hello, World!")
|
189
|
+
|
190
|
+
return cls(path=f.name)
|
191
|
+
|
192
|
+
elif example_type == "csv":
|
193
|
+
from edsl.results.Results import Results
|
194
|
+
|
195
|
+
r = Results.example()
|
196
|
+
|
197
|
+
with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as f:
|
198
|
+
r.to_csv(filename=f.name)
|
199
|
+
return cls(f.name)
|
200
|
+
|
201
|
+
elif example_type == "pdf":
|
202
|
+
pdf_string = textwrap.dedent(
|
203
|
+
"""\
|
204
|
+
%PDF-1.4
|
205
|
+
1 0 obj
|
206
|
+
<< /Type /Catalog /Pages 2 0 R >>
|
207
|
+
endobj
|
208
|
+
2 0 obj
|
209
|
+
<< /Type /Pages /Kids [3 0 R] /Count 1 >>
|
210
|
+
endobj
|
211
|
+
3 0 obj
|
212
|
+
<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R >>
|
213
|
+
endobj
|
214
|
+
4 0 obj
|
215
|
+
<< /Length 44 >>
|
216
|
+
stream
|
217
|
+
BT
|
218
|
+
/F1 24 Tf
|
219
|
+
100 700 Td
|
220
|
+
(Hello, World!) Tj
|
221
|
+
ET
|
222
|
+
endstream
|
223
|
+
endobj
|
224
|
+
5 0 obj
|
225
|
+
<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>
|
226
|
+
endobj
|
227
|
+
6 0 obj
|
228
|
+
<< /ProcSet [/PDF /Text] /Font << /F1 5 0 R >> >>
|
229
|
+
endobj
|
230
|
+
xref
|
231
|
+
0 7
|
232
|
+
0000000000 65535 f
|
233
|
+
0000000010 00000 n
|
234
|
+
0000000053 00000 n
|
235
|
+
0000000100 00000 n
|
236
|
+
0000000173 00000 n
|
237
|
+
0000000232 00000 n
|
238
|
+
0000000272 00000 n
|
239
|
+
trailer
|
240
|
+
<< /Size 7 /Root 1 0 R >>
|
241
|
+
startxref
|
242
|
+
318
|
243
|
+
%%EOF"""
|
244
|
+
)
|
245
|
+
with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as f:
|
246
|
+
f.write(pdf_string.encode())
|
247
|
+
|
248
|
+
return cls(f.name)
|
249
|
+
|
250
|
+
elif example_type == "html":
|
251
|
+
with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as f:
|
252
|
+
f.write("<html><body><h1>Test</h1></body></html>".encode())
|
253
|
+
|
254
|
+
return cls(f.name)
|
86
255
|
|
87
256
|
@property
|
88
257
|
def size(self) -> int:
|
@@ -91,8 +260,6 @@ class FileStore(Scenario):
|
|
91
260
|
return os.path.getsize(self.path)
|
92
261
|
|
93
262
|
def upload_google(self, refresh: bool = False) -> None:
|
94
|
-
import google.generativeai as genai
|
95
|
-
|
96
263
|
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
|
97
264
|
google_info = genai.upload_file(self.path, mime_type=self.mime_type)
|
98
265
|
self.external_locations["google"] = google_info.to_dict()
|
@@ -104,21 +271,7 @@ class FileStore(Scenario):
|
|
104
271
|
return cls(**d)
|
105
272
|
|
106
273
|
def __repr__(self):
|
107
|
-
|
108
|
-
|
109
|
-
r = reprlib.Repr()
|
110
|
-
r.maxstring = 20 # Limit strings to 20 chars
|
111
|
-
r.maxother = 30 # Limit other types to 30 chars
|
112
|
-
|
113
|
-
params = ", ".join(f"{key}={r.repr(value)}" for key, value in self.data.items())
|
114
|
-
return f"{self.__class__.__name__}({params})"
|
115
|
-
|
116
|
-
def _repr_html_(self):
|
117
|
-
parent_html = super()._repr_html_()
|
118
|
-
from edsl.scenarios.ConstructDownloadLink import ConstructDownloadLink
|
119
|
-
|
120
|
-
link = ConstructDownloadLink(self).html_create_link(self.path, style=None)
|
121
|
-
return f"{parent_html}<br>{link}"
|
274
|
+
return f"FileStore(path='{self.path}')"
|
122
275
|
|
123
276
|
def encode_file_to_base64_string(self, file_path: str):
|
124
277
|
try:
|
@@ -143,44 +296,9 @@ class FileStore(Scenario):
|
|
143
296
|
|
144
297
|
def open(self) -> "IO":
|
145
298
|
if self.binary:
|
146
|
-
return self.base64_to_file(self
|
299
|
+
return self.base64_to_file(self["base64_string"], is_binary=True)
|
147
300
|
else:
|
148
|
-
return self.base64_to_text_file(self
|
149
|
-
|
150
|
-
def write(self, filename: Optional[str] = None) -> str:
|
151
|
-
"""
|
152
|
-
Write the file content to disk, either to a specified filename or a temporary file.
|
153
|
-
|
154
|
-
Args:
|
155
|
-
filename (Optional[str]): The destination filename. If None, creates a temporary file.
|
156
|
-
|
157
|
-
Returns:
|
158
|
-
str: The path to the written file.
|
159
|
-
"""
|
160
|
-
# Determine the mode based on binary flag
|
161
|
-
mode = "wb" if self.binary else "w"
|
162
|
-
|
163
|
-
# If no filename provided, create a temporary file
|
164
|
-
if filename is None:
|
165
|
-
from tempfile import NamedTemporaryFile
|
166
|
-
|
167
|
-
with NamedTemporaryFile(delete=False, suffix="." + self.suffix) as f:
|
168
|
-
filename = f.name
|
169
|
-
|
170
|
-
# Write the content using the appropriate mode
|
171
|
-
try:
|
172
|
-
with open(filename, mode) as f:
|
173
|
-
content = self.open().read()
|
174
|
-
# For text mode, ensure we're writing a string
|
175
|
-
if not self.binary and isinstance(content, bytes):
|
176
|
-
content = content.decode("utf-8")
|
177
|
-
f.write(content)
|
178
|
-
print(f"File written to {filename}")
|
179
|
-
except Exception as e:
|
180
|
-
print(f"Error writing file: {e}")
|
181
|
-
raise
|
182
|
-
|
183
|
-
# return filename
|
301
|
+
return self.base64_to_text_file(self["base64_string"])
|
184
302
|
|
185
303
|
@staticmethod
|
186
304
|
def base64_to_text_file(base64_string) -> "IO":
|
@@ -209,15 +327,6 @@ class FileStore(Scenario):
|
|
209
327
|
# Create a StringIO object for text data
|
210
328
|
return io.StringIO(text_data)
|
211
329
|
|
212
|
-
@property
|
213
|
-
def text(self):
|
214
|
-
if self.binary:
|
215
|
-
import warnings
|
216
|
-
|
217
|
-
warnings.warn("This is a binary file.")
|
218
|
-
else:
|
219
|
-
return self.base64_to_text_file(self.base64_string).read()
|
220
|
-
|
221
330
|
def to_tempfile(self, suffix=None):
|
222
331
|
if suffix is None:
|
223
332
|
suffix = self.suffix
|
@@ -226,7 +335,7 @@ class FileStore(Scenario):
|
|
226
335
|
self["base64_string"], is_binary=True
|
227
336
|
)
|
228
337
|
else:
|
229
|
-
file_like_object = self.base64_to_text_file(self
|
338
|
+
file_like_object = self.base64_to_text_file(self["base64_string"])
|
230
339
|
|
231
340
|
# Create a named temporary file
|
232
341
|
mode = "wb" if self.binary else "w"
|
@@ -243,23 +352,40 @@ class FileStore(Scenario):
|
|
243
352
|
|
244
353
|
return temp_file.name
|
245
354
|
|
246
|
-
def view(self) -> None:
|
247
|
-
|
248
|
-
if
|
249
|
-
|
250
|
-
else:
|
251
|
-
print(f"Viewing of {self.suffix} files is not supported.")
|
355
|
+
def view(self, max_size: int = 300) -> None:
|
356
|
+
# with self.open() as f:
|
357
|
+
if self.suffix == "csv":
|
358
|
+
return view_csv(self.path)
|
252
359
|
|
253
|
-
|
254
|
-
|
255
|
-
if handler and hasattr(handler, "extract_text"):
|
256
|
-
return handler(self.path).extract_text()
|
360
|
+
if self.suffix == "pdf":
|
361
|
+
view_pdf(self.path)
|
257
362
|
|
258
|
-
if
|
259
|
-
|
363
|
+
if self.suffix == "html":
|
364
|
+
view_html(self.path)
|
260
365
|
|
261
|
-
|
262
|
-
|
366
|
+
if self.suffix == "png" or self.suffix == "jpg" or self.suffix == "jpeg":
|
367
|
+
if is_notebook():
|
368
|
+
from IPython.display import Image
|
369
|
+
from PIL import Image as PILImage
|
370
|
+
|
371
|
+
if max_size:
|
372
|
+
# Open the image using Pillow
|
373
|
+
with PILImage.open(self.path) as img:
|
374
|
+
# Get original width and height
|
375
|
+
original_width, original_height = img.size
|
376
|
+
|
377
|
+
# Calculate the scaling factor
|
378
|
+
scale = min(
|
379
|
+
max_size / original_width, max_size / original_height
|
380
|
+
)
|
381
|
+
|
382
|
+
# Calculate new dimensions
|
383
|
+
new_width = int(original_width * scale)
|
384
|
+
new_height = int(original_height * scale)
|
385
|
+
|
386
|
+
return Image(self.path, width=new_width, height=new_height)
|
387
|
+
else:
|
388
|
+
return Image(self.path)
|
263
389
|
|
264
390
|
def push(
|
265
391
|
self, description: Optional[str] = None, visibility: str = "unlisted"
|
@@ -297,8 +423,6 @@ class FileStore(Scenario):
|
|
297
423
|
:param download_path: The path to save the downloaded file.
|
298
424
|
:param mime_type: The MIME type of the file. If None, it will be guessed from the file extension.
|
299
425
|
"""
|
300
|
-
import requests
|
301
|
-
from urllib.parse import urlparse
|
302
426
|
|
303
427
|
response = requests.get(url, stream=True)
|
304
428
|
response.raise_for_status() # Raises an HTTPError for bad responses
|
@@ -322,43 +446,6 @@ class FileStore(Scenario):
|
|
322
446
|
# Create and return a new File instance
|
323
447
|
return cls(download_path, mime_type=mime_type)
|
324
448
|
|
325
|
-
def create_link(self, custom_filename=None, style=None):
|
326
|
-
from edsl.scenarios.ConstructDownloadLink import ConstructDownloadLink
|
327
|
-
|
328
|
-
return ConstructDownloadLink(self).create_link(custom_filename, style)
|
329
|
-
|
330
|
-
def to_pandas(self):
|
331
|
-
"""
|
332
|
-
Convert the file content to a pandas DataFrame if supported by the file handler.
|
333
|
-
|
334
|
-
Returns:
|
335
|
-
pandas.DataFrame: The data from the file as a DataFrame
|
336
|
-
|
337
|
-
Raises:
|
338
|
-
AttributeError: If the file type's handler doesn't support pandas conversion
|
339
|
-
"""
|
340
|
-
handler = FileMethods.get_handler(self.suffix)
|
341
|
-
if handler and hasattr(handler, "to_pandas"):
|
342
|
-
return handler(self.path).to_pandas()
|
343
|
-
raise AttributeError(
|
344
|
-
f"Converting {self.suffix} files to pandas DataFrame is not supported"
|
345
|
-
)
|
346
|
-
|
347
|
-
def __getattr__(self, name):
|
348
|
-
"""
|
349
|
-
Delegate pandas DataFrame methods to the underlying DataFrame if this is a CSV file
|
350
|
-
"""
|
351
|
-
if self.suffix == "csv":
|
352
|
-
# Get the pandas DataFrame
|
353
|
-
df = self.to_pandas()
|
354
|
-
# Check if the requested attribute exists in the DataFrame
|
355
|
-
if hasattr(df, name):
|
356
|
-
return getattr(df, name)
|
357
|
-
# If not a CSV or attribute doesn't exist in DataFrame, raise AttributeError
|
358
|
-
raise AttributeError(
|
359
|
-
f"'{self.__class__.__name__}' object has no attribute '{name}'"
|
360
|
-
)
|
361
|
-
|
362
449
|
|
363
450
|
class CSVFileStore(FileStore):
|
364
451
|
@classmethod
|
@@ -519,25 +606,27 @@ class HTMLFileStore(FileStore):
|
|
519
606
|
|
520
607
|
|
521
608
|
if __name__ == "__main__":
|
522
|
-
|
609
|
+
# file_path = "../conjure/examples/Ex11-2.sav"
|
610
|
+
# fs = FileStore(file_path)
|
611
|
+
# info = fs.push()
|
612
|
+
# print(info)
|
523
613
|
|
524
|
-
|
614
|
+
# fs = CSVFileStore.example()
|
615
|
+
# fs.to_tempfile()
|
616
|
+
# print(fs.view())
|
525
617
|
|
526
|
-
# fs =
|
618
|
+
# fs = PDFFileStore.example()
|
527
619
|
# fs.view()
|
528
620
|
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
# pdf_example.view()
|
537
|
-
# FileStore(pdf_example).view()
|
538
|
-
|
539
|
-
# pdf_methods = methods.get("pdf")
|
540
|
-
# file = pdf_methods().example()
|
541
|
-
# pdf_methods(file).view()
|
621
|
+
# fs = PDFFileStore("paper.pdf")
|
622
|
+
# fs.view()
|
623
|
+
# from edsl import Conjure
|
624
|
+
pass
|
625
|
+
# fs = PNGFileStore("logo.png")
|
626
|
+
# fs.view()
|
627
|
+
# fs.upload_google()
|
542
628
|
|
543
|
-
#
|
629
|
+
# c = Conjure(datafile_name=fs.to_tempfile())
|
630
|
+
# f = PDFFileStore("paper.pdf")
|
631
|
+
# print(f.to_tempfile())
|
632
|
+
# f.push()
|