edsl 0.1.38.dev3__py3-none-any.whl → 0.1.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (341) hide show
  1. edsl/Base.py +413 -303
  2. edsl/BaseDiff.py +260 -260
  3. edsl/TemplateLoader.py +24 -24
  4. edsl/__init__.py +57 -49
  5. edsl/__version__.py +1 -1
  6. edsl/agents/Agent.py +1071 -858
  7. edsl/agents/AgentList.py +551 -362
  8. edsl/agents/Invigilator.py +284 -222
  9. edsl/agents/InvigilatorBase.py +257 -284
  10. edsl/agents/PromptConstructor.py +272 -353
  11. edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
  12. edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
  13. edsl/agents/__init__.py +2 -3
  14. edsl/agents/descriptors.py +99 -99
  15. edsl/agents/prompt_helpers.py +129 -129
  16. edsl/agents/question_option_processor.py +172 -0
  17. edsl/auto/AutoStudy.py +130 -117
  18. edsl/auto/StageBase.py +243 -230
  19. edsl/auto/StageGenerateSurvey.py +178 -178
  20. edsl/auto/StageLabelQuestions.py +125 -125
  21. edsl/auto/StagePersona.py +61 -61
  22. edsl/auto/StagePersonaDimensionValueRanges.py +88 -88
  23. edsl/auto/StagePersonaDimensionValues.py +74 -74
  24. edsl/auto/StagePersonaDimensions.py +69 -69
  25. edsl/auto/StageQuestions.py +74 -73
  26. edsl/auto/SurveyCreatorPipeline.py +21 -21
  27. edsl/auto/utilities.py +218 -224
  28. edsl/base/Base.py +279 -279
  29. edsl/config.py +177 -149
  30. edsl/conversation/Conversation.py +290 -290
  31. edsl/conversation/car_buying.py +59 -58
  32. edsl/conversation/chips.py +95 -95
  33. edsl/conversation/mug_negotiation.py +81 -81
  34. edsl/conversation/next_speaker_utilities.py +93 -93
  35. edsl/coop/CoopFunctionsMixin.py +15 -0
  36. edsl/coop/ExpectedParrotKeyHandler.py +125 -0
  37. edsl/coop/PriceFetcher.py +54 -54
  38. edsl/coop/__init__.py +2 -2
  39. edsl/coop/coop.py +1106 -961
  40. edsl/coop/utils.py +131 -131
  41. edsl/data/Cache.py +573 -530
  42. edsl/data/CacheEntry.py +230 -228
  43. edsl/data/CacheHandler.py +168 -149
  44. edsl/data/RemoteCacheSync.py +186 -97
  45. edsl/data/SQLiteDict.py +292 -292
  46. edsl/data/__init__.py +5 -4
  47. edsl/data/orm.py +10 -10
  48. edsl/data_transfer_models.py +74 -73
  49. edsl/enums.py +202 -173
  50. edsl/exceptions/BaseException.py +21 -21
  51. edsl/exceptions/__init__.py +54 -54
  52. edsl/exceptions/agents.py +54 -42
  53. edsl/exceptions/cache.py +5 -5
  54. edsl/exceptions/configuration.py +16 -16
  55. edsl/exceptions/coop.py +10 -10
  56. edsl/exceptions/data.py +14 -14
  57. edsl/exceptions/general.py +34 -34
  58. edsl/exceptions/inference_services.py +5 -0
  59. edsl/exceptions/jobs.py +33 -33
  60. edsl/exceptions/language_models.py +63 -63
  61. edsl/exceptions/prompts.py +15 -15
  62. edsl/exceptions/questions.py +109 -91
  63. edsl/exceptions/results.py +29 -29
  64. edsl/exceptions/scenarios.py +29 -22
  65. edsl/exceptions/surveys.py +37 -37
  66. edsl/inference_services/AnthropicService.py +106 -87
  67. edsl/inference_services/AvailableModelCacheHandler.py +184 -0
  68. edsl/inference_services/AvailableModelFetcher.py +215 -0
  69. edsl/inference_services/AwsBedrock.py +118 -120
  70. edsl/inference_services/AzureAI.py +215 -217
  71. edsl/inference_services/DeepInfraService.py +18 -18
  72. edsl/inference_services/GoogleService.py +143 -156
  73. edsl/inference_services/GroqService.py +20 -20
  74. edsl/inference_services/InferenceServiceABC.py +80 -147
  75. edsl/inference_services/InferenceServicesCollection.py +138 -97
  76. edsl/inference_services/MistralAIService.py +120 -123
  77. edsl/inference_services/OllamaService.py +18 -18
  78. edsl/inference_services/OpenAIService.py +236 -224
  79. edsl/inference_services/PerplexityService.py +160 -0
  80. edsl/inference_services/ServiceAvailability.py +135 -0
  81. edsl/inference_services/TestService.py +90 -89
  82. edsl/inference_services/TogetherAIService.py +172 -170
  83. edsl/inference_services/data_structures.py +134 -0
  84. edsl/inference_services/models_available_cache.py +118 -118
  85. edsl/inference_services/rate_limits_cache.py +25 -25
  86. edsl/inference_services/registry.py +41 -39
  87. edsl/inference_services/write_available.py +10 -10
  88. edsl/jobs/AnswerQuestionFunctionConstructor.py +223 -0
  89. edsl/jobs/Answers.py +43 -56
  90. edsl/jobs/FetchInvigilator.py +47 -0
  91. edsl/jobs/InterviewTaskManager.py +98 -0
  92. edsl/jobs/InterviewsConstructor.py +50 -0
  93. edsl/jobs/Jobs.py +823 -1358
  94. edsl/jobs/JobsChecks.py +172 -0
  95. edsl/jobs/JobsComponentConstructor.py +189 -0
  96. edsl/jobs/JobsPrompts.py +270 -0
  97. edsl/jobs/JobsRemoteInferenceHandler.py +311 -0
  98. edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
  99. edsl/jobs/RequestTokenEstimator.py +30 -0
  100. edsl/jobs/__init__.py +1 -1
  101. edsl/jobs/async_interview_runner.py +138 -0
  102. edsl/jobs/buckets/BucketCollection.py +104 -63
  103. edsl/jobs/buckets/ModelBuckets.py +65 -65
  104. edsl/jobs/buckets/TokenBucket.py +283 -251
  105. edsl/jobs/buckets/TokenBucketAPI.py +211 -0
  106. edsl/jobs/buckets/TokenBucketClient.py +191 -0
  107. edsl/jobs/check_survey_scenario_compatibility.py +85 -0
  108. edsl/jobs/data_structures.py +120 -0
  109. edsl/jobs/decorators.py +35 -0
  110. edsl/jobs/interviews/Interview.py +396 -661
  111. edsl/jobs/interviews/InterviewExceptionCollection.py +99 -99
  112. edsl/jobs/interviews/InterviewExceptionEntry.py +186 -186
  113. edsl/jobs/interviews/InterviewStatistic.py +63 -63
  114. edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -25
  115. edsl/jobs/interviews/InterviewStatusDictionary.py +78 -78
  116. edsl/jobs/interviews/InterviewStatusLog.py +92 -92
  117. edsl/jobs/interviews/ReportErrors.py +66 -66
  118. edsl/jobs/interviews/interview_status_enum.py +9 -9
  119. edsl/jobs/jobs_status_enums.py +9 -0
  120. edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
  121. edsl/jobs/results_exceptions_handler.py +98 -0
  122. edsl/jobs/runners/JobsRunnerAsyncio.py +151 -361
  123. edsl/jobs/runners/JobsRunnerStatus.py +298 -332
  124. edsl/jobs/tasks/QuestionTaskCreator.py +244 -242
  125. edsl/jobs/tasks/TaskCreators.py +64 -64
  126. edsl/jobs/tasks/TaskHistory.py +470 -451
  127. edsl/jobs/tasks/TaskStatusLog.py +23 -23
  128. edsl/jobs/tasks/task_status_enum.py +161 -163
  129. edsl/jobs/tokens/InterviewTokenUsage.py +27 -27
  130. edsl/jobs/tokens/TokenUsage.py +34 -34
  131. edsl/language_models/ComputeCost.py +63 -0
  132. edsl/language_models/LanguageModel.py +626 -708
  133. edsl/language_models/ModelList.py +164 -109
  134. edsl/language_models/PriceManager.py +127 -0
  135. edsl/language_models/RawResponseHandler.py +106 -0
  136. edsl/language_models/RegisterLanguageModelsMeta.py +184 -184
  137. edsl/language_models/ServiceDataSources.py +0 -0
  138. edsl/language_models/__init__.py +2 -3
  139. edsl/language_models/fake_openai_call.py +15 -15
  140. edsl/language_models/fake_openai_service.py +61 -61
  141. edsl/language_models/key_management/KeyLookup.py +63 -0
  142. edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
  143. edsl/language_models/key_management/KeyLookupCollection.py +38 -0
  144. edsl/language_models/key_management/__init__.py +0 -0
  145. edsl/language_models/key_management/models.py +131 -0
  146. edsl/language_models/model.py +256 -0
  147. edsl/language_models/repair.py +156 -156
  148. edsl/language_models/utilities.py +65 -64
  149. edsl/notebooks/Notebook.py +263 -258
  150. edsl/notebooks/NotebookToLaTeX.py +142 -0
  151. edsl/notebooks/__init__.py +1 -1
  152. edsl/prompts/Prompt.py +352 -357
  153. edsl/prompts/__init__.py +2 -2
  154. edsl/questions/ExceptionExplainer.py +77 -0
  155. edsl/questions/HTMLQuestion.py +103 -0
  156. edsl/questions/QuestionBase.py +518 -660
  157. edsl/questions/QuestionBasePromptsMixin.py +221 -217
  158. edsl/questions/QuestionBudget.py +227 -227
  159. edsl/questions/QuestionCheckBox.py +359 -359
  160. edsl/questions/QuestionExtract.py +180 -183
  161. edsl/questions/QuestionFreeText.py +113 -114
  162. edsl/questions/QuestionFunctional.py +166 -166
  163. edsl/questions/QuestionList.py +223 -231
  164. edsl/questions/QuestionMatrix.py +265 -0
  165. edsl/questions/QuestionMultipleChoice.py +330 -286
  166. edsl/questions/QuestionNumerical.py +151 -153
  167. edsl/questions/QuestionRank.py +314 -324
  168. edsl/questions/Quick.py +41 -41
  169. edsl/questions/SimpleAskMixin.py +74 -73
  170. edsl/questions/__init__.py +27 -26
  171. edsl/questions/{AnswerValidatorMixin.py → answer_validator_mixin.py} +334 -289
  172. edsl/questions/compose_questions.py +98 -98
  173. edsl/questions/data_structures.py +20 -0
  174. edsl/questions/decorators.py +21 -21
  175. edsl/questions/derived/QuestionLikertFive.py +76 -76
  176. edsl/questions/derived/QuestionLinearScale.py +90 -87
  177. edsl/questions/derived/QuestionTopK.py +93 -93
  178. edsl/questions/derived/QuestionYesNo.py +82 -82
  179. edsl/questions/descriptors.py +427 -413
  180. edsl/questions/loop_processor.py +149 -0
  181. edsl/questions/prompt_templates/question_budget.jinja +13 -13
  182. edsl/questions/prompt_templates/question_checkbox.jinja +32 -32
  183. edsl/questions/prompt_templates/question_extract.jinja +11 -11
  184. edsl/questions/prompt_templates/question_free_text.jinja +3 -3
  185. edsl/questions/prompt_templates/question_linear_scale.jinja +11 -11
  186. edsl/questions/prompt_templates/question_list.jinja +17 -17
  187. edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -33
  188. edsl/questions/prompt_templates/question_numerical.jinja +36 -36
  189. edsl/questions/{QuestionBaseGenMixin.py → question_base_gen_mixin.py} +168 -161
  190. edsl/questions/question_registry.py +177 -147
  191. edsl/questions/{RegisterQuestionsMeta.py → register_questions_meta.py} +71 -71
  192. edsl/questions/{ResponseValidatorABC.py → response_validator_abc.py} +188 -174
  193. edsl/questions/response_validator_factory.py +34 -0
  194. edsl/questions/settings.py +12 -12
  195. edsl/questions/templates/budget/answering_instructions.jinja +7 -7
  196. edsl/questions/templates/budget/question_presentation.jinja +7 -7
  197. edsl/questions/templates/checkbox/answering_instructions.jinja +10 -10
  198. edsl/questions/templates/checkbox/question_presentation.jinja +22 -22
  199. edsl/questions/templates/extract/answering_instructions.jinja +7 -7
  200. edsl/questions/templates/likert_five/answering_instructions.jinja +10 -10
  201. edsl/questions/templates/likert_five/question_presentation.jinja +11 -11
  202. edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -5
  203. edsl/questions/templates/linear_scale/question_presentation.jinja +5 -5
  204. edsl/questions/templates/list/answering_instructions.jinja +3 -3
  205. edsl/questions/templates/list/question_presentation.jinja +5 -5
  206. edsl/questions/templates/matrix/__init__.py +1 -0
  207. edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
  208. edsl/questions/templates/matrix/question_presentation.jinja +20 -0
  209. edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -9
  210. edsl/questions/templates/multiple_choice/question_presentation.jinja +11 -11
  211. edsl/questions/templates/numerical/answering_instructions.jinja +6 -6
  212. edsl/questions/templates/numerical/question_presentation.jinja +6 -6
  213. edsl/questions/templates/rank/answering_instructions.jinja +11 -11
  214. edsl/questions/templates/rank/question_presentation.jinja +15 -15
  215. edsl/questions/templates/top_k/answering_instructions.jinja +8 -8
  216. edsl/questions/templates/top_k/question_presentation.jinja +22 -22
  217. edsl/questions/templates/yes_no/answering_instructions.jinja +6 -6
  218. edsl/questions/templates/yes_no/question_presentation.jinja +11 -11
  219. edsl/results/CSSParameterizer.py +108 -0
  220. edsl/results/Dataset.py +587 -293
  221. edsl/results/DatasetExportMixin.py +594 -717
  222. edsl/results/DatasetTree.py +295 -145
  223. edsl/results/MarkdownToDocx.py +122 -0
  224. edsl/results/MarkdownToPDF.py +111 -0
  225. edsl/results/Result.py +557 -456
  226. edsl/results/Results.py +1183 -1071
  227. edsl/results/ResultsExportMixin.py +45 -43
  228. edsl/results/ResultsGGMixin.py +121 -121
  229. edsl/results/TableDisplay.py +125 -0
  230. edsl/results/TextEditor.py +50 -0
  231. edsl/results/__init__.py +2 -2
  232. edsl/results/file_exports.py +252 -0
  233. edsl/results/{ResultsFetchMixin.py → results_fetch_mixin.py} +33 -33
  234. edsl/results/{Selector.py → results_selector.py} +145 -135
  235. edsl/results/{ResultsToolsMixin.py → results_tools_mixin.py} +98 -98
  236. edsl/results/smart_objects.py +96 -0
  237. edsl/results/table_data_class.py +12 -0
  238. edsl/results/table_display.css +78 -0
  239. edsl/results/table_renderers.py +118 -0
  240. edsl/results/tree_explore.py +115 -115
  241. edsl/scenarios/ConstructDownloadLink.py +109 -0
  242. edsl/scenarios/DocumentChunker.py +102 -0
  243. edsl/scenarios/DocxScenario.py +16 -0
  244. edsl/scenarios/FileStore.py +543 -458
  245. edsl/scenarios/PdfExtractor.py +40 -0
  246. edsl/scenarios/Scenario.py +498 -544
  247. edsl/scenarios/ScenarioHtmlMixin.py +65 -64
  248. edsl/scenarios/ScenarioList.py +1458 -1112
  249. edsl/scenarios/ScenarioListExportMixin.py +45 -52
  250. edsl/scenarios/ScenarioListPdfMixin.py +239 -261
  251. edsl/scenarios/__init__.py +3 -4
  252. edsl/scenarios/directory_scanner.py +96 -0
  253. edsl/scenarios/file_methods.py +85 -0
  254. edsl/scenarios/handlers/__init__.py +13 -0
  255. edsl/scenarios/handlers/csv.py +49 -0
  256. edsl/scenarios/handlers/docx.py +76 -0
  257. edsl/scenarios/handlers/html.py +37 -0
  258. edsl/scenarios/handlers/json.py +111 -0
  259. edsl/scenarios/handlers/latex.py +5 -0
  260. edsl/scenarios/handlers/md.py +51 -0
  261. edsl/scenarios/handlers/pdf.py +68 -0
  262. edsl/scenarios/handlers/png.py +39 -0
  263. edsl/scenarios/handlers/pptx.py +105 -0
  264. edsl/scenarios/handlers/py.py +294 -0
  265. edsl/scenarios/handlers/sql.py +313 -0
  266. edsl/scenarios/handlers/sqlite.py +149 -0
  267. edsl/scenarios/handlers/txt.py +33 -0
  268. edsl/scenarios/scenario_join.py +131 -0
  269. edsl/scenarios/scenario_selector.py +156 -0
  270. edsl/shared.py +1 -1
  271. edsl/study/ObjectEntry.py +173 -173
  272. edsl/study/ProofOfWork.py +113 -113
  273. edsl/study/SnapShot.py +80 -80
  274. edsl/study/Study.py +521 -528
  275. edsl/study/__init__.py +4 -4
  276. edsl/surveys/ConstructDAG.py +92 -0
  277. edsl/surveys/DAG.py +148 -148
  278. edsl/surveys/EditSurvey.py +221 -0
  279. edsl/surveys/InstructionHandler.py +100 -0
  280. edsl/surveys/Memory.py +31 -31
  281. edsl/surveys/MemoryManagement.py +72 -0
  282. edsl/surveys/MemoryPlan.py +244 -244
  283. edsl/surveys/Rule.py +327 -326
  284. edsl/surveys/RuleCollection.py +385 -387
  285. edsl/surveys/RuleManager.py +172 -0
  286. edsl/surveys/Simulator.py +75 -0
  287. edsl/surveys/Survey.py +1280 -1787
  288. edsl/surveys/SurveyCSS.py +273 -261
  289. edsl/surveys/SurveyExportMixin.py +259 -259
  290. edsl/surveys/{SurveyFlowVisualizationMixin.py → SurveyFlowVisualization.py} +181 -121
  291. edsl/surveys/SurveyQualtricsImport.py +284 -284
  292. edsl/surveys/SurveyToApp.py +141 -0
  293. edsl/surveys/__init__.py +5 -3
  294. edsl/surveys/base.py +53 -53
  295. edsl/surveys/descriptors.py +60 -56
  296. edsl/surveys/instructions/ChangeInstruction.py +48 -49
  297. edsl/surveys/instructions/Instruction.py +56 -53
  298. edsl/surveys/instructions/InstructionCollection.py +82 -77
  299. edsl/templates/error_reporting/base.html +23 -23
  300. edsl/templates/error_reporting/exceptions_by_model.html +34 -34
  301. edsl/templates/error_reporting/exceptions_by_question_name.html +16 -16
  302. edsl/templates/error_reporting/exceptions_by_type.html +16 -16
  303. edsl/templates/error_reporting/interview_details.html +115 -115
  304. edsl/templates/error_reporting/interviews.html +19 -10
  305. edsl/templates/error_reporting/overview.html +4 -4
  306. edsl/templates/error_reporting/performance_plot.html +1 -1
  307. edsl/templates/error_reporting/report.css +73 -73
  308. edsl/templates/error_reporting/report.html +117 -117
  309. edsl/templates/error_reporting/report.js +25 -25
  310. edsl/tools/__init__.py +1 -1
  311. edsl/tools/clusters.py +192 -192
  312. edsl/tools/embeddings.py +27 -27
  313. edsl/tools/embeddings_plotting.py +118 -118
  314. edsl/tools/plotting.py +112 -112
  315. edsl/tools/summarize.py +18 -18
  316. edsl/utilities/PrettyList.py +56 -0
  317. edsl/utilities/SystemInfo.py +28 -28
  318. edsl/utilities/__init__.py +22 -22
  319. edsl/utilities/ast_utilities.py +25 -25
  320. edsl/utilities/data/Registry.py +6 -6
  321. edsl/utilities/data/__init__.py +1 -1
  322. edsl/utilities/data/scooter_results.json +1 -1
  323. edsl/utilities/decorators.py +77 -77
  324. edsl/utilities/gcp_bucket/cloud_storage.py +96 -96
  325. edsl/utilities/interface.py +627 -627
  326. edsl/utilities/is_notebook.py +18 -0
  327. edsl/utilities/is_valid_variable_name.py +11 -0
  328. edsl/utilities/naming_utilities.py +263 -263
  329. edsl/utilities/remove_edsl_version.py +24 -0
  330. edsl/utilities/repair_functions.py +28 -28
  331. edsl/utilities/restricted_python.py +70 -70
  332. edsl/utilities/utilities.py +436 -409
  333. {edsl-0.1.38.dev3.dist-info → edsl-0.1.39.dist-info}/LICENSE +21 -21
  334. {edsl-0.1.38.dev3.dist-info → edsl-0.1.39.dist-info}/METADATA +13 -10
  335. edsl-0.1.39.dist-info/RECORD +358 -0
  336. {edsl-0.1.38.dev3.dist-info → edsl-0.1.39.dist-info}/WHEEL +1 -1
  337. edsl/language_models/KeyLookup.py +0 -30
  338. edsl/language_models/registry.py +0 -137
  339. edsl/language_models/unused/ReplicateBase.py +0 -83
  340. edsl/results/ResultsDBMixin.py +0 -238
  341. edsl-0.1.38.dev3.dist-info/RECORD +0 -269
@@ -1,458 +1,543 @@
1
- import base64
2
- import io
3
- import tempfile
4
- import mimetypes
5
- import os
6
- from typing import Dict, Any, IO, Optional
7
- import requests
8
- from urllib.parse import urlparse
9
-
10
- import google.generativeai as genai
11
-
12
- from edsl import Scenario
13
- from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
14
- from edsl.utilities.utilities import is_notebook
15
-
16
-
17
- def view_pdf(pdf_path):
18
- import os
19
- import subprocess
20
-
21
- if is_notebook():
22
- from IPython.display import IFrame
23
- from IPython.display import display, HTML
24
-
25
- # Replace 'path/to/your/file.pdf' with the actual path to your PDF file
26
- IFrame(pdf_path, width=700, height=600)
27
- display(HTML(f'<a href="{pdf_path}" target="_blank">Open PDF</a>'))
28
- return
29
-
30
- if os.path.exists(pdf_path):
31
- try:
32
- if (os_name := os.name) == "posix":
33
- # for cool kids
34
- subprocess.run(["open", pdf_path], check=True) # macOS
35
- elif os_name == "nt":
36
- os.startfile(pdf_path) # Windows
37
- else:
38
- subprocess.run(["xdg-open", pdf_path], check=True) # Linux
39
- except Exception as e:
40
- print(f"Error opening PDF: {e}")
41
- else:
42
- print("PDF file was not created successfully.")
43
-
44
-
45
- class FileStore(Scenario):
46
- def __init__(
47
- self,
48
- path: Optional[str] = None,
49
- mime_type: Optional[str] = None,
50
- binary: Optional[bool] = None,
51
- suffix: Optional[str] = None,
52
- base64_string: Optional[str] = None,
53
- external_locations: Optional[Dict[str, str]] = None,
54
- **kwargs,
55
- ):
56
- if path is None and "filename" in kwargs:
57
- path = kwargs["filename"]
58
- self.path = path
59
- self.suffix = suffix or path.split(".")[-1]
60
- self.binary = binary or False
61
- self.mime_type = (
62
- mime_type or mimetypes.guess_type(path)[0] or "application/octet-stream"
63
- )
64
- self.base64_string = base64_string or self.encode_file_to_base64_string(path)
65
- self.external_locations = external_locations or {}
66
- super().__init__(
67
- {
68
- "path": self.path,
69
- "base64_string": self.base64_string,
70
- "binary": self.binary,
71
- "suffix": self.suffix,
72
- "mime_type": self.mime_type,
73
- "external_locations": self.external_locations,
74
- }
75
- )
76
-
77
- def __str__(self):
78
- return "FileStore: self.path"
79
-
80
- @classmethod
81
- def example(self):
82
- import tempfile
83
-
84
- with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as f:
85
- f.write(b"Hello, World!")
86
-
87
- return self(path=f.name)
88
-
89
- @property
90
- def size(self) -> int:
91
- if self.base64_string != None:
92
- return (len(self.base64_string) / 4.0) * 3 # from base64 to char size
93
- return os.path.getsize(self.path)
94
-
95
- def upload_google(self, refresh: bool = False) -> None:
96
- genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
97
- google_info = genai.upload_file(self.path, mime_type=self.mime_type)
98
- self.external_locations["google"] = google_info.to_dict()
99
-
100
- @classmethod
101
- @remove_edsl_version
102
- def from_dict(cls, d):
103
- # return cls(d["filename"], d["binary"], d["suffix"], d["base64_string"])
104
- return cls(**d)
105
-
106
- def __repr__(self):
107
- return f"FileStore(path='{self.path}')"
108
-
109
- def encode_file_to_base64_string(self, file_path: str):
110
- try:
111
- # Attempt to open the file in text mode
112
- with open(file_path, "r") as text_file:
113
- # Read the text data
114
- text_data = text_file.read()
115
- # Encode the text data to a base64 string
116
- base64_encoded_data = base64.b64encode(text_data.encode("utf-8"))
117
- except UnicodeDecodeError:
118
- # If reading as text fails, open the file in binary mode
119
- with open(file_path, "rb") as binary_file:
120
- # Read the binary data
121
- binary_data = binary_file.read()
122
- # Encode the binary data to a base64 string
123
- base64_encoded_data = base64.b64encode(binary_data)
124
- self.binary = True
125
- # Convert the base64 bytes to a string
126
- base64_string = base64_encoded_data.decode("utf-8")
127
-
128
- return base64_string
129
-
130
- def open(self) -> "IO":
131
- if self.binary:
132
- return self.base64_to_file(self["base64_string"], is_binary=True)
133
- else:
134
- return self.base64_to_text_file(self["base64_string"])
135
-
136
- @staticmethod
137
- def base64_to_text_file(base64_string) -> "IO":
138
- # Decode the base64 string to bytes
139
- text_data_bytes = base64.b64decode(base64_string)
140
-
141
- # Convert bytes to string
142
- text_data = text_data_bytes.decode("utf-8")
143
-
144
- # Create a StringIO object from the text data
145
- text_file = io.StringIO(text_data)
146
-
147
- return text_file
148
-
149
- @staticmethod
150
- def base64_to_file(base64_string, is_binary=True):
151
- # Decode the base64 string to bytes
152
- file_data = base64.b64decode(base64_string)
153
-
154
- if is_binary:
155
- # Create a BytesIO object for binary data
156
- return io.BytesIO(file_data)
157
- else:
158
- # Convert bytes to string for text data
159
- text_data = file_data.decode("utf-8")
160
- # Create a StringIO object for text data
161
- return io.StringIO(text_data)
162
-
163
- def to_tempfile(self, suffix=None):
164
- if suffix is None:
165
- suffix = self.suffix
166
- if self.binary:
167
- file_like_object = self.base64_to_file(
168
- self["base64_string"], is_binary=True
169
- )
170
- else:
171
- file_like_object = self.base64_to_text_file(self["base64_string"])
172
-
173
- # Create a named temporary file
174
- mode = "wb" if self.binary else "w"
175
- temp_file = tempfile.NamedTemporaryFile(
176
- delete=False, suffix="." + suffix, mode=mode
177
- )
178
-
179
- if self.binary:
180
- temp_file.write(file_like_object.read())
181
- else:
182
- temp_file.write(file_like_object.read())
183
-
184
- temp_file.close()
185
-
186
- return temp_file.name
187
-
188
- def view(self, max_size: int = 300) -> None:
189
- if self.suffix == "pdf":
190
- view_pdf(self.path)
191
-
192
- if self.suffix == "png" or self.suffix == "jpg" or self.suffix == "jpeg":
193
- if is_notebook():
194
- from IPython.display import Image
195
- from PIL import Image as PILImage
196
-
197
- if max_size:
198
- # Open the image using Pillow
199
- with PILImage.open(self.path) as img:
200
- # Get original width and height
201
- original_width, original_height = img.size
202
-
203
- # Calculate the scaling factor
204
- scale = min(
205
- max_size / original_width, max_size / original_height
206
- )
207
-
208
- # Calculate new dimensions
209
- new_width = int(original_width * scale)
210
- new_height = int(original_height * scale)
211
-
212
- return Image(self.path, width=new_width, height=new_height)
213
- else:
214
- return Image(self.path)
215
-
216
- def push(
217
- self, description: Optional[str] = None, visibility: str = "unlisted"
218
- ) -> dict:
219
- """
220
- Push the object to Coop.
221
- :param description: The description of the object to push.
222
- :param visibility: The visibility of the object to push.
223
- """
224
- scenario_version = Scenario.from_dict(self.to_dict())
225
- if description is None:
226
- description = "File: " + self.path
227
- info = scenario_version.push(description=description, visibility=visibility)
228
- return info
229
-
230
- @classmethod
231
- def pull(cls, uuid: str, expected_parrot_url: Optional[str] = None) -> "FileStore":
232
- """
233
- :param uuid: The UUID of the object to pull.
234
- :param expected_parrot_url: The URL of the Parrot server to use.
235
- :return: The object pulled from the Parrot server.
236
- """
237
- scenario_version = Scenario.pull(uuid, expected_parrot_url=expected_parrot_url)
238
- return cls.from_dict(scenario_version.to_dict())
239
-
240
- @classmethod
241
- def from_url(
242
- cls,
243
- url: str,
244
- download_path: Optional[str] = None,
245
- mime_type: Optional[str] = None,
246
- ) -> "FileStore":
247
- """
248
- :param url: The URL of the file to download.
249
- :param download_path: The path to save the downloaded file.
250
- :param mime_type: The MIME type of the file. If None, it will be guessed from the file extension.
251
- """
252
-
253
- response = requests.get(url, stream=True)
254
- response.raise_for_status() # Raises an HTTPError for bad responses
255
-
256
- # Get the filename from the URL if download_path is not provided
257
- if download_path is None:
258
- filename = os.path.basename(urlparse(url).path)
259
- if not filename:
260
- filename = "downloaded_file"
261
- # download_path = filename
262
- download_path = os.path.join(os.getcwd(), filename)
263
-
264
- # Ensure the directory exists
265
- os.makedirs(os.path.dirname(download_path), exist_ok=True)
266
-
267
- # Write the file
268
- with open(download_path, "wb") as file:
269
- for chunk in response.iter_content(chunk_size=8192):
270
- file.write(chunk)
271
-
272
- # Create and return a new File instance
273
- return cls(download_path, mime_type=mime_type)
274
-
275
-
276
- class CSVFileStore(FileStore):
277
- @classmethod
278
- def example(cls):
279
- from edsl.results.Results import Results
280
-
281
- r = Results.example()
282
- import tempfile
283
-
284
- with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as f:
285
- r.to_csv(filename=f.name)
286
-
287
- return cls(f.name)
288
-
289
- def view(self):
290
- import pandas as pd
291
-
292
- return pd.read_csv(self.to_tempfile())
293
-
294
-
295
- class PDFFileStore(FileStore):
296
- def view(self):
297
- pdf_path = self.to_tempfile()
298
- print(f"PDF path: {pdf_path}") # Print the path to ensure it exists
299
- import os
300
- import subprocess
301
-
302
- if os.path.exists(pdf_path):
303
- try:
304
- if os.name == "posix":
305
- # for cool kids
306
- subprocess.run(["open", pdf_path], check=True) # macOS
307
- elif os.name == "nt":
308
- os.startfile(pdf_path) # Windows
309
- else:
310
- subprocess.run(["xdg-open", pdf_path], check=True) # Linux
311
- except Exception as e:
312
- print(f"Error opening PDF: {e}")
313
- else:
314
- print("PDF file was not created successfully.")
315
-
316
- @classmethod
317
- def example(cls):
318
- import textwrap
319
-
320
- pdf_string = textwrap.dedent(
321
- """\
322
- %PDF-1.4
323
- 1 0 obj
324
- << /Type /Catalog /Pages 2 0 R >>
325
- endobj
326
- 2 0 obj
327
- << /Type /Pages /Kids [3 0 R] /Count 1 >>
328
- endobj
329
- 3 0 obj
330
- << /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R >>
331
- endobj
332
- 4 0 obj
333
- << /Length 44 >>
334
- stream
335
- BT
336
- /F1 24 Tf
337
- 100 700 Td
338
- (Hello, World!) Tj
339
- ET
340
- endstream
341
- endobj
342
- 5 0 obj
343
- << /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>
344
- endobj
345
- 6 0 obj
346
- << /ProcSet [/PDF /Text] /Font << /F1 5 0 R >> >>
347
- endobj
348
- xref
349
- 0 7
350
- 0000000000 65535 f
351
- 0000000010 00000 n
352
- 0000000053 00000 n
353
- 0000000100 00000 n
354
- 0000000173 00000 n
355
- 0000000232 00000 n
356
- 0000000272 00000 n
357
- trailer
358
- << /Size 7 /Root 1 0 R >>
359
- startxref
360
- 318
361
- %%EOF"""
362
- )
363
- import tempfile
364
-
365
- with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as f:
366
- f.write(pdf_string.encode())
367
-
368
- return cls(f.name)
369
-
370
-
371
- class PNGFileStore(FileStore):
372
- @classmethod
373
- def example(cls):
374
- import textwrap
375
-
376
- png_string = textwrap.dedent(
377
- """\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x01\x00\x00\x00\x01\x00\x08\x06\x00\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\x0cIDAT\x08\xd7c\x00\x01"""
378
- )
379
- import tempfile
380
-
381
- with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
382
- f.write(png_string.encode())
383
-
384
- return cls(f.name)
385
-
386
- def view(self):
387
- import matplotlib.pyplot as plt
388
- import matplotlib.image as mpimg
389
-
390
- img = mpimg.imread(self.to_tempfile())
391
- plt.imshow(img)
392
- plt.show()
393
-
394
-
395
- class SQLiteFileStore(FileStore):
396
- @classmethod
397
- def example(cls):
398
- import sqlite3
399
- import tempfile
400
-
401
- with tempfile.NamedTemporaryFile(suffix=".sqlite", delete=False) as f:
402
- conn = sqlite3.connect(f.name)
403
- c = conn.cursor()
404
- c.execute("""CREATE TABLE stocks (date text)""")
405
- conn.commit()
406
-
407
- return cls(f.name)
408
-
409
- def view(self):
410
- import subprocess
411
- import os
412
-
413
- sqlite_path = self.to_tempfile()
414
- os.system(f"sqlite3 {sqlite_path}")
415
-
416
-
417
- class HTMLFileStore(FileStore):
418
- @classmethod
419
- def example(cls):
420
- import tempfile
421
-
422
- with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as f:
423
- f.write("<html><body><h1>Test</h1></body></html>".encode())
424
-
425
- return cls(f.name)
426
-
427
- def view(self):
428
- import webbrowser
429
-
430
- html_path = self.to_tempfile()
431
- webbrowser.open("file://" + html_path)
432
-
433
-
434
- if __name__ == "__main__":
435
- # file_path = "../conjure/examples/Ex11-2.sav"
436
- # fs = FileStore(file_path)
437
- # info = fs.push()
438
- # print(info)
439
-
440
- # fs = CSVFileStore.example()
441
- # fs.to_tempfile()
442
- # print(fs.view())
443
-
444
- # fs = PDFFileStore.example()
445
- # fs.view()
446
-
447
- # fs = PDFFileStore("paper.pdf")
448
- # fs.view()
449
- # from edsl import Conjure
450
- pass
451
- # fs = PNGFileStore("logo.png")
452
- # fs.view()
453
- # fs.upload_google()
454
-
455
- # c = Conjure(datafile_name=fs.to_tempfile())
456
- # f = PDFFileStore("paper.pdf")
457
- # print(f.to_tempfile())
458
- # f.push()
1
+ import base64
2
+ import io
3
+ import tempfile
4
+ import mimetypes
5
+ import os
6
+ from typing import Dict, Any, IO, Optional
7
+
8
+ from edsl.scenarios.Scenario import Scenario
9
+ from edsl.utilities.remove_edsl_version import remove_edsl_version
10
+
11
+ from edsl.scenarios.file_methods import FileMethods
12
+
13
+
14
class FileStore(Scenario):
    """A Scenario that carries the content of a file as a base64 string.

    The instance stores the original path, the base64-encoded content, a
    binary flag, the file suffix (without the leading dot), a MIME type, a
    dict of external locations and any text extracted from the file. The same
    values are passed to ``Scenario.__init__`` as a dict.
    """

    __documentation__ = "https://docs.expectedparrot.com/en/latest/filestore.html"

    def __init__(
        self,
        path: Optional[str] = None,
        mime_type: Optional[str] = None,
        binary: Optional[bool] = None,
        suffix: Optional[str] = None,
        base64_string: Optional[str] = None,
        external_locations: Optional[Dict[str, str]] = None,
        extracted_text: Optional[str] = None,
        **kwargs,
    ):
        """Build a FileStore from a file on disk or from serialized content.

        Args:
            path: Location of the file. The legacy keyword ``filename`` is
                accepted as a fallback when ``path`` is not given.
            mime_type: MIME type; guessed from ``path`` when omitted, falling
                back to ``application/octet-stream``.
            binary: Whether the content is binary. Defaults to ``False`` and
                is switched to ``True`` if the file is not valid UTF-8.
            suffix: File extension without the dot; derived from ``path``
                when omitted.
            base64_string: Already-encoded content. When ``None`` the file at
                ``path`` is read and encoded.
            external_locations: Mapping of service name to upload metadata.
            extracted_text: Pre-computed text; when ``None`` it is extracted.

        Raises:
            ValueError: If neither ``path`` nor ``base64_string`` is given.
        """
        if path is None and "filename" in kwargs:
            path = kwargs["filename"]

        if path is None and base64_string is None:
            raise ValueError(
                "FileStore needs either a path or a base64_string to load content from."
            )

        self._path = path  # Store the original path privately
        self._temp_path = None  # Track any generated temporary file

        self.suffix = suffix or self._suffix_from_path(path)
        self.binary = binary or False
        self.mime_type = (
            mime_type
            or (mimetypes.guess_type(path)[0] if path else None)
            or "application/octet-stream"
        )
        # Must run after self.binary is assigned: encoding the file flips the
        # flag to True when the content is not valid UTF-8. An empty string is
        # valid content (an empty file), so only None triggers a read.
        self.base64_string = (
            self.encode_file_to_base64_string(path)
            if base64_string is None
            else base64_string
        )
        self.external_locations = external_locations or {}

        self.extracted_text = (
            self.extract_text() if extracted_text is None else extracted_text
        )

        super().__init__(
            {
                "path": path,
                "base64_string": self.base64_string,
                "binary": self.binary,
                "suffix": self.suffix,
                "mime_type": self.mime_type,
                "external_locations": self.external_locations,
                "extracted_text": self.extracted_text,
            }
        )

    @staticmethod
    def _suffix_from_path(path: Optional[str]) -> str:
        """Return the extension of ``path`` without the dot ("" if none)."""
        if not path:
            return ""
        return os.path.splitext(path)[1].lstrip(".")

    @property
    def path(self) -> str:
        """Return a path on disk that holds the file content.

        The original path is returned while it still exists. Otherwise a
        temporary file is generated from the base64 content and reused on
        subsequent calls for as long as it exists.
        """
        # Check if original path exists and is accessible
        if self._path and os.path.isfile(self._path):
            return self._path

        # If we already have a valid temporary file, use it
        if self._temp_path and os.path.isfile(self._temp_path):
            return self._temp_path

        # Generate a new temporary file from base64 content
        self._temp_path = self.to_tempfile(self.suffix)
        return self._temp_path

    def __str__(self):
        """Return a short label containing the resolved file path."""
        return f"FileStore: {self.path}"

    @classmethod
    def example(cls, example_type="txt"):
        """Return an example instance for ``example_type``.

        Prints a message and returns ``None`` when ``FileMethods`` has no
        handler for the requested type.
        """
        file_methods_class = FileMethods.get_handler(example_type)
        if file_methods_class:
            return cls(file_methods_class().example())
        else:
            print(f"Example for {example_type} is not supported.")

    @property
    def size(self) -> int:
        """Return the size of the decoded content in bytes."""
        encoded = self.base64_string
        if encoded is not None:
            # Every 4 base64 characters carry 3 bytes; each trailing "="
            # stands for one byte that is not actually present.
            padding = encoded[-2:].count("=")
            return len(encoded) * 3 // 4 - padding
        return os.path.getsize(self.path)

    def upload_google(self, refresh: bool = False) -> None:
        """Upload the file with ``google.generativeai`` and record the result.

        The upload metadata is stored under ``external_locations["google"]``.
        ``refresh`` is accepted but not used by this implementation.
        """
        import google.generativeai as genai

        genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
        google_info = genai.upload_file(self.path, mime_type=self.mime_type)
        self.external_locations["google"] = google_info.to_dict()

    @classmethod
    @remove_edsl_version
    def from_dict(cls, d):
        """Rebuild an instance by passing the dict entries as keyword arguments."""
        return cls(**d)

    def __repr__(self):
        """Return a constructor-style repr with long values abbreviated."""
        import reprlib

        r = reprlib.Repr()
        r.maxstring = 20  # Limit strings to 20 chars
        r.maxother = 30  # Limit other types to 30 chars

        params = ", ".join(f"{key}={r.repr(value)}" for key, value in self.data.items())
        return f"{self.__class__.__name__}({params})"

    def _repr_html_(self):
        """Return the parent HTML representation followed by a download link."""
        parent_html = super()._repr_html_()
        from edsl.scenarios.ConstructDownloadLink import ConstructDownloadLink

        link = ConstructDownloadLink(self).html_create_link(self.path, style=None)
        return f"{parent_html}<br>{link}"

    def encode_file_to_base64_string(self, file_path: str):
        """Read ``file_path`` and return its content as a base64 string.

        The file is read as raw bytes so the encoded content is exactly what
        is on disk, independent of the platform's default text encoding and
        newline handling. ``self.binary`` is set to ``True`` when the bytes
        are not valid UTF-8.
        """
        with open(file_path, "rb") as source:
            raw = source.read()

        try:
            raw.decode("utf-8")
        except UnicodeDecodeError:
            self.binary = True

        return base64.b64encode(raw).decode("utf-8")

    def open(self) -> "IO":
        """Return an in-memory file object: BytesIO if binary, else StringIO."""
        if self.binary:
            return self.base64_to_file(self.base64_string, is_binary=True)
        else:
            return self.base64_to_text_file(self.base64_string)

    def write(self, filename: Optional[str] = None) -> str:
        """
        Write the file content to disk, either to a specified filename or a temporary file.

        Args:
            filename (Optional[str]): The destination filename. If None, creates a temporary file.

        Returns:
            str: The path to the written file.
        """
        # If no filename provided, create a temporary file
        if filename is None:
            extension = f".{self.suffix}" if self.suffix else ""
            with tempfile.NamedTemporaryFile(delete=False, suffix=extension) as f:
                filename = f.name

        try:
            # Decode before opening the destination so a decoding failure
            # cannot leave a truncated file behind. Writing the raw bytes
            # reproduces the stored content exactly for text and binary alike.
            content = base64.b64decode(self.base64_string)
            with open(filename, "wb") as f:
                f.write(content)
            print(f"File written to {filename}")
        except Exception as e:
            print(f"Error writing file: {e}")
            raise

        return filename

    @staticmethod
    def base64_to_text_file(base64_string) -> "IO":
        """Decode ``base64_string`` as UTF-8 text and return it as a StringIO."""
        # Decode the base64 string to bytes
        text_data_bytes = base64.b64decode(base64_string)

        # Convert bytes to string
        text_data = text_data_bytes.decode("utf-8")

        # Create a StringIO object from the text data
        return io.StringIO(text_data)

    @staticmethod
    def base64_to_file(base64_string, is_binary=True):
        """Decode ``base64_string`` into a BytesIO, or a StringIO if not binary."""
        # Decode the base64 string to bytes
        file_data = base64.b64decode(base64_string)

        if is_binary:
            # Create a BytesIO object for binary data
            return io.BytesIO(file_data)
        else:
            # Convert bytes to string for text data
            text_data = file_data.decode("utf-8")
            # Create a StringIO object for text data
            return io.StringIO(text_data)

    @property
    def text(self):
        """Return the content as text; warn and return ``None`` for binary files."""
        if self.binary:
            import warnings

            warnings.warn("This is a binary file.")
        else:
            return self.base64_to_text_file(self.base64_string).read()

    def to_tempfile(self, suffix=None):
        """Write the content to a new temporary file and return its path.

        Args:
            suffix: Extension without the dot; defaults to ``self.suffix``.
        """
        if suffix is None:
            suffix = self.suffix
        extension = f".{suffix}" if suffix else ""

        # self.base64_string (not self["base64_string"]) is used so this also
        # works while __init__ is still running, before the Scenario data
        # has been populated.
        content = base64.b64decode(self.base64_string)

        with tempfile.NamedTemporaryFile(
            delete=False, suffix=extension, mode="wb"
        ) as temp_file:
            temp_file.write(content)

        return temp_file.name

    def view(self) -> None:
        """Display the file with the handler registered for its suffix."""
        handler = FileMethods.get_handler(self.suffix)
        if handler:
            handler(self.path).view()
        else:
            print(f"Viewing of {self.suffix} files is not supported.")

    def extract_text(self) -> Optional[str]:
        """Return text extracted from the file, or ``None`` if unavailable.

        A handler with an ``extract_text`` method takes precedence; otherwise
        non-binary content is returned as-is.
        """
        handler = FileMethods.get_handler(self.suffix)
        if handler and hasattr(handler, "extract_text"):
            return handler(self.path).extract_text()

        if not self.binary:
            return self.text

        return None

    def push(
        self, description: Optional[str] = None, visibility: str = "unlisted"
    ) -> dict:
        """
        Push the object to Coop.
        :param description: The description of the object to push.
        :param visibility: The visibility of the object to push.
        """
        scenario_version = Scenario.from_dict(self.to_dict())
        if description is None:
            description = "File: " + self.path
        info = scenario_version.push(description=description, visibility=visibility)
        return info

    @classmethod
    def pull(cls, uuid: str, expected_parrot_url: Optional[str] = None) -> "FileStore":
        """
        :param uuid: The UUID of the object to pull.
        :param expected_parrot_url: The URL of the Parrot server to use.
        :return: The object pulled from the Parrot server.
        """
        scenario_version = Scenario.pull(uuid, expected_parrot_url=expected_parrot_url)
        return cls.from_dict(scenario_version.to_dict())

    @classmethod
    def from_url(
        cls,
        url: str,
        download_path: Optional[str] = None,
        mime_type: Optional[str] = None,
    ) -> "FileStore":
        """
        :param url: The URL of the file to download.
        :param download_path: The path to save the downloaded file.
        :param mime_type: The MIME type of the file. If None, it will be guessed from the file extension.
        """
        import requests
        from urllib.parse import urlparse

        # Get the filename from the URL if download_path is not provided
        if download_path is None:
            filename = os.path.basename(urlparse(url).path) or "downloaded_file"
            download_path = os.path.join(os.getcwd(), filename)

        # Ensure the directory exists. A bare filename has an empty dirname,
        # and os.makedirs("") raises, so only create real directories.
        target_dir = os.path.dirname(download_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        # The context manager releases the connection even on errors.
        with requests.get(url, stream=True) as response:
            response.raise_for_status()  # Raises an HTTPError for bad responses
            with open(download_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    file.write(chunk)

        # Create and return a new File instance
        return cls(download_path, mime_type=mime_type)

    def create_link(self, custom_filename=None, style=None):
        """Return a download link built by ``ConstructDownloadLink``."""
        from edsl.scenarios.ConstructDownloadLink import ConstructDownloadLink

        return ConstructDownloadLink(self).create_link(custom_filename, style)

    def to_pandas(self):
        """
        Convert the file content to a pandas DataFrame if supported by the file handler.

        Returns:
            pandas.DataFrame: The data from the file as a DataFrame

        Raises:
            AttributeError: If the file type's handler doesn't support pandas conversion
        """
        handler = FileMethods.get_handler(self.suffix)
        if handler and hasattr(handler, "to_pandas"):
            return handler(self.path).to_pandas()
        raise AttributeError(
            f"Converting {self.suffix} files to pandas DataFrame is not supported"
        )

    def __getattr__(self, name):
        """
        Delegate pandas DataFrame methods to the underlying DataFrame if this is a CSV file
        """
        # Private and dunder lookups (copy, pickle, attributes not yet set in
        # __init__) must fail fast; delegating them would re-enter this method.
        # The suffix is read from __dict__ for the same reason.
        if not name.startswith("_") and self.__dict__.get("suffix") == "csv":
            # Get the pandas DataFrame
            df = self.to_pandas()
            # Check if the requested attribute exists in the DataFrame
            if hasattr(df, name):
                return getattr(df, name)
        # If not a CSV or attribute doesn't exist in DataFrame, raise AttributeError
        raise AttributeError(
            f"'{self.__class__.__name__}' object has no attribute '{name}'"
        )
361
+
362
+
363
class CSVFileStore(FileStore):
    """FileStore variant with a CSV example and a DataFrame-based viewer."""

    @classmethod
    def example(cls):
        """Dump ``Results.example()`` to a temporary ``.csv`` file and wrap it."""
        import tempfile

        from edsl.results.Results import Results

        sample_results = Results.example()
        with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as csv_file:
            sample_results.to_csv(filename=csv_file.name)

        return cls(csv_file.name)

    def view(self):
        """Return the content loaded with ``pandas.read_csv``."""
        import pandas as pd

        csv_path = self.to_tempfile()
        return pd.read_csv(csv_path)
380
+
381
+
382
class PDFFileStore(FileStore):
    """FileStore variant with a PDF example and an OS-level viewer."""

    def view(self):
        """Write the PDF to a temporary file and open it with the OS viewer.

        Errors raised while launching the viewer are printed, not propagated.
        """
        import os
        import subprocess
        import sys

        pdf_path = self.to_tempfile()
        print(f"PDF path: {pdf_path}")  # Print the path to ensure it exists

        if not os.path.exists(pdf_path):
            print("PDF file was not created successfully.")
            return

        try:
            # os.name is "posix" on both macOS and Linux, so sys.platform is
            # needed to pick "open" only where it exists.
            if sys.platform == "darwin":
                subprocess.run(["open", pdf_path], check=True)  # macOS
            elif os.name == "nt":
                os.startfile(pdf_path)  # Windows
            else:
                subprocess.run(["xdg-open", pdf_path], check=True)  # Linux
        except Exception as e:
            print(f"Error opening PDF: {e}")

    @classmethod
    def example(cls):
        """Write a small hand-written PDF to a temporary file and wrap it."""
        import tempfile
        import textwrap

        pdf_string = textwrap.dedent(
            """\
            %PDF-1.4
            1 0 obj
            << /Type /Catalog /Pages 2 0 R >>
            endobj
            2 0 obj
            << /Type /Pages /Kids [3 0 R] /Count 1 >>
            endobj
            3 0 obj
            << /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R >>
            endobj
            4 0 obj
            << /Length 44 >>
            stream
            BT
            /F1 24 Tf
            100 700 Td
            (Hello, World!) Tj
            ET
            endstream
            endobj
            5 0 obj
            << /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>
            endobj
            6 0 obj
            << /ProcSet [/PDF /Text] /Font << /F1 5 0 R >> >>
            endobj
            xref
            0 7
            0000000000 65535 f
            0000000010 00000 n
            0000000053 00000 n
            0000000100 00000 n
            0000000173 00000 n
            0000000232 00000 n
            0000000272 00000 n
            trailer
            << /Size 7 /Root 1 0 R >>
            startxref
            318
            %%EOF"""
        )

        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as f:
            f.write(pdf_string.encode())

        return cls(f.name)
456
+
457
+
458
class PNGFileStore(FileStore):
    """FileStore variant with a PNG example and a matplotlib-based viewer."""

    @classmethod
    def example(cls):
        """Write a valid 1x1 transparent PNG to a temporary file and wrap it.

        The image is assembled from bytes. Encoding a ``str`` that contains
        ``\\x89`` as UTF-8 would turn the signature byte into two bytes and
        produce a file no PNG reader accepts.
        """
        import struct
        import tempfile
        import zlib

        def chunk(tag: bytes, payload: bytes) -> bytes:
            # PNG chunk layout: length, type, data, CRC over type + data.
            checksum = zlib.crc32(tag + payload) & 0xFFFFFFFF
            return (
                struct.pack(">I", len(payload))
                + tag
                + payload
                + struct.pack(">I", checksum)
            )

        # IHDR: width 1, height 1, 8-bit depth, colour type 6 (RGBA),
        # default compression / filter, no interlacing.
        header = struct.pack(">IIBBBBB", 1, 1, 8, 6, 0, 0, 0)
        # One scanline: filter byte 0 followed by a single RGBA pixel.
        scanline = b"\x00" + b"\x00\x00\x00\x00"

        png_bytes = (
            b"\x89PNG\r\n\x1a\n"
            + chunk(b"IHDR", header)
            + chunk(b"IDAT", zlib.compress(scanline))
            + chunk(b"IEND", b"")
        )

        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
            f.write(png_bytes)

        return cls(f.name)

    def view(self):
        """Render the image in a matplotlib window."""
        import matplotlib.pyplot as plt
        import matplotlib.image as mpimg

        img = mpimg.imread(self.to_tempfile())
        plt.imshow(img)
        plt.show()
480
+
481
+
482
class SQLiteFileStore(FileStore):
    """FileStore variant with an SQLite example and a command-line viewer."""

    @classmethod
    def example(cls):
        """Create a temporary ``.sqlite`` file with one empty table and wrap it.

        Returns:
            An instance of ``cls`` built from the temporary database path.
        """
        import sqlite3
        import tempfile
        from contextlib import closing

        with tempfile.NamedTemporaryFile(suffix=".sqlite", delete=False) as f:
            # closing() guarantees the connection is released even if the
            # CREATE TABLE statement fails.
            with closing(sqlite3.connect(f.name)) as conn:
                conn.execute("""CREATE TABLE stocks (date text)""")
                conn.commit()

        return cls(f.name)

    def view(self):
        """Open the database in the ``sqlite3`` command-line shell."""
        import subprocess

        sqlite_path = self.to_tempfile()
        try:
            # Argument list instead of a shell string: the path is never
            # interpreted by a shell.
            subprocess.run(["sqlite3", sqlite_path])
        except FileNotFoundError:
            print("The sqlite3 command-line shell was not found on PATH.")
502
+
503
+
504
class HTMLFileStore(FileStore):
    """FileStore variant with an HTML example and a browser-based viewer."""

    @classmethod
    def example(cls):
        """Create a temporary ``.html`` file with a one-line page and wrap it.

        Returns:
            An instance of ``cls`` built from the temporary file path.
        """
        import tempfile

        with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as f:
            f.write("<html><body><h1>Test</h1></body></html>".encode())

        return cls(f.name)

    def view(self):
        """Write the content to a temporary file and open it in the browser."""
        import webbrowser
        from pathlib import Path

        html_path = self.to_tempfile()
        # as_uri() yields a correct file URL on every platform, including
        # Windows drive letters and characters that need percent-encoding.
        webbrowser.open(Path(html_path).as_uri())
519
+
520
+
521
if __name__ == "__main__":
    import doctest

    doctest.testmod()

    # Interactive smoke test: build and display one example per supported
    # file type, pausing between them.
    formats = FileMethods.supported_file_types()
    for file_type in formats:
        print("Now testing", file_type)
        fs = FileStore.example(file_type)
        if fs is None:
            # FileStore.example prints its own message and returns None when
            # no handler exists for this type; there is nothing to view.
            continue
        fs.view()
        input("Press Enter to continue...")
535
+
536
+ # pdf_example.view()
537
+ # FileStore(pdf_example).view()
538
+
539
+ # pdf_methods = methods.get("pdf")
540
+ # file = pdf_methods().example()
541
+ # pdf_methods(file).view()
542
+
543
+ # print(FileMethods._handlers)