edsl 0.1.39.dev3__py3-none-any.whl → 0.1.39.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (344) hide show
  1. edsl/Base.py +413 -332
  2. edsl/BaseDiff.py +260 -260
  3. edsl/TemplateLoader.py +24 -24
  4. edsl/__init__.py +57 -49
  5. edsl/__version__.py +1 -1
  6. edsl/agents/Agent.py +1071 -867
  7. edsl/agents/AgentList.py +551 -413
  8. edsl/agents/Invigilator.py +284 -233
  9. edsl/agents/InvigilatorBase.py +257 -270
  10. edsl/agents/PromptConstructor.py +272 -354
  11. edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
  12. edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
  13. edsl/agents/__init__.py +2 -3
  14. edsl/agents/descriptors.py +99 -99
  15. edsl/agents/prompt_helpers.py +129 -129
  16. edsl/agents/question_option_processor.py +172 -0
  17. edsl/auto/AutoStudy.py +130 -117
  18. edsl/auto/StageBase.py +243 -230
  19. edsl/auto/StageGenerateSurvey.py +178 -178
  20. edsl/auto/StageLabelQuestions.py +125 -125
  21. edsl/auto/StagePersona.py +61 -61
  22. edsl/auto/StagePersonaDimensionValueRanges.py +88 -88
  23. edsl/auto/StagePersonaDimensionValues.py +74 -74
  24. edsl/auto/StagePersonaDimensions.py +69 -69
  25. edsl/auto/StageQuestions.py +74 -73
  26. edsl/auto/SurveyCreatorPipeline.py +21 -21
  27. edsl/auto/utilities.py +218 -224
  28. edsl/base/Base.py +279 -279
  29. edsl/config.py +177 -157
  30. edsl/conversation/Conversation.py +290 -290
  31. edsl/conversation/car_buying.py +59 -58
  32. edsl/conversation/chips.py +95 -95
  33. edsl/conversation/mug_negotiation.py +81 -81
  34. edsl/conversation/next_speaker_utilities.py +93 -93
  35. edsl/coop/CoopFunctionsMixin.py +15 -0
  36. edsl/coop/ExpectedParrotKeyHandler.py +125 -0
  37. edsl/coop/PriceFetcher.py +54 -54
  38. edsl/coop/__init__.py +2 -2
  39. edsl/coop/coop.py +1106 -1028
  40. edsl/coop/utils.py +131 -131
  41. edsl/data/Cache.py +573 -555
  42. edsl/data/CacheEntry.py +230 -233
  43. edsl/data/CacheHandler.py +168 -149
  44. edsl/data/RemoteCacheSync.py +186 -78
  45. edsl/data/SQLiteDict.py +292 -292
  46. edsl/data/__init__.py +5 -4
  47. edsl/data/hack.py +10 -0
  48. edsl/data/orm.py +10 -10
  49. edsl/data_transfer_models.py +74 -73
  50. edsl/enums.py +202 -175
  51. edsl/exceptions/BaseException.py +21 -21
  52. edsl/exceptions/__init__.py +54 -54
  53. edsl/exceptions/agents.py +54 -42
  54. edsl/exceptions/cache.py +5 -5
  55. edsl/exceptions/configuration.py +16 -16
  56. edsl/exceptions/coop.py +10 -10
  57. edsl/exceptions/data.py +14 -14
  58. edsl/exceptions/general.py +34 -34
  59. edsl/exceptions/inference_services.py +5 -0
  60. edsl/exceptions/jobs.py +33 -33
  61. edsl/exceptions/language_models.py +63 -63
  62. edsl/exceptions/prompts.py +15 -15
  63. edsl/exceptions/questions.py +109 -91
  64. edsl/exceptions/results.py +29 -29
  65. edsl/exceptions/scenarios.py +29 -22
  66. edsl/exceptions/surveys.py +37 -37
  67. edsl/inference_services/AnthropicService.py +106 -87
  68. edsl/inference_services/AvailableModelCacheHandler.py +184 -0
  69. edsl/inference_services/AvailableModelFetcher.py +215 -0
  70. edsl/inference_services/AwsBedrock.py +118 -120
  71. edsl/inference_services/AzureAI.py +215 -217
  72. edsl/inference_services/DeepInfraService.py +18 -18
  73. edsl/inference_services/GoogleService.py +143 -148
  74. edsl/inference_services/GroqService.py +20 -20
  75. edsl/inference_services/InferenceServiceABC.py +80 -147
  76. edsl/inference_services/InferenceServicesCollection.py +138 -97
  77. edsl/inference_services/MistralAIService.py +120 -123
  78. edsl/inference_services/OllamaService.py +18 -18
  79. edsl/inference_services/OpenAIService.py +236 -224
  80. edsl/inference_services/PerplexityService.py +160 -163
  81. edsl/inference_services/ServiceAvailability.py +135 -0
  82. edsl/inference_services/TestService.py +90 -89
  83. edsl/inference_services/TogetherAIService.py +172 -170
  84. edsl/inference_services/data_structures.py +134 -0
  85. edsl/inference_services/models_available_cache.py +118 -118
  86. edsl/inference_services/rate_limits_cache.py +25 -25
  87. edsl/inference_services/registry.py +41 -41
  88. edsl/inference_services/write_available.py +10 -10
  89. edsl/jobs/AnswerQuestionFunctionConstructor.py +223 -0
  90. edsl/jobs/Answers.py +43 -56
  91. edsl/jobs/FetchInvigilator.py +47 -0
  92. edsl/jobs/InterviewTaskManager.py +98 -0
  93. edsl/jobs/InterviewsConstructor.py +50 -0
  94. edsl/jobs/Jobs.py +823 -898
  95. edsl/jobs/JobsChecks.py +172 -147
  96. edsl/jobs/JobsComponentConstructor.py +189 -0
  97. edsl/jobs/JobsPrompts.py +270 -268
  98. edsl/jobs/JobsRemoteInferenceHandler.py +311 -239
  99. edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
  100. edsl/jobs/RequestTokenEstimator.py +30 -0
  101. edsl/jobs/__init__.py +1 -1
  102. edsl/jobs/async_interview_runner.py +138 -0
  103. edsl/jobs/buckets/BucketCollection.py +104 -63
  104. edsl/jobs/buckets/ModelBuckets.py +65 -65
  105. edsl/jobs/buckets/TokenBucket.py +283 -251
  106. edsl/jobs/buckets/TokenBucketAPI.py +211 -0
  107. edsl/jobs/buckets/TokenBucketClient.py +191 -0
  108. edsl/jobs/check_survey_scenario_compatibility.py +85 -0
  109. edsl/jobs/data_structures.py +120 -0
  110. edsl/jobs/decorators.py +35 -0
  111. edsl/jobs/interviews/Interview.py +396 -661
  112. edsl/jobs/interviews/InterviewExceptionCollection.py +99 -99
  113. edsl/jobs/interviews/InterviewExceptionEntry.py +186 -186
  114. edsl/jobs/interviews/InterviewStatistic.py +63 -63
  115. edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -25
  116. edsl/jobs/interviews/InterviewStatusDictionary.py +78 -78
  117. edsl/jobs/interviews/InterviewStatusLog.py +92 -92
  118. edsl/jobs/interviews/ReportErrors.py +66 -66
  119. edsl/jobs/interviews/interview_status_enum.py +9 -9
  120. edsl/jobs/jobs_status_enums.py +9 -0
  121. edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
  122. edsl/jobs/results_exceptions_handler.py +98 -0
  123. edsl/jobs/runners/JobsRunnerAsyncio.py +151 -466
  124. edsl/jobs/runners/JobsRunnerStatus.py +297 -330
  125. edsl/jobs/tasks/QuestionTaskCreator.py +244 -242
  126. edsl/jobs/tasks/TaskCreators.py +64 -64
  127. edsl/jobs/tasks/TaskHistory.py +470 -450
  128. edsl/jobs/tasks/TaskStatusLog.py +23 -23
  129. edsl/jobs/tasks/task_status_enum.py +161 -163
  130. edsl/jobs/tokens/InterviewTokenUsage.py +27 -27
  131. edsl/jobs/tokens/TokenUsage.py +34 -34
  132. edsl/language_models/ComputeCost.py +63 -0
  133. edsl/language_models/LanguageModel.py +626 -668
  134. edsl/language_models/ModelList.py +164 -155
  135. edsl/language_models/PriceManager.py +127 -0
  136. edsl/language_models/RawResponseHandler.py +106 -0
  137. edsl/language_models/RegisterLanguageModelsMeta.py +184 -184
  138. edsl/language_models/ServiceDataSources.py +0 -0
  139. edsl/language_models/__init__.py +2 -3
  140. edsl/language_models/fake_openai_call.py +15 -15
  141. edsl/language_models/fake_openai_service.py +61 -61
  142. edsl/language_models/key_management/KeyLookup.py +63 -0
  143. edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
  144. edsl/language_models/key_management/KeyLookupCollection.py +38 -0
  145. edsl/language_models/key_management/__init__.py +0 -0
  146. edsl/language_models/key_management/models.py +131 -0
  147. edsl/language_models/model.py +256 -0
  148. edsl/language_models/repair.py +156 -156
  149. edsl/language_models/utilities.py +65 -64
  150. edsl/notebooks/Notebook.py +263 -258
  151. edsl/notebooks/NotebookToLaTeX.py +142 -0
  152. edsl/notebooks/__init__.py +1 -1
  153. edsl/prompts/Prompt.py +352 -362
  154. edsl/prompts/__init__.py +2 -2
  155. edsl/questions/ExceptionExplainer.py +77 -0
  156. edsl/questions/HTMLQuestion.py +103 -0
  157. edsl/questions/QuestionBase.py +518 -664
  158. edsl/questions/QuestionBasePromptsMixin.py +221 -217
  159. edsl/questions/QuestionBudget.py +227 -227
  160. edsl/questions/QuestionCheckBox.py +359 -359
  161. edsl/questions/QuestionExtract.py +180 -182
  162. edsl/questions/QuestionFreeText.py +113 -114
  163. edsl/questions/QuestionFunctional.py +166 -166
  164. edsl/questions/QuestionList.py +223 -231
  165. edsl/questions/QuestionMatrix.py +265 -0
  166. edsl/questions/QuestionMultipleChoice.py +330 -286
  167. edsl/questions/QuestionNumerical.py +151 -153
  168. edsl/questions/QuestionRank.py +314 -324
  169. edsl/questions/Quick.py +41 -41
  170. edsl/questions/SimpleAskMixin.py +74 -73
  171. edsl/questions/__init__.py +27 -26
  172. edsl/questions/{AnswerValidatorMixin.py → answer_validator_mixin.py} +334 -289
  173. edsl/questions/compose_questions.py +98 -98
  174. edsl/questions/data_structures.py +20 -0
  175. edsl/questions/decorators.py +21 -21
  176. edsl/questions/derived/QuestionLikertFive.py +76 -76
  177. edsl/questions/derived/QuestionLinearScale.py +90 -87
  178. edsl/questions/derived/QuestionTopK.py +93 -93
  179. edsl/questions/derived/QuestionYesNo.py +82 -82
  180. edsl/questions/descriptors.py +427 -413
  181. edsl/questions/loop_processor.py +149 -0
  182. edsl/questions/prompt_templates/question_budget.jinja +13 -13
  183. edsl/questions/prompt_templates/question_checkbox.jinja +32 -32
  184. edsl/questions/prompt_templates/question_extract.jinja +11 -11
  185. edsl/questions/prompt_templates/question_free_text.jinja +3 -3
  186. edsl/questions/prompt_templates/question_linear_scale.jinja +11 -11
  187. edsl/questions/prompt_templates/question_list.jinja +17 -17
  188. edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -33
  189. edsl/questions/prompt_templates/question_numerical.jinja +36 -36
  190. edsl/questions/{QuestionBaseGenMixin.py → question_base_gen_mixin.py} +168 -161
  191. edsl/questions/question_registry.py +177 -177
  192. edsl/questions/{RegisterQuestionsMeta.py → register_questions_meta.py} +71 -71
  193. edsl/questions/{ResponseValidatorABC.py → response_validator_abc.py} +188 -174
  194. edsl/questions/response_validator_factory.py +34 -0
  195. edsl/questions/settings.py +12 -12
  196. edsl/questions/templates/budget/answering_instructions.jinja +7 -7
  197. edsl/questions/templates/budget/question_presentation.jinja +7 -7
  198. edsl/questions/templates/checkbox/answering_instructions.jinja +10 -10
  199. edsl/questions/templates/checkbox/question_presentation.jinja +22 -22
  200. edsl/questions/templates/extract/answering_instructions.jinja +7 -7
  201. edsl/questions/templates/likert_five/answering_instructions.jinja +10 -10
  202. edsl/questions/templates/likert_five/question_presentation.jinja +11 -11
  203. edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -5
  204. edsl/questions/templates/linear_scale/question_presentation.jinja +5 -5
  205. edsl/questions/templates/list/answering_instructions.jinja +3 -3
  206. edsl/questions/templates/list/question_presentation.jinja +5 -5
  207. edsl/questions/templates/matrix/__init__.py +1 -0
  208. edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
  209. edsl/questions/templates/matrix/question_presentation.jinja +20 -0
  210. edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -9
  211. edsl/questions/templates/multiple_choice/question_presentation.jinja +11 -11
  212. edsl/questions/templates/numerical/answering_instructions.jinja +6 -6
  213. edsl/questions/templates/numerical/question_presentation.jinja +6 -6
  214. edsl/questions/templates/rank/answering_instructions.jinja +11 -11
  215. edsl/questions/templates/rank/question_presentation.jinja +15 -15
  216. edsl/questions/templates/top_k/answering_instructions.jinja +8 -8
  217. edsl/questions/templates/top_k/question_presentation.jinja +22 -22
  218. edsl/questions/templates/yes_no/answering_instructions.jinja +6 -6
  219. edsl/questions/templates/yes_no/question_presentation.jinja +11 -11
  220. edsl/results/CSSParameterizer.py +108 -108
  221. edsl/results/Dataset.py +587 -424
  222. edsl/results/DatasetExportMixin.py +594 -731
  223. edsl/results/DatasetTree.py +295 -275
  224. edsl/results/MarkdownToDocx.py +122 -0
  225. edsl/results/MarkdownToPDF.py +111 -0
  226. edsl/results/Result.py +557 -465
  227. edsl/results/Results.py +1183 -1165
  228. edsl/results/ResultsExportMixin.py +45 -43
  229. edsl/results/ResultsGGMixin.py +121 -121
  230. edsl/results/TableDisplay.py +125 -198
  231. edsl/results/TextEditor.py +50 -0
  232. edsl/results/__init__.py +2 -2
  233. edsl/results/file_exports.py +252 -0
  234. edsl/results/{ResultsFetchMixin.py → results_fetch_mixin.py} +33 -33
  235. edsl/results/{Selector.py → results_selector.py} +145 -135
  236. edsl/results/{ResultsToolsMixin.py → results_tools_mixin.py} +98 -98
  237. edsl/results/smart_objects.py +96 -0
  238. edsl/results/table_data_class.py +12 -0
  239. edsl/results/table_display.css +77 -77
  240. edsl/results/table_renderers.py +118 -0
  241. edsl/results/tree_explore.py +115 -115
  242. edsl/scenarios/ConstructDownloadLink.py +109 -0
  243. edsl/scenarios/DocumentChunker.py +102 -0
  244. edsl/scenarios/DocxScenario.py +16 -0
  245. edsl/scenarios/FileStore.py +511 -632
  246. edsl/scenarios/PdfExtractor.py +40 -0
  247. edsl/scenarios/Scenario.py +498 -601
  248. edsl/scenarios/ScenarioHtmlMixin.py +65 -64
  249. edsl/scenarios/ScenarioList.py +1458 -1287
  250. edsl/scenarios/ScenarioListExportMixin.py +45 -52
  251. edsl/scenarios/ScenarioListPdfMixin.py +239 -261
  252. edsl/scenarios/__init__.py +3 -4
  253. edsl/scenarios/directory_scanner.py +96 -0
  254. edsl/scenarios/file_methods.py +85 -0
  255. edsl/scenarios/handlers/__init__.py +13 -0
  256. edsl/scenarios/handlers/csv.py +38 -0
  257. edsl/scenarios/handlers/docx.py +76 -0
  258. edsl/scenarios/handlers/html.py +37 -0
  259. edsl/scenarios/handlers/json.py +111 -0
  260. edsl/scenarios/handlers/latex.py +5 -0
  261. edsl/scenarios/handlers/md.py +51 -0
  262. edsl/scenarios/handlers/pdf.py +68 -0
  263. edsl/scenarios/handlers/png.py +39 -0
  264. edsl/scenarios/handlers/pptx.py +105 -0
  265. edsl/scenarios/handlers/py.py +294 -0
  266. edsl/scenarios/handlers/sql.py +313 -0
  267. edsl/scenarios/handlers/sqlite.py +149 -0
  268. edsl/scenarios/handlers/txt.py +33 -0
  269. edsl/scenarios/{ScenarioJoin.py → scenario_join.py} +131 -127
  270. edsl/scenarios/scenario_selector.py +156 -0
  271. edsl/shared.py +1 -1
  272. edsl/study/ObjectEntry.py +173 -173
  273. edsl/study/ProofOfWork.py +113 -113
  274. edsl/study/SnapShot.py +80 -80
  275. edsl/study/Study.py +521 -528
  276. edsl/study/__init__.py +4 -4
  277. edsl/surveys/ConstructDAG.py +92 -0
  278. edsl/surveys/DAG.py +148 -148
  279. edsl/surveys/EditSurvey.py +221 -0
  280. edsl/surveys/InstructionHandler.py +100 -0
  281. edsl/surveys/Memory.py +31 -31
  282. edsl/surveys/MemoryManagement.py +72 -0
  283. edsl/surveys/MemoryPlan.py +244 -244
  284. edsl/surveys/Rule.py +327 -326
  285. edsl/surveys/RuleCollection.py +385 -387
  286. edsl/surveys/RuleManager.py +172 -0
  287. edsl/surveys/Simulator.py +75 -0
  288. edsl/surveys/Survey.py +1280 -1801
  289. edsl/surveys/SurveyCSS.py +273 -261
  290. edsl/surveys/SurveyExportMixin.py +259 -259
  291. edsl/surveys/{SurveyFlowVisualizationMixin.py → SurveyFlowVisualization.py} +181 -179
  292. edsl/surveys/SurveyQualtricsImport.py +284 -284
  293. edsl/surveys/SurveyToApp.py +141 -0
  294. edsl/surveys/__init__.py +5 -3
  295. edsl/surveys/base.py +53 -53
  296. edsl/surveys/descriptors.py +60 -56
  297. edsl/surveys/instructions/ChangeInstruction.py +48 -49
  298. edsl/surveys/instructions/Instruction.py +56 -65
  299. edsl/surveys/instructions/InstructionCollection.py +82 -77
  300. edsl/templates/error_reporting/base.html +23 -23
  301. edsl/templates/error_reporting/exceptions_by_model.html +34 -34
  302. edsl/templates/error_reporting/exceptions_by_question_name.html +16 -16
  303. edsl/templates/error_reporting/exceptions_by_type.html +16 -16
  304. edsl/templates/error_reporting/interview_details.html +115 -115
  305. edsl/templates/error_reporting/interviews.html +19 -19
  306. edsl/templates/error_reporting/overview.html +4 -4
  307. edsl/templates/error_reporting/performance_plot.html +1 -1
  308. edsl/templates/error_reporting/report.css +73 -73
  309. edsl/templates/error_reporting/report.html +117 -117
  310. edsl/templates/error_reporting/report.js +25 -25
  311. edsl/test_h +1 -0
  312. edsl/tools/__init__.py +1 -1
  313. edsl/tools/clusters.py +192 -192
  314. edsl/tools/embeddings.py +27 -27
  315. edsl/tools/embeddings_plotting.py +118 -118
  316. edsl/tools/plotting.py +112 -112
  317. edsl/tools/summarize.py +18 -18
  318. edsl/utilities/PrettyList.py +56 -0
  319. edsl/utilities/SystemInfo.py +28 -28
  320. edsl/utilities/__init__.py +22 -22
  321. edsl/utilities/ast_utilities.py +25 -25
  322. edsl/utilities/data/Registry.py +6 -6
  323. edsl/utilities/data/__init__.py +1 -1
  324. edsl/utilities/data/scooter_results.json +1 -1
  325. edsl/utilities/decorators.py +77 -77
  326. edsl/utilities/gcp_bucket/cloud_storage.py +96 -96
  327. edsl/utilities/gcp_bucket/example.py +50 -0
  328. edsl/utilities/interface.py +627 -627
  329. edsl/utilities/is_notebook.py +18 -0
  330. edsl/utilities/is_valid_variable_name.py +11 -0
  331. edsl/utilities/naming_utilities.py +263 -263
  332. edsl/utilities/remove_edsl_version.py +24 -0
  333. edsl/utilities/repair_functions.py +28 -28
  334. edsl/utilities/restricted_python.py +70 -70
  335. edsl/utilities/utilities.py +436 -424
  336. {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev4.dist-info}/LICENSE +21 -21
  337. {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev4.dist-info}/METADATA +13 -11
  338. edsl-0.1.39.dev4.dist-info/RECORD +361 -0
  339. edsl/language_models/KeyLookup.py +0 -30
  340. edsl/language_models/registry.py +0 -190
  341. edsl/language_models/unused/ReplicateBase.py +0 -83
  342. edsl/results/ResultsDBMixin.py +0 -238
  343. edsl-0.1.39.dev3.dist-info/RECORD +0 -277
  344. {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev4.dist-info}/WHEEL +0 -0
@@ -1,632 +1,511 @@
1
- import base64
2
- import io
3
- import tempfile
4
- import mimetypes
5
- import os
6
- from typing import Dict, Any, IO, Optional
7
- import requests
8
- from urllib.parse import urlparse
9
-
10
- import google.generativeai as genai
11
-
12
- from edsl import Scenario
13
- from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
14
- from edsl.utilities.utilities import is_notebook
15
-
16
-
17
- def view_csv(csv_path):
18
- import pandas as pd
19
-
20
- df = pd.read_csv(csv_path)
21
- return df
22
-
23
-
24
- def view_html(html_path):
25
- import os
26
- import subprocess
27
- from IPython.display import IFrame, display, HTML
28
-
29
- if os.path.exists(html_path):
30
- if is_notebook():
31
- # Display the HTML inline in Jupyter Notebook
32
- display(IFrame(src=html_path, width=700, height=600))
33
- display(
34
- HTML(
35
- f'<a href="{html_path}" target="_blank">Open HTML in a new tab</a>'
36
- )
37
- )
38
- else:
39
- try:
40
- if (os_name := os.name) == "posix":
41
- # Open with the default browser on macOS
42
- subprocess.run(["open", html_path], check=True)
43
- elif os_name == "nt":
44
- # Open with the default browser on Windows
45
- os.startfile(html_path)
46
- else:
47
- # Open with the default browser on Linux
48
- subprocess.run(["xdg-open", html_path], check=True)
49
- except Exception as e:
50
- print(f"Error opening HTML file: {e}")
51
- else:
52
- print("HTML file was not found.")
53
-
54
-
55
- def view_html(html_path):
56
- import os
57
- from IPython.display import display, HTML
58
-
59
- if is_notebook():
60
- with open(html_path, "r") as f:
61
- html_content = f.read()
62
- display(HTML(html_content))
63
- else:
64
- if os.path.exists(html_path):
65
- try:
66
- if (os_name := os.name) == "posix":
67
- subprocess.run(["open", html_path], check=True)
68
- elif os_name == "nt":
69
- os.startfile(html_path)
70
- else:
71
- subprocess.run(["xdg-open", html_path], check=True)
72
- except Exception as e:
73
- print(f"Error opening file: {e}")
74
- else:
75
- print("File was not created successfully.")
76
-
77
-
78
- def view_pdf(pdf_path):
79
- import os
80
- import subprocess
81
- import os
82
- from IPython.display import HTML, display
83
-
84
- if is_notebook():
85
- # Convert to absolute path if needed
86
- with open(pdf_path, "rb") as f:
87
- base64_pdf = base64.b64encode(f.read()).decode("utf-8")
88
-
89
- html = f"""
90
- <iframe
91
- src="data:application/pdf;base64,{base64_pdf}"
92
- width="800px"
93
- height="800px"
94
- type="application/pdf"
95
- ></iframe>
96
- """
97
- display(HTML(html))
98
-
99
- if os.path.exists(pdf_path):
100
- try:
101
- if (os_name := os.name) == "posix":
102
- # for cool kids
103
- subprocess.run(["open", pdf_path], check=True) # macOS
104
- elif os_name == "nt":
105
- os.startfile(pdf_path) # Windows
106
- else:
107
- subprocess.run(["xdg-open", pdf_path], check=True) # Linux
108
- except Exception as e:
109
- print(f"Error opening PDF: {e}")
110
- else:
111
- print("PDF file was not created successfully.")
112
-
113
-
114
- class FileStore(Scenario):
115
- __documentation__ = "https://docs.expectedparrot.com/en/latest/filestore.html"
116
-
117
- def __init__(
118
- self,
119
- path: Optional[str] = None,
120
- mime_type: Optional[str] = None,
121
- binary: Optional[bool] = None,
122
- suffix: Optional[str] = None,
123
- base64_string: Optional[str] = None,
124
- external_locations: Optional[Dict[str, str]] = None,
125
- **kwargs,
126
- ):
127
- if path is None and "filename" in kwargs:
128
- path = kwargs["filename"]
129
-
130
- self._path = path # Store the original path privately
131
- self._temp_path = None # Track any generated temporary file
132
-
133
- self.suffix = suffix or path.split(".")[-1]
134
- self.binary = binary or False
135
- self.mime_type = (
136
- mime_type or mimetypes.guess_type(path)[0] or "application/octet-stream"
137
- )
138
- self.base64_string = base64_string or self.encode_file_to_base64_string(path)
139
- self.external_locations = external_locations or {}
140
- super().__init__(
141
- {
142
- "path": path,
143
- "base64_string": self.base64_string,
144
- "binary": self.binary,
145
- "suffix": self.suffix,
146
- "mime_type": self.mime_type,
147
- "external_locations": self.external_locations,
148
- }
149
- )
150
-
151
- @property
152
- def path(self) -> str:
153
- """
154
- Property that returns a valid path to the file content.
155
- If the original path doesn't exist, generates a temporary file from the base64 content.
156
- """
157
- # Check if original path exists and is accessible
158
- if self._path and os.path.isfile(self._path):
159
- return self._path
160
-
161
- # If we already have a valid temporary file, use it
162
- if self._temp_path and os.path.isfile(self._temp_path):
163
- return self._temp_path
164
-
165
- # Generate a new temporary file from base64 content
166
- self._temp_path = self.to_tempfile(self.suffix)
167
- return self._temp_path
168
-
169
- def __str__(self):
170
- return "FileStore: self.path"
171
-
172
- @classmethod
173
- def example(cls, example_type="text"):
174
- import textwrap
175
- import tempfile
176
-
177
- if example_type == "png" or example_type == "image":
178
- import importlib.resources
179
- from pathlib import Path
180
-
181
- # Get package root directory
182
- package_root = Path(__file__).parent.parent.parent
183
- logo_path = package_root / "static" / "logo.png"
184
- return cls(str(logo_path))
185
-
186
- if example_type == "text":
187
- with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as f:
188
- f.write(b"Hello, World!")
189
-
190
- return cls(path=f.name)
191
-
192
- elif example_type == "csv":
193
- from edsl.results.Results import Results
194
-
195
- r = Results.example()
196
-
197
- with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as f:
198
- r.to_csv(filename=f.name)
199
- return cls(f.name)
200
-
201
- elif example_type == "pdf":
202
- pdf_string = textwrap.dedent(
203
- """\
204
- %PDF-1.4
205
- 1 0 obj
206
- << /Type /Catalog /Pages 2 0 R >>
207
- endobj
208
- 2 0 obj
209
- << /Type /Pages /Kids [3 0 R] /Count 1 >>
210
- endobj
211
- 3 0 obj
212
- << /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R >>
213
- endobj
214
- 4 0 obj
215
- << /Length 44 >>
216
- stream
217
- BT
218
- /F1 24 Tf
219
- 100 700 Td
220
- (Hello, World!) Tj
221
- ET
222
- endstream
223
- endobj
224
- 5 0 obj
225
- << /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>
226
- endobj
227
- 6 0 obj
228
- << /ProcSet [/PDF /Text] /Font << /F1 5 0 R >> >>
229
- endobj
230
- xref
231
- 0 7
232
- 0000000000 65535 f
233
- 0000000010 00000 n
234
- 0000000053 00000 n
235
- 0000000100 00000 n
236
- 0000000173 00000 n
237
- 0000000232 00000 n
238
- 0000000272 00000 n
239
- trailer
240
- << /Size 7 /Root 1 0 R >>
241
- startxref
242
- 318
243
- %%EOF"""
244
- )
245
- with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as f:
246
- f.write(pdf_string.encode())
247
-
248
- return cls(f.name)
249
-
250
- elif example_type == "html":
251
- with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as f:
252
- f.write("<html><body><h1>Test</h1></body></html>".encode())
253
-
254
- return cls(f.name)
255
-
256
- @property
257
- def size(self) -> int:
258
- if self.base64_string != None:
259
- return (len(self.base64_string) / 4.0) * 3 # from base64 to char size
260
- return os.path.getsize(self.path)
261
-
262
- def upload_google(self, refresh: bool = False) -> None:
263
- genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
264
- google_info = genai.upload_file(self.path, mime_type=self.mime_type)
265
- self.external_locations["google"] = google_info.to_dict()
266
-
267
- @classmethod
268
- @remove_edsl_version
269
- def from_dict(cls, d):
270
- # return cls(d["filename"], d["binary"], d["suffix"], d["base64_string"])
271
- return cls(**d)
272
-
273
- def __repr__(self):
274
- return f"FileStore(path='{self.path}')"
275
-
276
- def encode_file_to_base64_string(self, file_path: str):
277
- try:
278
- # Attempt to open the file in text mode
279
- with open(file_path, "r") as text_file:
280
- # Read the text data
281
- text_data = text_file.read()
282
- # Encode the text data to a base64 string
283
- base64_encoded_data = base64.b64encode(text_data.encode("utf-8"))
284
- except UnicodeDecodeError:
285
- # If reading as text fails, open the file in binary mode
286
- with open(file_path, "rb") as binary_file:
287
- # Read the binary data
288
- binary_data = binary_file.read()
289
- # Encode the binary data to a base64 string
290
- base64_encoded_data = base64.b64encode(binary_data)
291
- self.binary = True
292
- # Convert the base64 bytes to a string
293
- base64_string = base64_encoded_data.decode("utf-8")
294
-
295
- return base64_string
296
-
297
- def open(self) -> "IO":
298
- if self.binary:
299
- return self.base64_to_file(self["base64_string"], is_binary=True)
300
- else:
301
- return self.base64_to_text_file(self["base64_string"])
302
-
303
- @staticmethod
304
- def base64_to_text_file(base64_string) -> "IO":
305
- # Decode the base64 string to bytes
306
- text_data_bytes = base64.b64decode(base64_string)
307
-
308
- # Convert bytes to string
309
- text_data = text_data_bytes.decode("utf-8")
310
-
311
- # Create a StringIO object from the text data
312
- text_file = io.StringIO(text_data)
313
-
314
- return text_file
315
-
316
- @staticmethod
317
- def base64_to_file(base64_string, is_binary=True):
318
- # Decode the base64 string to bytes
319
- file_data = base64.b64decode(base64_string)
320
-
321
- if is_binary:
322
- # Create a BytesIO object for binary data
323
- return io.BytesIO(file_data)
324
- else:
325
- # Convert bytes to string for text data
326
- text_data = file_data.decode("utf-8")
327
- # Create a StringIO object for text data
328
- return io.StringIO(text_data)
329
-
330
- def to_tempfile(self, suffix=None):
331
- if suffix is None:
332
- suffix = self.suffix
333
- if self.binary:
334
- file_like_object = self.base64_to_file(
335
- self["base64_string"], is_binary=True
336
- )
337
- else:
338
- file_like_object = self.base64_to_text_file(self["base64_string"])
339
-
340
- # Create a named temporary file
341
- mode = "wb" if self.binary else "w"
342
- temp_file = tempfile.NamedTemporaryFile(
343
- delete=False, suffix="." + suffix, mode=mode
344
- )
345
-
346
- if self.binary:
347
- temp_file.write(file_like_object.read())
348
- else:
349
- temp_file.write(file_like_object.read())
350
-
351
- temp_file.close()
352
-
353
- return temp_file.name
354
-
355
- def view(self, max_size: int = 300) -> None:
356
- # with self.open() as f:
357
- if self.suffix == "csv":
358
- return view_csv(self.path)
359
-
360
- if self.suffix == "pdf":
361
- view_pdf(self.path)
362
-
363
- if self.suffix == "html":
364
- view_html(self.path)
365
-
366
- if self.suffix == "png" or self.suffix == "jpg" or self.suffix == "jpeg":
367
- if is_notebook():
368
- from IPython.display import Image
369
- from PIL import Image as PILImage
370
-
371
- if max_size:
372
- # Open the image using Pillow
373
- with PILImage.open(self.path) as img:
374
- # Get original width and height
375
- original_width, original_height = img.size
376
-
377
- # Calculate the scaling factor
378
- scale = min(
379
- max_size / original_width, max_size / original_height
380
- )
381
-
382
- # Calculate new dimensions
383
- new_width = int(original_width * scale)
384
- new_height = int(original_height * scale)
385
-
386
- return Image(self.path, width=new_width, height=new_height)
387
- else:
388
- return Image(self.path)
389
-
390
- def push(
391
- self, description: Optional[str] = None, visibility: str = "unlisted"
392
- ) -> dict:
393
- """
394
- Push the object to Coop.
395
- :param description: The description of the object to push.
396
- :param visibility: The visibility of the object to push.
397
- """
398
- scenario_version = Scenario.from_dict(self.to_dict())
399
- if description is None:
400
- description = "File: " + self.path
401
- info = scenario_version.push(description=description, visibility=visibility)
402
- return info
403
-
404
- @classmethod
405
- def pull(cls, uuid: str, expected_parrot_url: Optional[str] = None) -> "FileStore":
406
- """
407
- :param uuid: The UUID of the object to pull.
408
- :param expected_parrot_url: The URL of the Parrot server to use.
409
- :return: The object pulled from the Parrot server.
410
- """
411
- scenario_version = Scenario.pull(uuid, expected_parrot_url=expected_parrot_url)
412
- return cls.from_dict(scenario_version.to_dict())
413
-
414
- @classmethod
415
- def from_url(
416
- cls,
417
- url: str,
418
- download_path: Optional[str] = None,
419
- mime_type: Optional[str] = None,
420
- ) -> "FileStore":
421
- """
422
- :param url: The URL of the file to download.
423
- :param download_path: The path to save the downloaded file.
424
- :param mime_type: The MIME type of the file. If None, it will be guessed from the file extension.
425
- """
426
-
427
- response = requests.get(url, stream=True)
428
- response.raise_for_status() # Raises an HTTPError for bad responses
429
-
430
- # Get the filename from the URL if download_path is not provided
431
- if download_path is None:
432
- filename = os.path.basename(urlparse(url).path)
433
- if not filename:
434
- filename = "downloaded_file"
435
- # download_path = filename
436
- download_path = os.path.join(os.getcwd(), filename)
437
-
438
- # Ensure the directory exists
439
- os.makedirs(os.path.dirname(download_path), exist_ok=True)
440
-
441
- # Write the file
442
- with open(download_path, "wb") as file:
443
- for chunk in response.iter_content(chunk_size=8192):
444
- file.write(chunk)
445
-
446
- # Create and return a new File instance
447
- return cls(download_path, mime_type=mime_type)
448
-
449
-
450
- class CSVFileStore(FileStore):
451
- @classmethod
452
- def example(cls):
453
- from edsl.results.Results import Results
454
-
455
- r = Results.example()
456
- import tempfile
457
-
458
- with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as f:
459
- r.to_csv(filename=f.name)
460
-
461
- return cls(f.name)
462
-
463
- def view(self):
464
- import pandas as pd
465
-
466
- return pd.read_csv(self.to_tempfile())
467
-
468
-
469
- class PDFFileStore(FileStore):
470
- def view(self):
471
- pdf_path = self.to_tempfile()
472
- print(f"PDF path: {pdf_path}") # Print the path to ensure it exists
473
- import os
474
- import subprocess
475
-
476
- if os.path.exists(pdf_path):
477
- try:
478
- if os.name == "posix":
479
- # for cool kids
480
- subprocess.run(["open", pdf_path], check=True) # macOS
481
- elif os.name == "nt":
482
- os.startfile(pdf_path) # Windows
483
- else:
484
- subprocess.run(["xdg-open", pdf_path], check=True) # Linux
485
- except Exception as e:
486
- print(f"Error opening PDF: {e}")
487
- else:
488
- print("PDF file was not created successfully.")
489
-
490
- @classmethod
491
- def example(cls):
492
- import textwrap
493
-
494
- pdf_string = textwrap.dedent(
495
- """\
496
- %PDF-1.4
497
- 1 0 obj
498
- << /Type /Catalog /Pages 2 0 R >>
499
- endobj
500
- 2 0 obj
501
- << /Type /Pages /Kids [3 0 R] /Count 1 >>
502
- endobj
503
- 3 0 obj
504
- << /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R >>
505
- endobj
506
- 4 0 obj
507
- << /Length 44 >>
508
- stream
509
- BT
510
- /F1 24 Tf
511
- 100 700 Td
512
- (Hello, World!) Tj
513
- ET
514
- endstream
515
- endobj
516
- 5 0 obj
517
- << /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>
518
- endobj
519
- 6 0 obj
520
- << /ProcSet [/PDF /Text] /Font << /F1 5 0 R >> >>
521
- endobj
522
- xref
523
- 0 7
524
- 0000000000 65535 f
525
- 0000000010 00000 n
526
- 0000000053 00000 n
527
- 0000000100 00000 n
528
- 0000000173 00000 n
529
- 0000000232 00000 n
530
- 0000000272 00000 n
531
- trailer
532
- << /Size 7 /Root 1 0 R >>
533
- startxref
534
- 318
535
- %%EOF"""
536
- )
537
- import tempfile
538
-
539
- with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as f:
540
- f.write(pdf_string.encode())
541
-
542
- return cls(f.name)
543
-
544
-
545
- class PNGFileStore(FileStore):
546
- @classmethod
547
- def example(cls):
548
- import textwrap
549
-
550
- png_string = textwrap.dedent(
551
- """\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x01\x00\x00\x00\x01\x00\x08\x06\x00\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\x0cIDAT\x08\xd7c\x00\x01"""
552
- )
553
- import tempfile
554
-
555
- with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
556
- f.write(png_string.encode())
557
-
558
- return cls(f.name)
559
-
560
- def view(self):
561
- import matplotlib.pyplot as plt
562
- import matplotlib.image as mpimg
563
-
564
- img = mpimg.imread(self.to_tempfile())
565
- plt.imshow(img)
566
- plt.show()
567
-
568
-
569
- class SQLiteFileStore(FileStore):
570
- @classmethod
571
- def example(cls):
572
- import sqlite3
573
- import tempfile
574
-
575
- with tempfile.NamedTemporaryFile(suffix=".sqlite", delete=False) as f:
576
- conn = sqlite3.connect(f.name)
577
- c = conn.cursor()
578
- c.execute("""CREATE TABLE stocks (date text)""")
579
- conn.commit()
580
-
581
- return cls(f.name)
582
-
583
- def view(self):
584
- import subprocess
585
- import os
586
-
587
- sqlite_path = self.to_tempfile()
588
- os.system(f"sqlite3 {sqlite_path}")
589
-
590
-
591
- class HTMLFileStore(FileStore):
592
- @classmethod
593
- def example(cls):
594
- import tempfile
595
-
596
- with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as f:
597
- f.write("<html><body><h1>Test</h1></body></html>".encode())
598
-
599
- return cls(f.name)
600
-
601
- def view(self):
602
- import webbrowser
603
-
604
- html_path = self.to_tempfile()
605
- webbrowser.open("file://" + html_path)
606
-
607
-
608
- if __name__ == "__main__":
609
- # file_path = "../conjure/examples/Ex11-2.sav"
610
- # fs = FileStore(file_path)
611
- # info = fs.push()
612
- # print(info)
613
-
614
- # fs = CSVFileStore.example()
615
- # fs.to_tempfile()
616
- # print(fs.view())
617
-
618
- # fs = PDFFileStore.example()
619
- # fs.view()
620
-
621
- # fs = PDFFileStore("paper.pdf")
622
- # fs.view()
623
- # from edsl import Conjure
624
- pass
625
- # fs = PNGFileStore("logo.png")
626
- # fs.view()
627
- # fs.upload_google()
628
-
629
- # c = Conjure(datafile_name=fs.to_tempfile())
630
- # f = PDFFileStore("paper.pdf")
631
- # print(f.to_tempfile())
632
- # f.push()
1
+ import base64
2
+ import io
3
+ import tempfile
4
+ import mimetypes
5
+ import os
6
+ from typing import Dict, Any, IO, Optional
7
+
8
+ from edsl.scenarios.Scenario import Scenario
9
+ from edsl.utilities.remove_edsl_version import remove_edsl_version
10
+
11
+ from edsl.scenarios.file_methods import FileMethods
12
+
13
+
14
+ class FileStore(Scenario):
15
+ __documentation__ = "https://docs.expectedparrot.com/en/latest/filestore.html"
16
+
17
+ def __init__(
18
+ self,
19
+ path: Optional[str] = None,
20
+ mime_type: Optional[str] = None,
21
+ binary: Optional[bool] = None,
22
+ suffix: Optional[str] = None,
23
+ base64_string: Optional[str] = None,
24
+ external_locations: Optional[Dict[str, str]] = None,
25
+ extracted_text: Optional[str] = None,
26
+ **kwargs,
27
+ ):
28
+ if path is None and "filename" in kwargs:
29
+ path = kwargs["filename"]
30
+
31
+ self._path = path # Store the original path privately
32
+ self._temp_path = None # Track any generated temporary file
33
+
34
+ self.suffix = suffix or path.split(".")[-1]
35
+ self.binary = binary or False
36
+ self.mime_type = (
37
+ mime_type or mimetypes.guess_type(path)[0] or "application/octet-stream"
38
+ )
39
+ self.base64_string = base64_string or self.encode_file_to_base64_string(path)
40
+ self.external_locations = external_locations or {}
41
+
42
+ self.extracted_text = (
43
+ self.extract_text() if extracted_text is None else extracted_text
44
+ )
45
+
46
+ super().__init__(
47
+ {
48
+ "path": path,
49
+ "base64_string": self.base64_string,
50
+ "binary": self.binary,
51
+ "suffix": self.suffix,
52
+ "mime_type": self.mime_type,
53
+ "external_locations": self.external_locations,
54
+ "extracted_text": self.extracted_text,
55
+ }
56
+ )
57
+
58
+ @property
59
+ def path(self) -> str:
60
+ """
61
+ Property that returns a valid path to the file content.
62
+ If the original path doesn't exist, generates a temporary file from the base64 content.
63
+ """
64
+ # Check if original path exists and is accessible
65
+ if self._path and os.path.isfile(self._path):
66
+ return self._path
67
+
68
+ # If we already have a valid temporary file, use it
69
+ if self._temp_path and os.path.isfile(self._temp_path):
70
+ return self._temp_path
71
+
72
+ # Generate a new temporary file from base64 content
73
+ self._temp_path = self.to_tempfile(self.suffix)
74
+ return self._temp_path
75
+
76
+ def __str__(self):
77
+ return "FileStore: self.path"
78
+
79
+ @classmethod
80
+ def example(cls, example_type="txt"):
81
+ file_methods_class = FileMethods.get_handler(example_type)
82
+ if file_methods_class:
83
+ return cls(file_methods_class().example())
84
+ else:
85
+ print(f"Example for {example_type} is not supported.")
86
+
87
+ @property
88
+ def size(self) -> int:
89
+ if self.base64_string != None:
90
+ return (len(self.base64_string) / 4.0) * 3 # from base64 to char size
91
+ return os.path.getsize(self.path)
92
+
93
+ def upload_google(self, refresh: bool = False) -> None:
94
+ import google.generativeai as genai
95
+
96
+ genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
97
+ google_info = genai.upload_file(self.path, mime_type=self.mime_type)
98
+ self.external_locations["google"] = google_info.to_dict()
99
+
100
+ @classmethod
101
+ @remove_edsl_version
102
+ def from_dict(cls, d):
103
+ # return cls(d["filename"], d["binary"], d["suffix"], d["base64_string"])
104
+ return cls(**d)
105
+
106
+ def __repr__(self):
107
+ import reprlib
108
+
109
+ r = reprlib.Repr()
110
+ r.maxstring = 20 # Limit strings to 20 chars
111
+ r.maxother = 30 # Limit other types to 30 chars
112
+
113
+ params = ", ".join(f"{key}={r.repr(value)}" for key, value in self.data.items())
114
+ return f"{self.__class__.__name__}({params})"
115
+
116
+ def _repr_html_(self):
117
+ parent_html = super()._repr_html_()
118
+ from edsl.scenarios.ConstructDownloadLink import ConstructDownloadLink
119
+
120
+ link = ConstructDownloadLink(self).html_create_link(self.path, style=None)
121
+ return f"{parent_html}<br>{link}"
122
+
123
+ def encode_file_to_base64_string(self, file_path: str):
124
+ try:
125
+ # Attempt to open the file in text mode
126
+ with open(file_path, "r") as text_file:
127
+ # Read the text data
128
+ text_data = text_file.read()
129
+ # Encode the text data to a base64 string
130
+ base64_encoded_data = base64.b64encode(text_data.encode("utf-8"))
131
+ except UnicodeDecodeError:
132
+ # If reading as text fails, open the file in binary mode
133
+ with open(file_path, "rb") as binary_file:
134
+ # Read the binary data
135
+ binary_data = binary_file.read()
136
+ # Encode the binary data to a base64 string
137
+ base64_encoded_data = base64.b64encode(binary_data)
138
+ self.binary = True
139
+ # Convert the base64 bytes to a string
140
+ base64_string = base64_encoded_data.decode("utf-8")
141
+
142
+ return base64_string
143
+
144
+ def open(self) -> "IO":
145
+ if self.binary:
146
+ return self.base64_to_file(self.base64_string, is_binary=True)
147
+ else:
148
+ return self.base64_to_text_file(self.base64_string)
149
+
150
+ def write(self, filename: Optional[str] = None) -> str:
151
+ """
152
+ Write the file content to disk, either to a specified filename or a temporary file.
153
+
154
+ Args:
155
+ filename (Optional[str]): The destination filename. If None, creates a temporary file.
156
+
157
+ Returns:
158
+ str: The path to the written file.
159
+ """
160
+ # Determine the mode based on binary flag
161
+ mode = "wb" if self.binary else "w"
162
+
163
+ # If no filename provided, create a temporary file
164
+ if filename is None:
165
+ from tempfile import NamedTemporaryFile
166
+
167
+ with NamedTemporaryFile(delete=False, suffix="." + self.suffix) as f:
168
+ filename = f.name
169
+
170
+ # Write the content using the appropriate mode
171
+ try:
172
+ with open(filename, mode) as f:
173
+ content = self.open().read()
174
+ # For text mode, ensure we're writing a string
175
+ if not self.binary and isinstance(content, bytes):
176
+ content = content.decode("utf-8")
177
+ f.write(content)
178
+ print(f"File written to {filename}")
179
+ except Exception as e:
180
+ print(f"Error writing file: {e}")
181
+ raise
182
+
183
+ # return filename
184
+
185
+ @staticmethod
186
+ def base64_to_text_file(base64_string) -> "IO":
187
+ # Decode the base64 string to bytes
188
+ text_data_bytes = base64.b64decode(base64_string)
189
+
190
+ # Convert bytes to string
191
+ text_data = text_data_bytes.decode("utf-8")
192
+
193
+ # Create a StringIO object from the text data
194
+ text_file = io.StringIO(text_data)
195
+
196
+ return text_file
197
+
198
+ @staticmethod
199
+ def base64_to_file(base64_string, is_binary=True):
200
+ # Decode the base64 string to bytes
201
+ file_data = base64.b64decode(base64_string)
202
+
203
+ if is_binary:
204
+ # Create a BytesIO object for binary data
205
+ return io.BytesIO(file_data)
206
+ else:
207
+ # Convert bytes to string for text data
208
+ text_data = file_data.decode("utf-8")
209
+ # Create a StringIO object for text data
210
+ return io.StringIO(text_data)
211
+
212
+ @property
213
+ def text(self):
214
+ if self.binary:
215
+ import warnings
216
+
217
+ warnings.warn("This is a binary file.")
218
+ else:
219
+ return self.base64_to_text_file(self.base64_string).read()
220
+
221
+ def to_tempfile(self, suffix=None):
222
+ if suffix is None:
223
+ suffix = self.suffix
224
+ if self.binary:
225
+ file_like_object = self.base64_to_file(
226
+ self["base64_string"], is_binary=True
227
+ )
228
+ else:
229
+ file_like_object = self.base64_to_text_file(self.base64_string)
230
+
231
+ # Create a named temporary file
232
+ mode = "wb" if self.binary else "w"
233
+ temp_file = tempfile.NamedTemporaryFile(
234
+ delete=False, suffix="." + suffix, mode=mode
235
+ )
236
+
237
+ if self.binary:
238
+ temp_file.write(file_like_object.read())
239
+ else:
240
+ temp_file.write(file_like_object.read())
241
+
242
+ temp_file.close()
243
+
244
+ return temp_file.name
245
+
246
+ def view(self) -> None:
247
+ handler = FileMethods.get_handler(self.suffix)
248
+ if handler:
249
+ handler(self.path).view()
250
+ else:
251
+ print(f"Viewing of {self.suffix} files is not supported.")
252
+
253
+ def extract_text(self) -> str:
254
+ handler = FileMethods.get_handler(self.suffix)
255
+ if handler and hasattr(handler, "extract_text"):
256
+ return handler(self.path).extract_text()
257
+
258
+ if not self.binary:
259
+ return self.text
260
+
261
+ return None
262
+ # raise TypeError("No text method found for this file type.")
263
+
264
+ def push(
265
+ self, description: Optional[str] = None, visibility: str = "unlisted"
266
+ ) -> dict:
267
+ """
268
+ Push the object to Coop.
269
+ :param description: The description of the object to push.
270
+ :param visibility: The visibility of the object to push.
271
+ """
272
+ scenario_version = Scenario.from_dict(self.to_dict())
273
+ if description is None:
274
+ description = "File: " + self.path
275
+ info = scenario_version.push(description=description, visibility=visibility)
276
+ return info
277
+
278
+ @classmethod
279
+ def pull(cls, uuid: str, expected_parrot_url: Optional[str] = None) -> "FileStore":
280
+ """
281
+ :param uuid: The UUID of the object to pull.
282
+ :param expected_parrot_url: The URL of the Parrot server to use.
283
+ :return: The object pulled from the Parrot server.
284
+ """
285
+ scenario_version = Scenario.pull(uuid, expected_parrot_url=expected_parrot_url)
286
+ return cls.from_dict(scenario_version.to_dict())
287
+
288
+ @classmethod
289
+ def from_url(
290
+ cls,
291
+ url: str,
292
+ download_path: Optional[str] = None,
293
+ mime_type: Optional[str] = None,
294
+ ) -> "FileStore":
295
+ """
296
+ :param url: The URL of the file to download.
297
+ :param download_path: The path to save the downloaded file.
298
+ :param mime_type: The MIME type of the file. If None, it will be guessed from the file extension.
299
+ """
300
+ import requests
301
+ from urllib.parse import urlparse
302
+
303
+ response = requests.get(url, stream=True)
304
+ response.raise_for_status() # Raises an HTTPError for bad responses
305
+
306
+ # Get the filename from the URL if download_path is not provided
307
+ if download_path is None:
308
+ filename = os.path.basename(urlparse(url).path)
309
+ if not filename:
310
+ filename = "downloaded_file"
311
+ # download_path = filename
312
+ download_path = os.path.join(os.getcwd(), filename)
313
+
314
+ # Ensure the directory exists
315
+ os.makedirs(os.path.dirname(download_path), exist_ok=True)
316
+
317
+ # Write the file
318
+ with open(download_path, "wb") as file:
319
+ for chunk in response.iter_content(chunk_size=8192):
320
+ file.write(chunk)
321
+
322
+ # Create and return a new File instance
323
+ return cls(download_path, mime_type=mime_type)
324
+
325
+ def create_link(self, custom_filename=None, style=None):
326
+ from edsl.scenarios.ConstructDownloadLink import ConstructDownloadLink
327
+
328
+ return ConstructDownloadLink(self).create_link(custom_filename, style)
329
+
330
+
331
+ class CSVFileStore(FileStore):
332
+ @classmethod
333
+ def example(cls):
334
+ from edsl.results.Results import Results
335
+
336
+ r = Results.example()
337
+ import tempfile
338
+
339
+ with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as f:
340
+ r.to_csv(filename=f.name)
341
+
342
+ return cls(f.name)
343
+
344
+ def view(self):
345
+ import pandas as pd
346
+
347
+ return pd.read_csv(self.to_tempfile())
348
+
349
+
350
+ class PDFFileStore(FileStore):
351
+ def view(self):
352
+ pdf_path = self.to_tempfile()
353
+ print(f"PDF path: {pdf_path}") # Print the path to ensure it exists
354
+ import os
355
+ import subprocess
356
+
357
+ if os.path.exists(pdf_path):
358
+ try:
359
+ if os.name == "posix":
360
+ # for cool kids
361
+ subprocess.run(["open", pdf_path], check=True) # macOS
362
+ elif os.name == "nt":
363
+ os.startfile(pdf_path) # Windows
364
+ else:
365
+ subprocess.run(["xdg-open", pdf_path], check=True) # Linux
366
+ except Exception as e:
367
+ print(f"Error opening PDF: {e}")
368
+ else:
369
+ print("PDF file was not created successfully.")
370
+
371
+ @classmethod
372
+ def example(cls):
373
+ import textwrap
374
+
375
+ pdf_string = textwrap.dedent(
376
+ """\
377
+ %PDF-1.4
378
+ 1 0 obj
379
+ << /Type /Catalog /Pages 2 0 R >>
380
+ endobj
381
+ 2 0 obj
382
+ << /Type /Pages /Kids [3 0 R] /Count 1 >>
383
+ endobj
384
+ 3 0 obj
385
+ << /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R >>
386
+ endobj
387
+ 4 0 obj
388
+ << /Length 44 >>
389
+ stream
390
+ BT
391
+ /F1 24 Tf
392
+ 100 700 Td
393
+ (Hello, World!) Tj
394
+ ET
395
+ endstream
396
+ endobj
397
+ 5 0 obj
398
+ << /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>
399
+ endobj
400
+ 6 0 obj
401
+ << /ProcSet [/PDF /Text] /Font << /F1 5 0 R >> >>
402
+ endobj
403
+ xref
404
+ 0 7
405
+ 0000000000 65535 f
406
+ 0000000010 00000 n
407
+ 0000000053 00000 n
408
+ 0000000100 00000 n
409
+ 0000000173 00000 n
410
+ 0000000232 00000 n
411
+ 0000000272 00000 n
412
+ trailer
413
+ << /Size 7 /Root 1 0 R >>
414
+ startxref
415
+ 318
416
+ %%EOF"""
417
+ )
418
+ import tempfile
419
+
420
+ with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as f:
421
+ f.write(pdf_string.encode())
422
+
423
+ return cls(f.name)
424
+
425
+
426
+ class PNGFileStore(FileStore):
427
+ @classmethod
428
+ def example(cls):
429
+ import textwrap
430
+
431
+ png_string = textwrap.dedent(
432
+ """\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x01\x00\x00\x00\x01\x00\x08\x06\x00\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\x0cIDAT\x08\xd7c\x00\x01"""
433
+ )
434
+ import tempfile
435
+
436
+ with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
437
+ f.write(png_string.encode())
438
+
439
+ return cls(f.name)
440
+
441
+ def view(self):
442
+ import matplotlib.pyplot as plt
443
+ import matplotlib.image as mpimg
444
+
445
+ img = mpimg.imread(self.to_tempfile())
446
+ plt.imshow(img)
447
+ plt.show()
448
+
449
+
450
+ class SQLiteFileStore(FileStore):
451
+ @classmethod
452
+ def example(cls):
453
+ import sqlite3
454
+ import tempfile
455
+
456
+ with tempfile.NamedTemporaryFile(suffix=".sqlite", delete=False) as f:
457
+ conn = sqlite3.connect(f.name)
458
+ c = conn.cursor()
459
+ c.execute("""CREATE TABLE stocks (date text)""")
460
+ conn.commit()
461
+
462
+ return cls(f.name)
463
+
464
+ def view(self):
465
+ import subprocess
466
+ import os
467
+
468
+ sqlite_path = self.to_tempfile()
469
+ os.system(f"sqlite3 {sqlite_path}")
470
+
471
+
472
+ class HTMLFileStore(FileStore):
473
+ @classmethod
474
+ def example(cls):
475
+ import tempfile
476
+
477
+ with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as f:
478
+ f.write("<html><body><h1>Test</h1></body></html>".encode())
479
+
480
+ return cls(f.name)
481
+
482
+ def view(self):
483
+ import webbrowser
484
+
485
+ html_path = self.to_tempfile()
486
+ webbrowser.open("file://" + html_path)
487
+
488
+
489
+ if __name__ == "__main__":
490
+ import doctest
491
+
492
+ doctest.testmod()
493
+
494
+ # fs = FileStore.example("pdf")
495
+ # fs.view()
496
+
497
+ formats = FileMethods.supported_file_types()
498
+ for file_type in formats:
499
+ print("Now testinging", file_type)
500
+ fs = FileStore.example(file_type)
501
+ fs.view()
502
+ input("Press Enter to continue...")
503
+
504
+ # pdf_example.view()
505
+ # FileStore(pdf_example).view()
506
+
507
+ # pdf_methods = methods.get("pdf")
508
+ # file = pdf_methods().example()
509
+ # pdf_methods(file).view()
510
+
511
+ # print(FileMethods._handlers)