edsl 0.1.46__py3-none-any.whl → 0.1.48__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (328) hide show
  1. edsl/__init__.py +44 -39
  2. edsl/__version__.py +1 -1
  3. edsl/agents/__init__.py +4 -2
  4. edsl/agents/{Agent.py → agent.py} +442 -152
  5. edsl/agents/{AgentList.py → agent_list.py} +220 -162
  6. edsl/agents/descriptors.py +46 -7
  7. edsl/{exceptions/agents.py → agents/exceptions.py} +3 -12
  8. edsl/base/__init__.py +75 -0
  9. edsl/base/base_class.py +1303 -0
  10. edsl/base/data_transfer_models.py +114 -0
  11. edsl/base/enums.py +215 -0
  12. edsl/base.py +8 -0
  13. edsl/buckets/__init__.py +25 -0
  14. edsl/buckets/bucket_collection.py +324 -0
  15. edsl/buckets/model_buckets.py +206 -0
  16. edsl/buckets/token_bucket.py +502 -0
  17. edsl/{jobs/buckets/TokenBucketAPI.py → buckets/token_bucket_api.py} +1 -1
  18. edsl/buckets/token_bucket_client.py +509 -0
  19. edsl/caching/__init__.py +20 -0
  20. edsl/caching/cache.py +814 -0
  21. edsl/caching/cache_entry.py +427 -0
  22. edsl/{data/CacheHandler.py → caching/cache_handler.py} +14 -15
  23. edsl/caching/exceptions.py +24 -0
  24. edsl/caching/orm.py +30 -0
  25. edsl/{data/RemoteCacheSync.py → caching/remote_cache_sync.py} +3 -3
  26. edsl/caching/sql_dict.py +441 -0
  27. edsl/config/__init__.py +8 -0
  28. edsl/config/config_class.py +177 -0
  29. edsl/config.py +4 -176
  30. edsl/conversation/Conversation.py +7 -7
  31. edsl/conversation/car_buying.py +4 -4
  32. edsl/conversation/chips.py +6 -6
  33. edsl/coop/__init__.py +25 -2
  34. edsl/coop/coop.py +430 -113
  35. edsl/coop/{ExpectedParrotKeyHandler.py → ep_key_handling.py} +86 -10
  36. edsl/coop/exceptions.py +62 -0
  37. edsl/coop/price_fetcher.py +126 -0
  38. edsl/coop/utils.py +89 -24
  39. edsl/data_transfer_models.py +5 -72
  40. edsl/dataset/__init__.py +10 -0
  41. edsl/{results/Dataset.py → dataset/dataset.py} +116 -36
  42. edsl/dataset/dataset_operations_mixin.py +1492 -0
  43. edsl/{results/DatasetTree.py → dataset/dataset_tree.py} +156 -75
  44. edsl/{results/TableDisplay.py → dataset/display/table_display.py} +18 -7
  45. edsl/{results → dataset/display}/table_renderers.py +58 -2
  46. edsl/{results → dataset}/file_exports.py +4 -5
  47. edsl/{results → dataset}/smart_objects.py +2 -2
  48. edsl/enums.py +5 -205
  49. edsl/inference_services/__init__.py +5 -0
  50. edsl/inference_services/{AvailableModelCacheHandler.py → available_model_cache_handler.py} +2 -3
  51. edsl/inference_services/{AvailableModelFetcher.py → available_model_fetcher.py} +8 -14
  52. edsl/inference_services/data_structures.py +3 -2
  53. edsl/{exceptions/inference_services.py → inference_services/exceptions.py} +1 -1
  54. edsl/inference_services/{InferenceServiceABC.py → inference_service_abc.py} +1 -1
  55. edsl/inference_services/{InferenceServicesCollection.py → inference_services_collection.py} +8 -7
  56. edsl/inference_services/registry.py +4 -41
  57. edsl/inference_services/{ServiceAvailability.py → service_availability.py} +5 -25
  58. edsl/inference_services/services/__init__.py +31 -0
  59. edsl/inference_services/{AnthropicService.py → services/anthropic_service.py} +3 -3
  60. edsl/inference_services/{AwsBedrock.py → services/aws_bedrock.py} +2 -2
  61. edsl/inference_services/{AzureAI.py → services/azure_ai.py} +2 -2
  62. edsl/inference_services/{DeepInfraService.py → services/deep_infra_service.py} +1 -3
  63. edsl/inference_services/{DeepSeekService.py → services/deep_seek_service.py} +2 -4
  64. edsl/inference_services/{GoogleService.py → services/google_service.py} +5 -4
  65. edsl/inference_services/{GroqService.py → services/groq_service.py} +1 -1
  66. edsl/inference_services/{MistralAIService.py → services/mistral_ai_service.py} +3 -3
  67. edsl/inference_services/{OllamaService.py → services/ollama_service.py} +1 -7
  68. edsl/inference_services/{OpenAIService.py → services/open_ai_service.py} +5 -6
  69. edsl/inference_services/{PerplexityService.py → services/perplexity_service.py} +12 -12
  70. edsl/inference_services/{TestService.py → services/test_service.py} +7 -6
  71. edsl/inference_services/{TogetherAIService.py → services/together_ai_service.py} +2 -6
  72. edsl/inference_services/{XAIService.py → services/xai_service.py} +1 -1
  73. edsl/inference_services/write_available.py +1 -2
  74. edsl/instructions/__init__.py +6 -0
  75. edsl/{surveys/instructions/Instruction.py → instructions/instruction.py} +11 -6
  76. edsl/{surveys/instructions/InstructionCollection.py → instructions/instruction_collection.py} +10 -5
  77. edsl/{surveys/InstructionHandler.py → instructions/instruction_handler.py} +3 -3
  78. edsl/{jobs/interviews → interviews}/ReportErrors.py +2 -2
  79. edsl/interviews/__init__.py +4 -0
  80. edsl/{jobs/AnswerQuestionFunctionConstructor.py → interviews/answering_function.py} +45 -18
  81. edsl/{jobs/interviews/InterviewExceptionEntry.py → interviews/exception_tracking.py} +107 -22
  82. edsl/interviews/interview.py +638 -0
  83. edsl/{jobs/interviews/InterviewStatusDictionary.py → interviews/interview_status_dictionary.py} +21 -12
  84. edsl/{jobs/interviews/InterviewStatusLog.py → interviews/interview_status_log.py} +16 -7
  85. edsl/{jobs/InterviewTaskManager.py → interviews/interview_task_manager.py} +12 -7
  86. edsl/{jobs/RequestTokenEstimator.py → interviews/request_token_estimator.py} +8 -3
  87. edsl/{jobs/interviews/InterviewStatistic.py → interviews/statistics.py} +36 -10
  88. edsl/invigilators/__init__.py +38 -0
  89. edsl/invigilators/invigilator_base.py +477 -0
  90. edsl/{agents/Invigilator.py → invigilators/invigilators.py} +263 -10
  91. edsl/invigilators/prompt_constructor.py +476 -0
  92. edsl/{agents → invigilators}/prompt_helpers.py +2 -1
  93. edsl/{agents/QuestionInstructionPromptBuilder.py → invigilators/question_instructions_prompt_builder.py} +18 -13
  94. edsl/{agents → invigilators}/question_option_processor.py +96 -21
  95. edsl/{agents/QuestionTemplateReplacementsBuilder.py → invigilators/question_template_replacements_builder.py} +64 -12
  96. edsl/jobs/__init__.py +7 -1
  97. edsl/jobs/async_interview_runner.py +99 -35
  98. edsl/jobs/check_survey_scenario_compatibility.py +7 -5
  99. edsl/jobs/data_structures.py +153 -22
  100. edsl/{exceptions/jobs.py → jobs/exceptions.py} +2 -1
  101. edsl/jobs/{FetchInvigilator.py → fetch_invigilator.py} +4 -4
  102. edsl/jobs/{loggers/HTMLTableJobLogger.py → html_table_job_logger.py} +6 -2
  103. edsl/jobs/{Jobs.py → jobs.py} +321 -155
  104. edsl/jobs/{JobsChecks.py → jobs_checks.py} +15 -7
  105. edsl/jobs/{JobsComponentConstructor.py → jobs_component_constructor.py} +20 -17
  106. edsl/jobs/{InterviewsConstructor.py → jobs_interview_constructor.py} +10 -5
  107. edsl/jobs/jobs_pricing_estimation.py +347 -0
  108. edsl/jobs/{JobsRemoteInferenceLogger.py → jobs_remote_inference_logger.py} +4 -3
  109. edsl/jobs/jobs_runner_asyncio.py +282 -0
  110. edsl/jobs/{JobsRemoteInferenceHandler.py → remote_inference.py} +19 -22
  111. edsl/jobs/results_exceptions_handler.py +2 -2
  112. edsl/key_management/__init__.py +28 -0
  113. edsl/key_management/key_lookup.py +161 -0
  114. edsl/{language_models/key_management/KeyLookupBuilder.py → key_management/key_lookup_builder.py} +118 -47
  115. edsl/key_management/key_lookup_collection.py +82 -0
  116. edsl/key_management/models.py +218 -0
  117. edsl/language_models/__init__.py +7 -2
  118. edsl/language_models/{ComputeCost.py → compute_cost.py} +18 -3
  119. edsl/{exceptions/language_models.py → language_models/exceptions.py} +2 -1
  120. edsl/language_models/language_model.py +1080 -0
  121. edsl/language_models/model.py +10 -25
  122. edsl/language_models/{ModelList.py → model_list.py} +9 -14
  123. edsl/language_models/{RawResponseHandler.py → raw_response_handler.py} +1 -1
  124. edsl/language_models/{RegisterLanguageModelsMeta.py → registry.py} +1 -1
  125. edsl/language_models/repair.py +4 -4
  126. edsl/language_models/utilities.py +4 -4
  127. edsl/notebooks/__init__.py +3 -1
  128. edsl/notebooks/{Notebook.py → notebook.py} +7 -8
  129. edsl/prompts/__init__.py +1 -1
  130. edsl/{exceptions/prompts.py → prompts/exceptions.py} +3 -1
  131. edsl/prompts/{Prompt.py → prompt.py} +101 -95
  132. edsl/questions/HTMLQuestion.py +1 -1
  133. edsl/questions/__init__.py +154 -25
  134. edsl/questions/answer_validator_mixin.py +1 -1
  135. edsl/questions/compose_questions.py +4 -3
  136. edsl/questions/derived/question_likert_five.py +166 -0
  137. edsl/questions/derived/{QuestionLinearScale.py → question_linear_scale.py} +4 -4
  138. edsl/questions/derived/{QuestionTopK.py → question_top_k.py} +4 -4
  139. edsl/questions/derived/{QuestionYesNo.py → question_yes_no.py} +4 -5
  140. edsl/questions/descriptors.py +24 -30
  141. edsl/questions/loop_processor.py +65 -19
  142. edsl/questions/question_base.py +881 -0
  143. edsl/questions/question_base_gen_mixin.py +15 -16
  144. edsl/questions/{QuestionBasePromptsMixin.py → question_base_prompts_mixin.py} +2 -2
  145. edsl/questions/{QuestionBudget.py → question_budget.py} +3 -4
  146. edsl/questions/{QuestionCheckBox.py → question_check_box.py} +16 -16
  147. edsl/questions/{QuestionDict.py → question_dict.py} +39 -5
  148. edsl/questions/{QuestionExtract.py → question_extract.py} +9 -9
  149. edsl/questions/question_free_text.py +282 -0
  150. edsl/questions/{QuestionFunctional.py → question_functional.py} +6 -5
  151. edsl/questions/{QuestionList.py → question_list.py} +6 -7
  152. edsl/questions/{QuestionMatrix.py → question_matrix.py} +6 -5
  153. edsl/questions/{QuestionMultipleChoice.py → question_multiple_choice.py} +126 -21
  154. edsl/questions/{QuestionNumerical.py → question_numerical.py} +5 -5
  155. edsl/questions/{QuestionRank.py → question_rank.py} +6 -6
  156. edsl/questions/question_registry.py +10 -16
  157. edsl/questions/register_questions_meta.py +8 -4
  158. edsl/questions/response_validator_abc.py +17 -16
  159. edsl/results/__init__.py +4 -1
  160. edsl/{exceptions/results.py → results/exceptions.py} +1 -1
  161. edsl/results/report.py +197 -0
  162. edsl/results/{Result.py → result.py} +131 -45
  163. edsl/results/{Results.py → results.py} +420 -216
  164. edsl/results/results_selector.py +344 -25
  165. edsl/scenarios/__init__.py +30 -3
  166. edsl/scenarios/{ConstructDownloadLink.py → construct_download_link.py} +7 -0
  167. edsl/scenarios/directory_scanner.py +156 -13
  168. edsl/scenarios/document_chunker.py +186 -0
  169. edsl/scenarios/exceptions.py +101 -0
  170. edsl/scenarios/file_methods.py +2 -3
  171. edsl/scenarios/file_store.py +755 -0
  172. edsl/scenarios/handlers/__init__.py +14 -14
  173. edsl/scenarios/handlers/{csv.py → csv_file_store.py} +1 -2
  174. edsl/scenarios/handlers/{docx.py → docx_file_store.py} +8 -7
  175. edsl/scenarios/handlers/{html.py → html_file_store.py} +1 -2
  176. edsl/scenarios/handlers/{jpeg.py → jpeg_file_store.py} +1 -1
  177. edsl/scenarios/handlers/{json.py → json_file_store.py} +1 -1
  178. edsl/scenarios/handlers/latex_file_store.py +5 -0
  179. edsl/scenarios/handlers/{md.py → md_file_store.py} +1 -1
  180. edsl/scenarios/handlers/{pdf.py → pdf_file_store.py} +2 -2
  181. edsl/scenarios/handlers/{png.py → png_file_store.py} +1 -1
  182. edsl/scenarios/handlers/{pptx.py → pptx_file_store.py} +8 -7
  183. edsl/scenarios/handlers/{py.py → py_file_store.py} +1 -3
  184. edsl/scenarios/handlers/{sql.py → sql_file_store.py} +2 -1
  185. edsl/scenarios/handlers/{sqlite.py → sqlite_file_store.py} +2 -3
  186. edsl/scenarios/handlers/{txt.py → txt_file_store.py} +1 -1
  187. edsl/scenarios/scenario.py +928 -0
  188. edsl/scenarios/scenario_join.py +18 -5
  189. edsl/scenarios/{ScenarioList.py → scenario_list.py} +424 -106
  190. edsl/scenarios/{ScenarioListPdfMixin.py → scenario_list_pdf_tools.py} +16 -15
  191. edsl/scenarios/scenario_selector.py +5 -1
  192. edsl/study/ObjectEntry.py +2 -2
  193. edsl/study/SnapShot.py +5 -5
  194. edsl/study/Study.py +20 -21
  195. edsl/study/__init__.py +6 -4
  196. edsl/surveys/__init__.py +7 -4
  197. edsl/surveys/dag/__init__.py +2 -0
  198. edsl/surveys/{ConstructDAG.py → dag/construct_dag.py} +3 -3
  199. edsl/surveys/{DAG.py → dag/dag.py} +13 -10
  200. edsl/surveys/descriptors.py +1 -1
  201. edsl/surveys/{EditSurvey.py → edit_survey.py} +9 -9
  202. edsl/{exceptions/surveys.py → surveys/exceptions.py} +1 -2
  203. edsl/surveys/memory/__init__.py +3 -0
  204. edsl/surveys/{MemoryPlan.py → memory/memory_plan.py} +10 -9
  205. edsl/surveys/rules/__init__.py +3 -0
  206. edsl/surveys/{Rule.py → rules/rule.py} +103 -43
  207. edsl/surveys/{RuleCollection.py → rules/rule_collection.py} +21 -30
  208. edsl/surveys/{RuleManager.py → rules/rule_manager.py} +19 -13
  209. edsl/surveys/survey.py +1743 -0
  210. edsl/surveys/{SurveyExportMixin.py → survey_export.py} +22 -27
  211. edsl/surveys/{SurveyFlowVisualization.py → survey_flow_visualization.py} +11 -2
  212. edsl/surveys/{Simulator.py → survey_simulator.py} +10 -3
  213. edsl/tasks/__init__.py +32 -0
  214. edsl/{jobs/tasks/QuestionTaskCreator.py → tasks/question_task_creator.py} +115 -57
  215. edsl/tasks/task_creators.py +135 -0
  216. edsl/{jobs/tasks/TaskHistory.py → tasks/task_history.py} +86 -47
  217. edsl/{jobs/tasks → tasks}/task_status_enum.py +91 -7
  218. edsl/tasks/task_status_log.py +85 -0
  219. edsl/tokens/__init__.py +2 -0
  220. edsl/tokens/interview_token_usage.py +53 -0
  221. edsl/utilities/PrettyList.py +1 -1
  222. edsl/utilities/SystemInfo.py +25 -22
  223. edsl/utilities/__init__.py +29 -21
  224. edsl/utilities/gcp_bucket/__init__.py +2 -0
  225. edsl/utilities/gcp_bucket/cloud_storage.py +99 -96
  226. edsl/utilities/interface.py +44 -536
  227. edsl/{results/MarkdownToPDF.py → utilities/markdown_to_pdf.py} +13 -5
  228. edsl/utilities/repair_functions.py +1 -1
  229. {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/METADATA +3 -2
  230. edsl-0.1.48.dist-info/RECORD +347 -0
  231. edsl/Base.py +0 -426
  232. edsl/BaseDiff.py +0 -260
  233. edsl/agents/InvigilatorBase.py +0 -260
  234. edsl/agents/PromptConstructor.py +0 -318
  235. edsl/auto/AutoStudy.py +0 -130
  236. edsl/auto/StageBase.py +0 -243
  237. edsl/auto/StageGenerateSurvey.py +0 -178
  238. edsl/auto/StageLabelQuestions.py +0 -125
  239. edsl/auto/StagePersona.py +0 -61
  240. edsl/auto/StagePersonaDimensionValueRanges.py +0 -88
  241. edsl/auto/StagePersonaDimensionValues.py +0 -74
  242. edsl/auto/StagePersonaDimensions.py +0 -69
  243. edsl/auto/StageQuestions.py +0 -74
  244. edsl/auto/SurveyCreatorPipeline.py +0 -21
  245. edsl/auto/utilities.py +0 -218
  246. edsl/base/Base.py +0 -279
  247. edsl/coop/PriceFetcher.py +0 -54
  248. edsl/data/Cache.py +0 -580
  249. edsl/data/CacheEntry.py +0 -230
  250. edsl/data/SQLiteDict.py +0 -292
  251. edsl/data/__init__.py +0 -5
  252. edsl/data/orm.py +0 -10
  253. edsl/exceptions/cache.py +0 -5
  254. edsl/exceptions/coop.py +0 -14
  255. edsl/exceptions/data.py +0 -14
  256. edsl/exceptions/scenarios.py +0 -29
  257. edsl/jobs/Answers.py +0 -43
  258. edsl/jobs/JobsPrompts.py +0 -354
  259. edsl/jobs/buckets/BucketCollection.py +0 -134
  260. edsl/jobs/buckets/ModelBuckets.py +0 -65
  261. edsl/jobs/buckets/TokenBucket.py +0 -283
  262. edsl/jobs/buckets/TokenBucketClient.py +0 -191
  263. edsl/jobs/interviews/Interview.py +0 -395
  264. edsl/jobs/interviews/InterviewExceptionCollection.py +0 -99
  265. edsl/jobs/interviews/InterviewStatisticsCollection.py +0 -25
  266. edsl/jobs/runners/JobsRunnerAsyncio.py +0 -163
  267. edsl/jobs/runners/JobsRunnerStatusData.py +0 -0
  268. edsl/jobs/tasks/TaskCreators.py +0 -64
  269. edsl/jobs/tasks/TaskStatusLog.py +0 -23
  270. edsl/jobs/tokens/InterviewTokenUsage.py +0 -27
  271. edsl/language_models/LanguageModel.py +0 -635
  272. edsl/language_models/ServiceDataSources.py +0 -0
  273. edsl/language_models/key_management/KeyLookup.py +0 -63
  274. edsl/language_models/key_management/KeyLookupCollection.py +0 -38
  275. edsl/language_models/key_management/models.py +0 -137
  276. edsl/questions/QuestionBase.py +0 -539
  277. edsl/questions/QuestionFreeText.py +0 -130
  278. edsl/questions/derived/QuestionLikertFive.py +0 -76
  279. edsl/results/DatasetExportMixin.py +0 -911
  280. edsl/results/ResultsExportMixin.py +0 -45
  281. edsl/results/TextEditor.py +0 -50
  282. edsl/results/results_fetch_mixin.py +0 -33
  283. edsl/results/results_tools_mixin.py +0 -98
  284. edsl/scenarios/DocumentChunker.py +0 -104
  285. edsl/scenarios/FileStore.py +0 -564
  286. edsl/scenarios/Scenario.py +0 -548
  287. edsl/scenarios/ScenarioHtmlMixin.py +0 -65
  288. edsl/scenarios/ScenarioListExportMixin.py +0 -45
  289. edsl/scenarios/handlers/latex.py +0 -5
  290. edsl/shared.py +0 -1
  291. edsl/surveys/Survey.py +0 -1306
  292. edsl/surveys/SurveyQualtricsImport.py +0 -284
  293. edsl/surveys/SurveyToApp.py +0 -141
  294. edsl/surveys/instructions/__init__.py +0 -0
  295. edsl/tools/__init__.py +0 -1
  296. edsl/tools/clusters.py +0 -192
  297. edsl/tools/embeddings.py +0 -27
  298. edsl/tools/embeddings_plotting.py +0 -118
  299. edsl/tools/plotting.py +0 -112
  300. edsl/tools/summarize.py +0 -18
  301. edsl/utilities/data/Registry.py +0 -6
  302. edsl/utilities/data/__init__.py +0 -1
  303. edsl/utilities/data/scooter_results.json +0 -1
  304. edsl-0.1.46.dist-info/RECORD +0 -366
  305. /edsl/coop/{CoopFunctionsMixin.py → coop_functions.py} +0 -0
  306. /edsl/{results → dataset/display}/CSSParameterizer.py +0 -0
  307. /edsl/{language_models/key_management → dataset/display}/__init__.py +0 -0
  308. /edsl/{results → dataset/display}/table_data_class.py +0 -0
  309. /edsl/{results → dataset/display}/table_display.css +0 -0
  310. /edsl/{results/ResultsGGMixin.py → dataset/r/ggplot.py} +0 -0
  311. /edsl/{results → dataset}/tree_explore.py +0 -0
  312. /edsl/{surveys/instructions/ChangeInstruction.py → instructions/change_instruction.py} +0 -0
  313. /edsl/{jobs/interviews → interviews}/interview_status_enum.py +0 -0
  314. /edsl/jobs/{runners/JobsRunnerStatus.py → jobs_runner_status.py} +0 -0
  315. /edsl/language_models/{PriceManager.py → price_manager.py} +0 -0
  316. /edsl/language_models/{fake_openai_call.py → unused/fake_openai_call.py} +0 -0
  317. /edsl/language_models/{fake_openai_service.py → unused/fake_openai_service.py} +0 -0
  318. /edsl/notebooks/{NotebookToLaTeX.py → notebook_to_latex.py} +0 -0
  319. /edsl/{exceptions/questions.py → questions/exceptions.py} +0 -0
  320. /edsl/questions/{SimpleAskMixin.py → simple_ask_mixin.py} +0 -0
  321. /edsl/surveys/{Memory.py → memory/memory.py} +0 -0
  322. /edsl/surveys/{MemoryManagement.py → memory/memory_management.py} +0 -0
  323. /edsl/surveys/{SurveyCSS.py → survey_css.py} +0 -0
  324. /edsl/{jobs/tokens/TokenUsage.py → tokens/token_usage.py} +0 -0
  325. /edsl/{results/MarkdownToDocx.py → utilities/markdown_to_docx.py} +0 -0
  326. /edsl/{TemplateLoader.py → utilities/template_loader.py} +0 -0
  327. {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/LICENSE +0 -0
  328. {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/WHEEL +0 -0
@@ -1,564 +0,0 @@
1
- import base64
2
- import io
3
- import tempfile
4
- import mimetypes
5
- import os
6
- from typing import Dict, Any, IO, Optional
7
-
8
- from edsl.scenarios.Scenario import Scenario
9
- from edsl.utilities.remove_edsl_version import remove_edsl_version
10
-
11
- from edsl.scenarios.file_methods import FileMethods
12
- from typing import Union
13
- from uuid import UUID
14
-
15
- class FileStore(Scenario):
16
- __documentation__ = "https://docs.expectedparrot.com/en/latest/filestore.html"
17
-
18
- def __init__(
19
- self,
20
- path: Optional[str] = None,
21
- mime_type: Optional[str] = None,
22
- binary: Optional[bool] = None,
23
- suffix: Optional[str] = None,
24
- base64_string: Optional[str] = None,
25
- external_locations: Optional[Dict[str, str]] = None,
26
- extracted_text: Optional[str] = None,
27
- **kwargs,
28
- ):
29
- if path is None and "filename" in kwargs:
30
- path = kwargs["filename"]
31
-
32
- # Check if path is a URL and handle download
33
- if path and (path.startswith('http://') or path.startswith('https://')):
34
- temp_filestore = self.from_url(path, mime_type=mime_type)
35
- path = temp_filestore._path
36
- mime_type = temp_filestore.mime_type
37
-
38
- self._path = path # Store the original path privately
39
- self._temp_path = None # Track any generated temporary file
40
-
41
- self.suffix = suffix or path.split(".")[-1]
42
- self.binary = binary or False
43
- self.mime_type = (
44
- mime_type or mimetypes.guess_type(path)[0] or "application/octet-stream"
45
- )
46
- self.base64_string = base64_string or self.encode_file_to_base64_string(path)
47
- self.external_locations = external_locations or {}
48
-
49
- self.extracted_text = (
50
- self.extract_text() if extracted_text is None else extracted_text
51
- )
52
-
53
- super().__init__(
54
- {
55
- "path": path,
56
- "base64_string": self.base64_string,
57
- "binary": self.binary,
58
- "suffix": self.suffix,
59
- "mime_type": self.mime_type,
60
- "external_locations": self.external_locations,
61
- "extracted_text": self.extracted_text,
62
- }
63
- )
64
-
65
- @property
66
- def path(self) -> str:
67
- """
68
- Property that returns a valid path to the file content.
69
- If the original path doesn't exist, generates a temporary file from the base64 content.
70
- """
71
- # Check if original path exists and is accessible
72
- if self._path and os.path.isfile(self._path):
73
- return self._path
74
-
75
- # If we already have a valid temporary file, use it
76
- if self._temp_path and os.path.isfile(self._temp_path):
77
- return self._temp_path
78
-
79
- # Generate a new temporary file from base64 content
80
- self._temp_path = self.to_tempfile(self.suffix)
81
- return self._temp_path
82
-
83
- def __str__(self):
84
- return "FileStore: self.path"
85
-
86
- @classmethod
87
- def example(cls, example_type="txt"):
88
- file_methods_class = FileMethods.get_handler(example_type)
89
- if file_methods_class:
90
- return cls(file_methods_class().example())
91
- else:
92
- print(f"Example for {example_type} is not supported.")
93
-
94
- @property
95
- def size(self) -> int:
96
- if self.base64_string != None:
97
- return (len(self.base64_string) / 4.0) * 3 # from base64 to char size
98
- return os.path.getsize(self.path)
99
-
100
- def upload_google(self, refresh: bool = False) -> None:
101
- import google.generativeai as genai
102
-
103
- genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
104
- google_info = genai.upload_file(self.path, mime_type=self.mime_type)
105
- self.external_locations["google"] = google_info.to_dict()
106
-
107
- @classmethod
108
- @remove_edsl_version
109
- def from_dict(cls, d):
110
- # return cls(d["filename"], d["binary"], d["suffix"], d["base64_string"])
111
- return cls(**d)
112
-
113
- def __repr__(self):
114
- import reprlib
115
-
116
- r = reprlib.Repr()
117
- r.maxstring = 20 # Limit strings to 20 chars
118
- r.maxother = 30 # Limit other types to 30 chars
119
-
120
- params = ", ".join(f"{key}={r.repr(value)}" for key, value in self.data.items())
121
- return f"{self.__class__.__name__}({params})"
122
-
123
- def _repr_html_(self):
124
- parent_html = super()._repr_html_()
125
- from edsl.scenarios.ConstructDownloadLink import ConstructDownloadLink
126
-
127
- link = ConstructDownloadLink(self).html_create_link(self.path, style=None)
128
- return f"{parent_html}<br>{link}"
129
-
130
- def encode_file_to_base64_string(self, file_path: str):
131
- try:
132
- # Attempt to open the file in text mode
133
- with open(file_path, "r") as text_file:
134
- # Read the text data
135
- text_data = text_file.read()
136
- # Encode the text data to a base64 string
137
- base64_encoded_data = base64.b64encode(text_data.encode("utf-8"))
138
- except UnicodeDecodeError:
139
- # If reading as text fails, open the file in binary mode
140
- with open(file_path, "rb") as binary_file:
141
- # Read the binary data
142
- binary_data = binary_file.read()
143
- # Encode the binary data to a base64 string
144
- base64_encoded_data = base64.b64encode(binary_data)
145
- self.binary = True
146
- # Convert the base64 bytes to a string
147
- except FileNotFoundError:
148
- print(f"File not found: {file_path}")
149
- print("Current working directory:", os.getcwd())
150
- raise
151
- base64_string = base64_encoded_data.decode("utf-8")
152
-
153
- return base64_string
154
-
155
- def open(self) -> "IO":
156
- if self.binary:
157
- return self.base64_to_file(self.base64_string, is_binary=True)
158
- else:
159
- return self.base64_to_text_file(self.base64_string)
160
-
161
- def write(self, filename: Optional[str] = None) -> str:
162
- """
163
- Write the file content to disk, either to a specified filename or a temporary file.
164
-
165
- Args:
166
- filename (Optional[str]): The destination filename. If None, creates a temporary file.
167
-
168
- Returns:
169
- str: The path to the written file.
170
- """
171
- # Determine the mode based on binary flag
172
- mode = "wb" if self.binary else "w"
173
-
174
- # If no filename provided, create a temporary file
175
- if filename is None:
176
- from tempfile import NamedTemporaryFile
177
-
178
- with NamedTemporaryFile(delete=False, suffix="." + self.suffix) as f:
179
- filename = f.name
180
-
181
- # Write the content using the appropriate mode
182
- try:
183
- with open(filename, mode) as f:
184
- content = self.open().read()
185
- # For text mode, ensure we're writing a string
186
- if not self.binary and isinstance(content, bytes):
187
- content = content.decode("utf-8")
188
- f.write(content)
189
- print(f"File written to {filename}")
190
- except Exception as e:
191
- print(f"Error writing file: {e}")
192
- raise
193
-
194
- # return filename
195
-
196
- @staticmethod
197
- def base64_to_text_file(base64_string) -> "IO":
198
- # Decode the base64 string to bytes
199
- text_data_bytes = base64.b64decode(base64_string)
200
-
201
- # Convert bytes to string
202
- text_data = text_data_bytes.decode("utf-8")
203
-
204
- # Create a StringIO object from the text data
205
- text_file = io.StringIO(text_data)
206
-
207
- return text_file
208
-
209
- @staticmethod
210
- def base64_to_file(base64_string, is_binary=True):
211
- # Decode the base64 string to bytes
212
- file_data = base64.b64decode(base64_string)
213
-
214
- if is_binary:
215
- # Create a BytesIO object for binary data
216
- return io.BytesIO(file_data)
217
- else:
218
- # Convert bytes to string for text data
219
- text_data = file_data.decode("utf-8")
220
- # Create a StringIO object for text data
221
- return io.StringIO(text_data)
222
-
223
- @property
224
- def text(self):
225
- if self.binary:
226
- import warnings
227
-
228
- warnings.warn("This is a binary file.")
229
- else:
230
- return self.base64_to_text_file(self.base64_string).read()
231
-
232
- def to_tempfile(self, suffix=None):
233
- if suffix is None:
234
- suffix = self.suffix
235
- if self.binary:
236
- file_like_object = self.base64_to_file(
237
- self["base64_string"], is_binary=True
238
- )
239
- else:
240
- file_like_object = self.base64_to_text_file(self.base64_string)
241
-
242
- # Create a named temporary file
243
- mode = "wb" if self.binary else "w"
244
- temp_file = tempfile.NamedTemporaryFile(
245
- delete=False, suffix="." + suffix, mode=mode
246
- )
247
-
248
- if self.binary:
249
- temp_file.write(file_like_object.read())
250
- else:
251
- temp_file.write(file_like_object.read())
252
-
253
- temp_file.close()
254
-
255
- return temp_file.name
256
-
257
- def view(self) -> None:
258
- handler = FileMethods.get_handler(self.suffix)
259
- if handler:
260
- handler(self.path).view()
261
- else:
262
- print(f"Viewing of {self.suffix} files is not supported.")
263
-
264
- def extract_text(self) -> str:
265
- handler = FileMethods.get_handler(self.suffix)
266
- if handler and hasattr(handler, "extract_text"):
267
- return handler(self.path).extract_text()
268
-
269
- if not self.binary:
270
- return self.text
271
-
272
- return None
273
- # raise TypeError("No text method found for this file type.")
274
-
275
- def push(
276
- self,
277
- description: Optional[str] = None,
278
- alias: Optional[str] = None,
279
- visibility: Optional[str] = "unlisted",
280
- expected_parrot_url: Optional[str] = None,
281
-
282
- ) -> dict:
283
- """
284
- Push the object to Coop.
285
- :param description: The description of the object to push.
286
- :param visibility: The visibility of the object to push.
287
- """
288
- scenario_version = Scenario.from_dict(self.to_dict())
289
- if description is None:
290
- description = "File: " + self.path
291
- info = scenario_version.push(description=description, visibility=visibility, expected_parrot_url=expected_parrot_url, alias=alias)
292
- return info
293
-
294
- @classmethod
295
- def pull(cls, url_or_uuid: Union[str, UUID]) -> "FileStore":
296
- """
297
- Pull a FileStore object from Coop.
298
-
299
- Args:
300
- url_or_uuid: Either a UUID string or a URL pointing to the object
301
- expected_parrot_url: Optional URL for the Parrot server
302
-
303
- Returns:
304
- FileStore: The pulled FileStore object
305
- """
306
- scenario_version = Scenario.pull(url_or_uuid)
307
- return cls.from_dict(scenario_version.to_dict())
308
-
309
- @classmethod
310
- def from_url(
311
- cls,
312
- url: str,
313
- download_path: Optional[str] = None,
314
- mime_type: Optional[str] = None,
315
- ) -> "FileStore":
316
- """
317
- :param url: The URL of the file to download.
318
- :param download_path: The path to save the downloaded file.
319
- :param mime_type: The MIME type of the file. If None, it will be guessed from the file extension.
320
- """
321
- import requests
322
- from urllib.parse import urlparse
323
-
324
- response = requests.get(url, stream=True)
325
- response.raise_for_status() # Raises an HTTPError for bad responses
326
-
327
- # Get the filename from the URL if download_path is not provided
328
- if download_path is None:
329
- filename = os.path.basename(urlparse(url).path)
330
- if not filename:
331
- filename = "downloaded_file"
332
- # download_path = filename
333
- download_path = os.path.join(os.getcwd(), filename)
334
-
335
- # Ensure the directory exists
336
- os.makedirs(os.path.dirname(download_path), exist_ok=True)
337
-
338
- # Write the file
339
- with open(download_path, "wb") as file:
340
- for chunk in response.iter_content(chunk_size=8192):
341
- file.write(chunk)
342
-
343
- # Create and return a new File instance
344
- return cls(download_path, mime_type=mime_type)
345
-
346
- def create_link(self, custom_filename=None, style=None):
347
- from edsl.scenarios.ConstructDownloadLink import ConstructDownloadLink
348
-
349
- return ConstructDownloadLink(self).create_link(custom_filename, style)
350
-
351
- def to_pandas(self):
352
- """
353
- Convert the file content to a pandas DataFrame if supported by the file handler.
354
-
355
- Returns:
356
- pandas.DataFrame: The data from the file as a DataFrame
357
-
358
- Raises:
359
- AttributeError: If the file type's handler doesn't support pandas conversion
360
- """
361
- handler = FileMethods.get_handler(self.suffix)
362
- if handler and hasattr(handler, "to_pandas"):
363
- return handler(self.path).to_pandas()
364
- raise AttributeError(
365
- f"Converting {self.suffix} files to pandas DataFrame is not supported"
366
- )
367
-
368
def __getattr__(self, name):
    """Delegate unknown attributes to the pandas DataFrame of a CSV file.

    Python calls ``__getattr__`` only after normal attribute lookup has
    failed. For CSV files the attribute is then looked up on the DataFrame
    produced by ``to_pandas()``.

    Two kinds of names are never delegated:

    * ``suffix`` and ``path`` -- this method (through ``to_pandas``) reads
      them itself, so reaching this point for either one means the
      instance is not fully initialised; delegating would re-enter
      ``__getattr__`` until ``RecursionError``.
    * dunder names (``__deepcopy__``, ``__getstate__``, ...) -- protocol
      probes must be answered by this object, not by the DataFrame.

    Args:
        name: Name of the attribute that normal lookup could not find.

    Returns:
        The attribute of the underlying DataFrame when this is a CSV file
        and the DataFrame has it.

    Raises:
        AttributeError: In every other case.
    """
    is_dunder = name.startswith("__") and name.endswith("__")
    delegatable = not is_dunder and name not in ("suffix", "path")

    if delegatable and self.suffix == "csv":
        # Get the pandas DataFrame
        df = self.to_pandas()
        # Check if the requested attribute exists in the DataFrame
        if hasattr(df, name):
            return getattr(df, name)

    # If not a CSV or attribute doesn't exist in DataFrame, raise AttributeError
    raise AttributeError(
        f"'{self.__class__.__name__}' object has no attribute '{name}'"
    )
382
-
383
-
384
class CSVFileStore(FileStore):
    """FileStore variant for CSV files."""

    @classmethod
    def example(cls):
        """Return an instance backed by a temporary CSV of ``Results.example()``.

        The temporary file is created with ``delete=False``, so it remains
        on disk after this method returns.
        """
        import tempfile

        from edsl.results.Results import Results

        example_results = Results.example()

        with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp:
            example_results.to_csv(filename=tmp.name)

        csv_path = tmp.name
        return cls(csv_path)

    def view(self):
        """Read the file (via ``self.to_tempfile()``) into a pandas DataFrame."""
        import pandas as pd

        csv_path = self.to_tempfile()
        return pd.read_csv(csv_path)
401
-
402
-
403
class PDFFileStore(FileStore):
    """FileStore variant for PDF documents."""

    def view(self):
        """Open the PDF with the operating system's default viewer.

        The path comes from ``self.to_tempfile()``. This is best-effort:
        failures to launch a viewer are printed, not raised.
        """
        import os
        import subprocess
        import sys

        pdf_path = self.to_tempfile()
        print(f"PDF path: {pdf_path}")  # Print the path to ensure it exists

        if not os.path.exists(pdf_path):
            print("PDF file was not created successfully.")
            return

        try:
            if os.name == "nt":
                os.startfile(pdf_path)  # Windows
            elif sys.platform == "darwin":
                # os.name is "posix" on both macOS and Linux, so the macOS
                # check must use sys.platform; otherwise Linux would run
                # `open` and never reach xdg-open.
                subprocess.run(["open", pdf_path], check=True)  # macOS
            else:
                subprocess.run(["xdg-open", pdf_path], check=True)  # Linux
        except Exception as e:
            print(f"Error opening PDF: {e}")

    @classmethod
    def example(cls):
        """Return an instance backed by a small temporary "Hello, World!" PDF.

        The temporary file is created with ``delete=False``, so it remains
        on disk after this method returns.
        """
        import tempfile
        import textwrap

        pdf_string = textwrap.dedent(
            """\
            %PDF-1.4
            1 0 obj
            << /Type /Catalog /Pages 2 0 R >>
            endobj
            2 0 obj
            << /Type /Pages /Kids [3 0 R] /Count 1 >>
            endobj
            3 0 obj
            << /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R >>
            endobj
            4 0 obj
            << /Length 44 >>
            stream
            BT
            /F1 24 Tf
            100 700 Td
            (Hello, World!) Tj
            ET
            endstream
            endobj
            5 0 obj
            << /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>
            endobj
            6 0 obj
            << /ProcSet [/PDF /Text] /Font << /F1 5 0 R >> >>
            endobj
            xref
            0 7
            0000000000 65535 f
            0000000010 00000 n
            0000000053 00000 n
            0000000100 00000 n
            0000000173 00000 n
            0000000232 00000 n
            0000000272 00000 n
            trailer
            << /Size 7 /Root 1 0 R >>
            startxref
            318
            %%EOF"""
        )

        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as f:
            f.write(pdf_string.encode())

        return cls(f.name)
477
-
478
-
479
class PNGFileStore(FileStore):
    """FileStore variant for PNG images."""

    @classmethod
    def example(cls):
        """Return an instance backed by a temporary, valid 256x256 RGBA PNG.

        The image is assembled as real bytes. Building it as ``str`` and
        calling ``.encode()`` would UTF-8-expand every byte >= 0x80 and
        corrupt the PNG signature. The header values (256x256, 8-bit,
        colour type 6) match the ones in the previous hard-coded literal;
        the pixel data is fully transparent.

        The temporary file is created with ``delete=False``, so it remains
        on disk after this method returns.
        """
        import struct
        import tempfile
        import zlib

        def _chunk(tag, payload):
            # PNG chunk layout: length, type, data, CRC32 over type + data.
            body = tag + payload
            crc = zlib.crc32(body) & 0xFFFFFFFF
            return struct.pack(">I", len(payload)) + body + struct.pack(">I", crc)

        width = height = 256
        # width, height, bit depth 8, colour type 6 (RGBA), then
        # compression / filter / interlace all 0.
        ihdr = struct.pack(">IIBBBBB", width, height, 8, 6, 0, 0, 0)
        # Each scanline: one filter-type byte (0) followed by RGBA pixels.
        scanline = b"\x00" + b"\x00" * (width * 4)
        idat = zlib.compress(scanline * height)

        png_bytes = (
            b"\x89PNG\r\n\x1a\n"
            + _chunk(b"IHDR", ihdr)
            + _chunk(b"IDAT", idat)
            + _chunk(b"IEND", b"")
        )

        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
            f.write(png_bytes)

        return cls(f.name)

    def view(self):
        """Display the image with matplotlib."""
        import matplotlib.image as mpimg
        import matplotlib.pyplot as plt

        img = mpimg.imread(self.to_tempfile())
        plt.imshow(img)
        plt.show()
501
-
502
-
503
class SQLiteFileStore(FileStore):
    """FileStore variant for SQLite database files."""

    @classmethod
    def example(cls):
        """Return an instance backed by a temporary database with one table.

        The database contains a single empty ``stocks (date text)`` table.
        The temporary file is created with ``delete=False``, so it remains
        on disk after this method returns.
        """
        import sqlite3
        import tempfile
        from contextlib import closing

        # Reserve a path first; the zero-byte file is a valid empty database.
        with tempfile.NamedTemporaryFile(suffix=".sqlite", delete=False) as f:
            db_path = f.name

        # closing() guarantees the connection is released even on error;
        # a sqlite3 connection used as a plain context manager only manages
        # the transaction and does not close the connection.
        with closing(sqlite3.connect(db_path)) as conn:
            conn.execute("""CREATE TABLE stocks (date text)""")
            conn.commit()

        return cls(db_path)

    def view(self):
        """Open the database in the interactive ``sqlite3`` shell.

        The command is run from an argument list rather than a shell string,
        so paths with spaces or shell metacharacters are passed through
        intact. A missing ``sqlite3`` executable is reported on stdout.
        """
        import subprocess

        sqlite_path = self.to_tempfile()
        try:
            subprocess.run(["sqlite3", sqlite_path])
        except FileNotFoundError:
            print("sqlite3 command-line shell was not found on PATH.")
523
-
524
-
525
class HTMLFileStore(FileStore):
    """FileStore variant for HTML documents."""

    @classmethod
    def example(cls):
        """Return an instance backed by a temporary one-heading HTML page.

        The temporary file is created with ``delete=False``, so it remains
        on disk after this method returns.
        """
        import tempfile

        markup = "<html><body><h1>Test</h1></body></html>"

        with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as tmp:
            tmp.write(markup.encode())

        page_path = tmp.name
        return cls(page_path)

    def view(self):
        """Open the page in the default web browser via a ``file://`` URL."""
        import webbrowser

        html_path = self.to_tempfile()
        url = "file://" + html_path
        webbrowser.open(url)
540
-
541
-
542
if __name__ == "__main__":
    import doctest

    doctest.testmod()

    # Manual smoke test: build the example for every supported file type,
    # open it in its viewer, and wait for the user before moving on.
    formats = FileMethods.supported_file_types()
    for file_type in formats:
        print("Now testing", file_type)
        fs = FileStore.example(file_type)
        fs.view()
        input("Press Enter to continue...")