edsl 0.1.47__py3-none-any.whl → 0.1.49__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (314) hide show
  1. edsl/__init__.py +44 -39
  2. edsl/__version__.py +1 -1
  3. edsl/agents/__init__.py +4 -2
  4. edsl/agents/{Agent.py → agent.py} +442 -152
  5. edsl/agents/{AgentList.py → agent_list.py} +220 -162
  6. edsl/agents/descriptors.py +46 -7
  7. edsl/{exceptions/agents.py → agents/exceptions.py} +3 -12
  8. edsl/base/__init__.py +75 -0
  9. edsl/base/base_class.py +1303 -0
  10. edsl/base/data_transfer_models.py +114 -0
  11. edsl/base/enums.py +215 -0
  12. edsl/base.py +8 -0
  13. edsl/buckets/__init__.py +25 -0
  14. edsl/buckets/bucket_collection.py +324 -0
  15. edsl/buckets/model_buckets.py +206 -0
  16. edsl/buckets/token_bucket.py +502 -0
  17. edsl/{jobs/buckets/TokenBucketAPI.py → buckets/token_bucket_api.py} +1 -1
  18. edsl/buckets/token_bucket_client.py +509 -0
  19. edsl/caching/__init__.py +20 -0
  20. edsl/caching/cache.py +814 -0
  21. edsl/caching/cache_entry.py +427 -0
  22. edsl/{data/CacheHandler.py → caching/cache_handler.py} +14 -15
  23. edsl/caching/exceptions.py +24 -0
  24. edsl/caching/orm.py +30 -0
  25. edsl/{data/RemoteCacheSync.py → caching/remote_cache_sync.py} +3 -3
  26. edsl/caching/sql_dict.py +441 -0
  27. edsl/config/__init__.py +8 -0
  28. edsl/config/config_class.py +177 -0
  29. edsl/config.py +4 -176
  30. edsl/conversation/Conversation.py +7 -7
  31. edsl/conversation/car_buying.py +4 -4
  32. edsl/conversation/chips.py +6 -6
  33. edsl/coop/__init__.py +25 -2
  34. edsl/coop/coop.py +311 -75
  35. edsl/coop/{ExpectedParrotKeyHandler.py → ep_key_handling.py} +86 -10
  36. edsl/coop/exceptions.py +62 -0
  37. edsl/coop/price_fetcher.py +126 -0
  38. edsl/coop/utils.py +89 -24
  39. edsl/data_transfer_models.py +5 -72
  40. edsl/dataset/__init__.py +10 -0
  41. edsl/{results/Dataset.py → dataset/dataset.py} +116 -36
  42. edsl/{results/DatasetExportMixin.py → dataset/dataset_operations_mixin.py} +606 -122
  43. edsl/{results/DatasetTree.py → dataset/dataset_tree.py} +156 -75
  44. edsl/{results/TableDisplay.py → dataset/display/table_display.py} +18 -7
  45. edsl/{results → dataset/display}/table_renderers.py +58 -2
  46. edsl/{results → dataset}/file_exports.py +4 -5
  47. edsl/{results → dataset}/smart_objects.py +2 -2
  48. edsl/enums.py +5 -205
  49. edsl/inference_services/__init__.py +5 -0
  50. edsl/inference_services/{AvailableModelCacheHandler.py → available_model_cache_handler.py} +2 -3
  51. edsl/inference_services/{AvailableModelFetcher.py → available_model_fetcher.py} +8 -14
  52. edsl/inference_services/data_structures.py +3 -2
  53. edsl/{exceptions/inference_services.py → inference_services/exceptions.py} +1 -1
  54. edsl/inference_services/{InferenceServiceABC.py → inference_service_abc.py} +1 -1
  55. edsl/inference_services/{InferenceServicesCollection.py → inference_services_collection.py} +8 -7
  56. edsl/inference_services/registry.py +4 -41
  57. edsl/inference_services/{ServiceAvailability.py → service_availability.py} +5 -25
  58. edsl/inference_services/services/__init__.py +31 -0
  59. edsl/inference_services/{AnthropicService.py → services/anthropic_service.py} +3 -3
  60. edsl/inference_services/{AwsBedrock.py → services/aws_bedrock.py} +2 -2
  61. edsl/inference_services/{AzureAI.py → services/azure_ai.py} +2 -2
  62. edsl/inference_services/{DeepInfraService.py → services/deep_infra_service.py} +1 -3
  63. edsl/inference_services/{DeepSeekService.py → services/deep_seek_service.py} +2 -4
  64. edsl/inference_services/{GoogleService.py → services/google_service.py} +5 -4
  65. edsl/inference_services/{GroqService.py → services/groq_service.py} +1 -1
  66. edsl/inference_services/{MistralAIService.py → services/mistral_ai_service.py} +3 -3
  67. edsl/inference_services/{OllamaService.py → services/ollama_service.py} +1 -7
  68. edsl/inference_services/{OpenAIService.py → services/open_ai_service.py} +5 -6
  69. edsl/inference_services/{PerplexityService.py → services/perplexity_service.py} +3 -7
  70. edsl/inference_services/{TestService.py → services/test_service.py} +7 -6
  71. edsl/inference_services/{TogetherAIService.py → services/together_ai_service.py} +2 -6
  72. edsl/inference_services/{XAIService.py → services/xai_service.py} +1 -1
  73. edsl/inference_services/write_available.py +1 -2
  74. edsl/instructions/__init__.py +6 -0
  75. edsl/{surveys/instructions/Instruction.py → instructions/instruction.py} +11 -6
  76. edsl/{surveys/instructions/InstructionCollection.py → instructions/instruction_collection.py} +10 -5
  77. edsl/{surveys/InstructionHandler.py → instructions/instruction_handler.py} +3 -3
  78. edsl/{jobs/interviews → interviews}/ReportErrors.py +2 -2
  79. edsl/interviews/__init__.py +4 -0
  80. edsl/{jobs/AnswerQuestionFunctionConstructor.py → interviews/answering_function.py} +45 -18
  81. edsl/{jobs/interviews/InterviewExceptionEntry.py → interviews/exception_tracking.py} +107 -22
  82. edsl/interviews/interview.py +638 -0
  83. edsl/{jobs/interviews/InterviewStatusDictionary.py → interviews/interview_status_dictionary.py} +21 -12
  84. edsl/{jobs/interviews/InterviewStatusLog.py → interviews/interview_status_log.py} +16 -7
  85. edsl/{jobs/InterviewTaskManager.py → interviews/interview_task_manager.py} +12 -7
  86. edsl/{jobs/RequestTokenEstimator.py → interviews/request_token_estimator.py} +8 -3
  87. edsl/{jobs/interviews/InterviewStatistic.py → interviews/statistics.py} +36 -10
  88. edsl/invigilators/__init__.py +38 -0
  89. edsl/invigilators/invigilator_base.py +477 -0
  90. edsl/{agents/Invigilator.py → invigilators/invigilators.py} +263 -10
  91. edsl/invigilators/prompt_constructor.py +476 -0
  92. edsl/{agents → invigilators}/prompt_helpers.py +2 -1
  93. edsl/{agents/QuestionInstructionPromptBuilder.py → invigilators/question_instructions_prompt_builder.py} +18 -13
  94. edsl/{agents → invigilators}/question_option_processor.py +96 -21
  95. edsl/{agents/QuestionTemplateReplacementsBuilder.py → invigilators/question_template_replacements_builder.py} +64 -12
  96. edsl/jobs/__init__.py +7 -1
  97. edsl/jobs/async_interview_runner.py +99 -35
  98. edsl/jobs/check_survey_scenario_compatibility.py +7 -5
  99. edsl/jobs/data_structures.py +153 -22
  100. edsl/{exceptions/jobs.py → jobs/exceptions.py} +2 -1
  101. edsl/jobs/{FetchInvigilator.py → fetch_invigilator.py} +4 -4
  102. edsl/jobs/{loggers/HTMLTableJobLogger.py → html_table_job_logger.py} +6 -2
  103. edsl/jobs/{Jobs.py → jobs.py} +313 -167
  104. edsl/jobs/{JobsChecks.py → jobs_checks.py} +15 -7
  105. edsl/jobs/{JobsComponentConstructor.py → jobs_component_constructor.py} +19 -17
  106. edsl/jobs/{InterviewsConstructor.py → jobs_interview_constructor.py} +10 -5
  107. edsl/jobs/jobs_pricing_estimation.py +347 -0
  108. edsl/jobs/{JobsRemoteInferenceLogger.py → jobs_remote_inference_logger.py} +4 -3
  109. edsl/jobs/jobs_runner_asyncio.py +282 -0
  110. edsl/jobs/{JobsRemoteInferenceHandler.py → remote_inference.py} +19 -22
  111. edsl/jobs/results_exceptions_handler.py +2 -2
  112. edsl/key_management/__init__.py +28 -0
  113. edsl/key_management/key_lookup.py +161 -0
  114. edsl/{language_models/key_management/KeyLookupBuilder.py → key_management/key_lookup_builder.py} +118 -47
  115. edsl/key_management/key_lookup_collection.py +82 -0
  116. edsl/key_management/models.py +218 -0
  117. edsl/language_models/__init__.py +7 -2
  118. edsl/language_models/{ComputeCost.py → compute_cost.py} +18 -3
  119. edsl/{exceptions/language_models.py → language_models/exceptions.py} +2 -1
  120. edsl/language_models/language_model.py +1080 -0
  121. edsl/language_models/model.py +10 -25
  122. edsl/language_models/{ModelList.py → model_list.py} +9 -14
  123. edsl/language_models/{RawResponseHandler.py → raw_response_handler.py} +1 -1
  124. edsl/language_models/{RegisterLanguageModelsMeta.py → registry.py} +1 -1
  125. edsl/language_models/repair.py +4 -4
  126. edsl/language_models/utilities.py +4 -4
  127. edsl/notebooks/__init__.py +3 -1
  128. edsl/notebooks/{Notebook.py → notebook.py} +7 -8
  129. edsl/prompts/__init__.py +1 -1
  130. edsl/{exceptions/prompts.py → prompts/exceptions.py} +3 -1
  131. edsl/prompts/{Prompt.py → prompt.py} +101 -95
  132. edsl/questions/HTMLQuestion.py +1 -1
  133. edsl/questions/__init__.py +154 -25
  134. edsl/questions/answer_validator_mixin.py +1 -1
  135. edsl/questions/compose_questions.py +4 -3
  136. edsl/questions/derived/question_likert_five.py +166 -0
  137. edsl/questions/derived/{QuestionLinearScale.py → question_linear_scale.py} +4 -4
  138. edsl/questions/derived/{QuestionTopK.py → question_top_k.py} +4 -4
  139. edsl/questions/derived/{QuestionYesNo.py → question_yes_no.py} +4 -5
  140. edsl/questions/descriptors.py +24 -30
  141. edsl/questions/loop_processor.py +65 -19
  142. edsl/questions/question_base.py +881 -0
  143. edsl/questions/question_base_gen_mixin.py +15 -16
  144. edsl/questions/{QuestionBasePromptsMixin.py → question_base_prompts_mixin.py} +2 -2
  145. edsl/questions/{QuestionBudget.py → question_budget.py} +3 -4
  146. edsl/questions/{QuestionCheckBox.py → question_check_box.py} +16 -16
  147. edsl/questions/{QuestionDict.py → question_dict.py} +39 -5
  148. edsl/questions/{QuestionExtract.py → question_extract.py} +9 -9
  149. edsl/questions/question_free_text.py +282 -0
  150. edsl/questions/{QuestionFunctional.py → question_functional.py} +6 -5
  151. edsl/questions/{QuestionList.py → question_list.py} +6 -7
  152. edsl/questions/{QuestionMatrix.py → question_matrix.py} +6 -5
  153. edsl/questions/{QuestionMultipleChoice.py → question_multiple_choice.py} +126 -21
  154. edsl/questions/{QuestionNumerical.py → question_numerical.py} +5 -5
  155. edsl/questions/{QuestionRank.py → question_rank.py} +6 -6
  156. edsl/questions/question_registry.py +4 -9
  157. edsl/questions/register_questions_meta.py +8 -4
  158. edsl/questions/response_validator_abc.py +17 -16
  159. edsl/results/__init__.py +4 -1
  160. edsl/{exceptions/results.py → results/exceptions.py} +1 -1
  161. edsl/results/report.py +197 -0
  162. edsl/results/{Result.py → result.py} +131 -45
  163. edsl/results/{Results.py → results.py} +365 -220
  164. edsl/results/results_selector.py +344 -25
  165. edsl/scenarios/__init__.py +30 -3
  166. edsl/scenarios/{ConstructDownloadLink.py → construct_download_link.py} +7 -0
  167. edsl/scenarios/directory_scanner.py +156 -13
  168. edsl/scenarios/document_chunker.py +186 -0
  169. edsl/scenarios/exceptions.py +101 -0
  170. edsl/scenarios/file_methods.py +2 -3
  171. edsl/scenarios/{FileStore.py → file_store.py} +275 -189
  172. edsl/scenarios/handlers/__init__.py +14 -14
  173. edsl/scenarios/handlers/{csv.py → csv_file_store.py} +1 -2
  174. edsl/scenarios/handlers/{docx.py → docx_file_store.py} +8 -7
  175. edsl/scenarios/handlers/{html.py → html_file_store.py} +1 -2
  176. edsl/scenarios/handlers/{jpeg.py → jpeg_file_store.py} +1 -1
  177. edsl/scenarios/handlers/{json.py → json_file_store.py} +1 -1
  178. edsl/scenarios/handlers/latex_file_store.py +5 -0
  179. edsl/scenarios/handlers/{md.py → md_file_store.py} +1 -1
  180. edsl/scenarios/handlers/{pdf.py → pdf_file_store.py} +2 -2
  181. edsl/scenarios/handlers/{png.py → png_file_store.py} +1 -1
  182. edsl/scenarios/handlers/{pptx.py → pptx_file_store.py} +8 -7
  183. edsl/scenarios/handlers/{py.py → py_file_store.py} +1 -3
  184. edsl/scenarios/handlers/{sql.py → sql_file_store.py} +2 -1
  185. edsl/scenarios/handlers/{sqlite.py → sqlite_file_store.py} +2 -3
  186. edsl/scenarios/handlers/{txt.py → txt_file_store.py} +1 -1
  187. edsl/scenarios/scenario.py +928 -0
  188. edsl/scenarios/scenario_join.py +18 -5
  189. edsl/scenarios/{ScenarioList.py → scenario_list.py} +294 -106
  190. edsl/scenarios/{ScenarioListPdfMixin.py → scenario_list_pdf_tools.py} +16 -15
  191. edsl/scenarios/scenario_selector.py +5 -1
  192. edsl/study/ObjectEntry.py +2 -2
  193. edsl/study/SnapShot.py +5 -5
  194. edsl/study/Study.py +18 -19
  195. edsl/study/__init__.py +6 -4
  196. edsl/surveys/__init__.py +7 -4
  197. edsl/surveys/dag/__init__.py +2 -0
  198. edsl/surveys/{ConstructDAG.py → dag/construct_dag.py} +3 -3
  199. edsl/surveys/{DAG.py → dag/dag.py} +13 -10
  200. edsl/surveys/descriptors.py +1 -1
  201. edsl/surveys/{EditSurvey.py → edit_survey.py} +9 -9
  202. edsl/{exceptions/surveys.py → surveys/exceptions.py} +1 -2
  203. edsl/surveys/memory/__init__.py +3 -0
  204. edsl/surveys/{MemoryPlan.py → memory/memory_plan.py} +10 -9
  205. edsl/surveys/rules/__init__.py +3 -0
  206. edsl/surveys/{Rule.py → rules/rule.py} +103 -43
  207. edsl/surveys/{RuleCollection.py → rules/rule_collection.py} +21 -30
  208. edsl/surveys/{RuleManager.py → rules/rule_manager.py} +19 -13
  209. edsl/surveys/survey.py +1743 -0
  210. edsl/surveys/{SurveyExportMixin.py → survey_export.py} +22 -27
  211. edsl/surveys/{SurveyFlowVisualization.py → survey_flow_visualization.py} +11 -2
  212. edsl/surveys/{Simulator.py → survey_simulator.py} +10 -3
  213. edsl/tasks/__init__.py +32 -0
  214. edsl/{jobs/tasks/QuestionTaskCreator.py → tasks/question_task_creator.py} +115 -57
  215. edsl/tasks/task_creators.py +135 -0
  216. edsl/{jobs/tasks/TaskHistory.py → tasks/task_history.py} +86 -47
  217. edsl/{jobs/tasks → tasks}/task_status_enum.py +91 -7
  218. edsl/tasks/task_status_log.py +85 -0
  219. edsl/tokens/__init__.py +2 -0
  220. edsl/tokens/interview_token_usage.py +53 -0
  221. edsl/utilities/PrettyList.py +1 -1
  222. edsl/utilities/SystemInfo.py +25 -22
  223. edsl/utilities/__init__.py +29 -21
  224. edsl/utilities/gcp_bucket/__init__.py +2 -0
  225. edsl/utilities/gcp_bucket/cloud_storage.py +99 -96
  226. edsl/utilities/interface.py +44 -536
  227. edsl/{results/MarkdownToPDF.py → utilities/markdown_to_pdf.py} +13 -5
  228. edsl/utilities/repair_functions.py +1 -1
  229. {edsl-0.1.47.dist-info → edsl-0.1.49.dist-info}/METADATA +1 -1
  230. edsl-0.1.49.dist-info/RECORD +347 -0
  231. edsl/Base.py +0 -493
  232. edsl/BaseDiff.py +0 -260
  233. edsl/agents/InvigilatorBase.py +0 -260
  234. edsl/agents/PromptConstructor.py +0 -318
  235. edsl/coop/PriceFetcher.py +0 -54
  236. edsl/data/Cache.py +0 -582
  237. edsl/data/CacheEntry.py +0 -238
  238. edsl/data/SQLiteDict.py +0 -292
  239. edsl/data/__init__.py +0 -5
  240. edsl/data/orm.py +0 -10
  241. edsl/exceptions/cache.py +0 -5
  242. edsl/exceptions/coop.py +0 -14
  243. edsl/exceptions/data.py +0 -14
  244. edsl/exceptions/scenarios.py +0 -29
  245. edsl/jobs/Answers.py +0 -43
  246. edsl/jobs/JobsPrompts.py +0 -354
  247. edsl/jobs/buckets/BucketCollection.py +0 -134
  248. edsl/jobs/buckets/ModelBuckets.py +0 -65
  249. edsl/jobs/buckets/TokenBucket.py +0 -283
  250. edsl/jobs/buckets/TokenBucketClient.py +0 -191
  251. edsl/jobs/interviews/Interview.py +0 -395
  252. edsl/jobs/interviews/InterviewExceptionCollection.py +0 -99
  253. edsl/jobs/interviews/InterviewStatisticsCollection.py +0 -25
  254. edsl/jobs/runners/JobsRunnerAsyncio.py +0 -163
  255. edsl/jobs/runners/JobsRunnerStatusData.py +0 -0
  256. edsl/jobs/tasks/TaskCreators.py +0 -64
  257. edsl/jobs/tasks/TaskStatusLog.py +0 -23
  258. edsl/jobs/tokens/InterviewTokenUsage.py +0 -27
  259. edsl/language_models/LanguageModel.py +0 -635
  260. edsl/language_models/ServiceDataSources.py +0 -0
  261. edsl/language_models/key_management/KeyLookup.py +0 -63
  262. edsl/language_models/key_management/KeyLookupCollection.py +0 -38
  263. edsl/language_models/key_management/models.py +0 -137
  264. edsl/questions/QuestionBase.py +0 -544
  265. edsl/questions/QuestionFreeText.py +0 -130
  266. edsl/questions/derived/QuestionLikertFive.py +0 -76
  267. edsl/results/ResultsExportMixin.py +0 -45
  268. edsl/results/TextEditor.py +0 -50
  269. edsl/results/results_fetch_mixin.py +0 -33
  270. edsl/results/results_tools_mixin.py +0 -98
  271. edsl/scenarios/DocumentChunker.py +0 -104
  272. edsl/scenarios/Scenario.py +0 -548
  273. edsl/scenarios/ScenarioHtmlMixin.py +0 -65
  274. edsl/scenarios/ScenarioListExportMixin.py +0 -45
  275. edsl/scenarios/handlers/latex.py +0 -5
  276. edsl/shared.py +0 -1
  277. edsl/surveys/Survey.py +0 -1301
  278. edsl/surveys/SurveyQualtricsImport.py +0 -284
  279. edsl/surveys/SurveyToApp.py +0 -141
  280. edsl/surveys/instructions/__init__.py +0 -0
  281. edsl/tools/__init__.py +0 -1
  282. edsl/tools/clusters.py +0 -192
  283. edsl/tools/embeddings.py +0 -27
  284. edsl/tools/embeddings_plotting.py +0 -118
  285. edsl/tools/plotting.py +0 -112
  286. edsl/tools/summarize.py +0 -18
  287. edsl/utilities/data/Registry.py +0 -6
  288. edsl/utilities/data/__init__.py +0 -1
  289. edsl/utilities/data/scooter_results.json +0 -1
  290. edsl-0.1.47.dist-info/RECORD +0 -354
  291. /edsl/coop/{CoopFunctionsMixin.py → coop_functions.py} +0 -0
  292. /edsl/{results → dataset/display}/CSSParameterizer.py +0 -0
  293. /edsl/{language_models/key_management → dataset/display}/__init__.py +0 -0
  294. /edsl/{results → dataset/display}/table_data_class.py +0 -0
  295. /edsl/{results → dataset/display}/table_display.css +0 -0
  296. /edsl/{results/ResultsGGMixin.py → dataset/r/ggplot.py} +0 -0
  297. /edsl/{results → dataset}/tree_explore.py +0 -0
  298. /edsl/{surveys/instructions/ChangeInstruction.py → instructions/change_instruction.py} +0 -0
  299. /edsl/{jobs/interviews → interviews}/interview_status_enum.py +0 -0
  300. /edsl/jobs/{runners/JobsRunnerStatus.py → jobs_runner_status.py} +0 -0
  301. /edsl/language_models/{PriceManager.py → price_manager.py} +0 -0
  302. /edsl/language_models/{fake_openai_call.py → unused/fake_openai_call.py} +0 -0
  303. /edsl/language_models/{fake_openai_service.py → unused/fake_openai_service.py} +0 -0
  304. /edsl/notebooks/{NotebookToLaTeX.py → notebook_to_latex.py} +0 -0
  305. /edsl/{exceptions/questions.py → questions/exceptions.py} +0 -0
  306. /edsl/questions/{SimpleAskMixin.py → simple_ask_mixin.py} +0 -0
  307. /edsl/surveys/{Memory.py → memory/memory.py} +0 -0
  308. /edsl/surveys/{MemoryManagement.py → memory/memory_management.py} +0 -0
  309. /edsl/surveys/{SurveyCSS.py → survey_css.py} +0 -0
  310. /edsl/{jobs/tokens/TokenUsage.py → tokens/token_usage.py} +0 -0
  311. /edsl/{results/MarkdownToDocx.py → utilities/markdown_to_docx.py} +0 -0
  312. /edsl/{TemplateLoader.py → utilities/template_loader.py} +0 -0
  313. {edsl-0.1.47.dist-info → edsl-0.1.49.dist-info}/LICENSE +0 -0
  314. {edsl-0.1.47.dist-info → edsl-0.1.49.dist-info}/WHEEL +0 -0
@@ -2,21 +2,61 @@ import base64
2
2
  import io
3
3
  import tempfile
4
4
  import mimetypes
5
+ import asyncio
5
6
  import os
6
7
  from typing import Dict, Any, IO, Optional
7
-
8
- from edsl.scenarios.Scenario import Scenario
9
- from edsl.utilities.remove_edsl_version import remove_edsl_version
10
-
11
- from edsl.scenarios.file_methods import FileMethods
12
8
  from typing import Union
13
9
  from uuid import UUID
14
10
  import time
15
11
  from typing import Dict, Any, IO, Optional, List, Union, Literal
16
12
 
17
-
13
+ from .scenario import Scenario
14
+ from ..utilities import remove_edsl_version
15
+ from .file_methods import FileMethods
18
16
 
19
17
  class FileStore(Scenario):
18
+ """
19
+ A specialized Scenario subclass for managing file content and metadata.
20
+
21
+ FileStore provides functionality for working with files in EDSL, handling various
22
+ file formats with appropriate encoding, storage, and access methods. It extends
23
+ Scenario to allow files to be included in surveys, questions, and other EDSL components.
24
+
25
+ FileStore supports multiple file formats including text, PDF, Word documents, images,
26
+ and more. It can load files from local paths or URLs, and provides methods for
27
+ accessing file content, extracting text, and managing file operations.
28
+
29
+ Key features:
30
+ - Base64 encoding for portability and serialization
31
+ - Lazy loading through temporary files when needed
32
+ - Automatic MIME type detection
33
+ - Text extraction from various file formats
34
+ - Format-specific operations through specialized handlers
35
+
36
+ Attributes:
37
+ _path (str): The original file path.
38
+ _temp_path (str): Path to any generated temporary file.
39
+ suffix (str): File extension.
40
+ binary (bool): Whether the file is binary.
41
+ mime_type (str): The file's MIME type.
42
+ base64_string (str): Base64-encoded file content.
43
+ external_locations (dict): Dictionary of external locations.
44
+ extracted_text (str): Text extracted from the file.
45
+
46
+ Examples:
47
+ >>> import tempfile
48
+ >>> # Create a text file
49
+ >>> with tempfile.NamedTemporaryFile(suffix=".txt", mode="w") as f:
50
+ ... _ = f.write("Hello World")
51
+ ... _ = f.flush()
52
+ ... fs = FileStore(f.name)
53
+
54
+ # The following example works locally but is commented out for CI environments
55
+ # where dependencies like pandoc may not be available:
56
+ # >>> # FileStore supports various formats
57
+ # >>> formats = ["txt", "pdf", "docx", "pptx", "md", "py", "json", "csv", "html", "png", "db"]
58
+ # >>> _ = [FileStore.example(format) for format in formats]
59
+ """
20
60
  __documentation__ = "https://docs.expectedparrot.com/en/latest/filestore.html"
21
61
 
22
62
  def __init__(
@@ -30,6 +70,31 @@ class FileStore(Scenario):
30
70
  extracted_text: Optional[str] = None,
31
71
  **kwargs,
32
72
  ):
73
+ """
74
+ Initialize a new FileStore object.
75
+
76
+ This constructor creates a FileStore object from either a file path or a base64-encoded
77
+ string representation of file content. It handles automatic detection of file properties
78
+ like MIME type, extracts text content when possible, and manages file encoding.
79
+
80
+ Args:
81
+ path: Path to the file to load. Can be a local file path or URL.
82
+ mime_type: MIME type of the file. If not provided, will be auto-detected.
83
+ binary: Whether the file is binary. Defaults to False.
84
+ suffix: File extension. If not provided, will be extracted from the path.
85
+ base64_string: Base64-encoded file content. If provided, the file content
86
+ will be loaded from this string instead of the path.
87
+ external_locations: Dictionary mapping location names to URLs or paths where
88
+ the file can also be accessed.
89
+ extracted_text: Pre-extracted text content from the file. If not provided,
90
+ text will be extracted automatically if possible.
91
+ **kwargs: Additional keyword arguments. 'filename' can be used as an
92
+ alternative to 'path'.
93
+
94
+ Note:
95
+ If path is a URL (starts with http:// or https://), the file will be
96
+ downloaded automatically.
97
+ """
33
98
  if path is None and "filename" in kwargs:
34
99
  path = kwargs["filename"]
35
100
 
@@ -69,8 +134,32 @@ class FileStore(Scenario):
69
134
  @property
70
135
  def path(self) -> str:
71
136
  """
72
- Property that returns a valid path to the file content.
73
- If the original path doesn't exist, generates a temporary file from the base64 content.
137
+ Returns a valid path to the file content, creating a temporary file if needed.
138
+
139
+ This property ensures that a valid file path is always available for the file
140
+ content, even if the original file is no longer accessible or if the FileStore
141
+ was created from a base64 string without a path. If the original path doesn't
142
+ exist, it automatically generates a temporary file from the base64 content.
143
+
144
+ Returns:
145
+ A string containing a valid file path to access the file content.
146
+
147
+ Examples:
148
+ >>> import tempfile, os
149
+ >>> with tempfile.NamedTemporaryFile(suffix=".txt", mode="w") as f:
150
+ ... _ = f.write("Hello World")
151
+ ... _ = f.flush()
152
+ ... fs = FileStore(f.name)
153
+ ... os.path.isfile(fs.path)
154
+ True
155
+
156
+
157
+ Notes:
158
+ - The path may point to a temporary file that will be cleaned up when the
159
+ Python process exits
160
+ - Accessing this property may create a new temporary file if needed
161
+ - This property provides a consistent interface regardless of how the
162
+ FileStore was created (from file or from base64 string)
74
163
  """
75
164
  # Check if original path exists and is accessible
76
165
  if self._path and os.path.isfile(self._path):
@@ -157,7 +246,7 @@ class FileStore(Scenario):
157
246
  Returns:
158
247
  ScenarioList containing FileStore objects with their corresponding URLs
159
248
  """
160
- from edsl import ScenarioList
249
+ from .scenario_list import ScenarioList
161
250
 
162
251
  try:
163
252
  # Try using get_event_loop first (works in regular Python)
@@ -222,10 +311,14 @@ class FileStore(Scenario):
222
311
 
223
312
  def _repr_html_(self):
224
313
  parent_html = super()._repr_html_()
225
- from edsl.scenarios.ConstructDownloadLink import ConstructDownloadLink
314
+ from .construct_download_link import ConstructDownloadLink
226
315
 
227
316
  link = ConstructDownloadLink(self).html_create_link(self.path, style=None)
228
317
  return f"{parent_html}<br>{link}"
318
+
319
+ def download_link(self):
320
+ from .construct_download_link import ConstructDownloadLink
321
+ return ConstructDownloadLink(self).html_create_link(self.path, style=None)
229
322
 
230
323
  def encode_file_to_base64_string(self, file_path: str):
231
324
  try:
@@ -340,10 +433,15 @@ class FileStore(Scenario):
340
433
  file_like_object = self.base64_to_text_file(self.base64_string)
341
434
 
342
435
  # Create a named temporary file
343
- mode = "wb" if self.binary else "w"
344
- temp_file = tempfile.NamedTemporaryFile(
345
- delete=False, suffix="." + suffix, mode=mode
346
- )
436
+ # We need different parameters for binary vs text mode
437
+ if self.binary:
438
+ temp_file = tempfile.NamedTemporaryFile(
439
+ delete=False, suffix="." + suffix, mode="wb"
440
+ )
441
+ else:
442
+ temp_file = tempfile.NamedTemporaryFile(
443
+ delete=False, suffix="." + suffix, encoding="utf-8", mode="w"
444
+ )
347
445
 
348
446
  if self.binary:
349
447
  temp_file.write(file_like_object.read())
@@ -449,7 +547,7 @@ class FileStore(Scenario):
449
547
  return cls(download_path, mime_type=mime_type)
450
548
 
451
549
  def create_link(self, custom_filename=None, style=None):
452
- from edsl.scenarios.ConstructDownloadLink import ConstructDownloadLink
550
+ from .construct_download_link import ConstructDownloadLink
453
551
 
454
552
  return ConstructDownloadLink(self).create_link(custom_filename, style)
455
553
 
@@ -486,184 +584,172 @@ class FileStore(Scenario):
486
584
  )
487
585
 
488
586
 
489
- class CSVFileStore(FileStore):
490
- @classmethod
491
- def example(cls):
492
- from edsl.results.Results import Results
493
-
494
- r = Results.example()
495
- import tempfile
496
-
497
- with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as f:
498
- r.to_csv(filename=f.name)
499
-
500
- return cls(f.name)
501
-
502
- def view(self):
503
- import pandas as pd
504
-
505
- return pd.read_csv(self.to_tempfile())
506
-
507
-
508
- class PDFFileStore(FileStore):
509
- def view(self):
510
- pdf_path = self.to_tempfile()
511
- print(f"PDF path: {pdf_path}") # Print the path to ensure it exists
512
- import os
513
- import subprocess
514
-
515
- if os.path.exists(pdf_path):
516
- try:
517
- if os.name == "posix":
518
- # for cool kids
519
- subprocess.run(["open", pdf_path], check=True) # macOS
520
- elif os.name == "nt":
521
- os.startfile(pdf_path) # Windows
522
- else:
523
- subprocess.run(["xdg-open", pdf_path], check=True) # Linux
524
- except Exception as e:
525
- print(f"Error opening PDF: {e}")
526
- else:
527
- print("PDF file was not created successfully.")
528
-
529
- @classmethod
530
- def example(cls):
531
- import textwrap
532
-
533
- pdf_string = textwrap.dedent(
534
- """\
535
- %PDF-1.4
536
- 1 0 obj
537
- << /Type /Catalog /Pages 2 0 R >>
538
- endobj
539
- 2 0 obj
540
- << /Type /Pages /Kids [3 0 R] /Count 1 >>
541
- endobj
542
- 3 0 obj
543
- << /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R >>
544
- endobj
545
- 4 0 obj
546
- << /Length 44 >>
547
- stream
548
- BT
549
- /F1 24 Tf
550
- 100 700 Td
551
- (Hello, World!) Tj
552
- ET
553
- endstream
554
- endobj
555
- 5 0 obj
556
- << /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>
557
- endobj
558
- 6 0 obj
559
- << /ProcSet [/PDF /Text] /Font << /F1 5 0 R >> >>
560
- endobj
561
- xref
562
- 0 7
563
- 0000000000 65535 f
564
- 0000000010 00000 n
565
- 0000000053 00000 n
566
- 0000000100 00000 n
567
- 0000000173 00000 n
568
- 0000000232 00000 n
569
- 0000000272 00000 n
570
- trailer
571
- << /Size 7 /Root 1 0 R >>
572
- startxref
573
- 318
574
- %%EOF"""
575
- )
576
- import tempfile
577
-
578
- with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as f:
579
- f.write(pdf_string.encode())
580
-
581
- return cls(f.name)
582
-
583
-
584
- class PNGFileStore(FileStore):
585
- @classmethod
586
- def example(cls):
587
- import textwrap
588
-
589
- png_string = textwrap.dedent(
590
- """\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x01\x00\x00\x00\x01\x00\x08\x06\x00\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\x0cIDAT\x08\xd7c\x00\x01"""
591
- )
592
- import tempfile
593
-
594
- with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
595
- f.write(png_string.encode())
596
-
597
- return cls(f.name)
598
-
599
- def view(self):
600
- import matplotlib.pyplot as plt
601
- import matplotlib.image as mpimg
602
-
603
- img = mpimg.imread(self.to_tempfile())
604
- plt.imshow(img)
605
- plt.show()
606
-
607
-
608
- class SQLiteFileStore(FileStore):
609
- @classmethod
610
- def example(cls):
611
- import sqlite3
612
- import tempfile
613
-
614
- with tempfile.NamedTemporaryFile(suffix=".sqlite", delete=False) as f:
615
- conn = sqlite3.connect(f.name)
616
- c = conn.cursor()
617
- c.execute("""CREATE TABLE stocks (date text)""")
618
- conn.commit()
619
-
620
- return cls(f.name)
621
-
622
- def view(self):
623
- import subprocess
624
- import os
625
-
626
- sqlite_path = self.to_tempfile()
627
- os.system(f"sqlite3 {sqlite_path}")
628
-
629
-
630
- class HTMLFileStore(FileStore):
631
- @classmethod
632
- def example(cls):
633
- import tempfile
634
-
635
- with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as f:
636
- f.write("<html><body><h1>Test</h1></body></html>".encode())
637
-
638
- return cls(f.name)
639
-
640
- def view(self):
641
- import webbrowser
642
-
643
- html_path = self.to_tempfile()
644
- webbrowser.open("file://" + html_path)
587
+ # class CSVFileStore(FileStore):
588
+ # @classmethod
589
+ # def example(cls):
590
+ # from ..results import Results
591
+
592
+ # r = Results.example()
593
+ # import tempfile
594
+
595
+ # with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as f:
596
+ # r.to_csv(filename=f.name)
597
+
598
+ # return cls(f.name)
599
+
600
+ # def view(self):
601
+ # import pandas as pd
602
+
603
+ # return pd.read_csv(self.to_tempfile())
604
+
605
+
606
+ # class PDFFileStore(FileStore):
607
+ # def view(self):
608
+ # pdf_path = self.to_tempfile()
609
+ # print(f"PDF path: {pdf_path}") # Print the path to ensure it exists
610
+ # import os
611
+ # import subprocess
612
+
613
+ # if os.path.exists(pdf_path):
614
+ # try:
615
+ # if os.name == "posix":
616
+ # # for cool kids
617
+ # subprocess.run(["open", pdf_path], check=True) # macOS
618
+ # elif os.name == "nt":
619
+ # os.startfile(pdf_path) # Windows
620
+ # else:
621
+ # subprocess.run(["xdg-open", pdf_path], check=True) # Linux
622
+ # except Exception as e:
623
+ # print(f"Error opening PDF: {e}")
624
+ # else:
625
+ # print("PDF file was not created successfully.")
626
+
627
+ # @classmethod
628
+ # def example(cls):
629
+ # import textwrap
630
+
631
+ # pdf_string = textwrap.dedent(
632
+ # """\
633
+ # %PDF-1.4
634
+ # 1 0 obj
635
+ # << /Type /Catalog /Pages 2 0 R >>
636
+ # endobj
637
+ # 2 0 obj
638
+ # << /Type /Pages /Kids [3 0 R] /Count 1 >>
639
+ # endobj
640
+ # 3 0 obj
641
+ # << /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R >>
642
+ # endobj
643
+ # 4 0 obj
644
+ # << /Length 44 >>
645
+ # stream
646
+ # BT
647
+ # /F1 24 Tf
648
+ # 100 700 Td
649
+ # (Hello, World!) Tj
650
+ # ET
651
+ # endstream
652
+ # endobj
653
+ # 5 0 obj
654
+ # << /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>
655
+ # endobj
656
+ # 6 0 obj
657
+ # << /ProcSet [/PDF /Text] /Font << /F1 5 0 R >> >>
658
+ # endobj
659
+ # xref
660
+ # 0 7
661
+ # 0000000000 65535 f
662
+ # 0000000010 00000 n
663
+ # 0000000053 00000 n
664
+ # 0000000100 00000 n
665
+ # 0000000173 00000 n
666
+ # 0000000232 00000 n
667
+ # 0000000272 00000 n
668
+ # trailer
669
+ # << /Size 7 /Root 1 0 R >>
670
+ # startxref
671
+ # 318
672
+ # %%EOF"""
673
+ # )
674
+ # import tempfile
675
+
676
+ # with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as f:
677
+ # f.write(pdf_string.encode())
678
+
679
+ # return cls(f.name)
680
+
681
+
682
+ # class PNGFileStore(FileStore):
683
+ # @classmethod
684
+ # def example(cls):
685
+ # import textwrap
686
+
687
+ # png_string = textwrap.dedent(
688
+ # """\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x01\x00\x00\x00\x01\x00\x08\x06\x00\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\x0cIDAT\x08\xd7c\x00\x01"""
689
+ # )
690
+ # import tempfile
691
+
692
+ # with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
693
+ # f.write(png_string.encode())
694
+
695
+ # return cls(f.name)
696
+
697
+ # def view(self):
698
+ # import matplotlib.pyplot as plt
699
+ # import matplotlib.image as mpimg
700
+
701
+ # img = mpimg.imread(self.to_tempfile())
702
+ # plt.imshow(img)
703
+ # plt.show()
704
+
705
+
706
+ # class SQLiteFileStore(FileStore):
707
+ # @classmethod
708
+ # def example(cls):
709
+ # import sqlite3
710
+ # import tempfile
711
+
712
+ # with tempfile.NamedTemporaryFile(suffix=".sqlite", delete=False) as f:
713
+ # conn = sqlite3.connect(f.name)
714
+ # c = conn.cursor()
715
+ # c.execute("""CREATE TABLE stocks (date text)""")
716
+ # conn.commit()
717
+
718
+ # return cls(f.name)
719
+
720
+ # def view(self):
721
+ # import subprocess
722
+ # import os
723
+
724
+ # sqlite_path = self.to_tempfile()
725
+ # os.system(f"sqlite3 {sqlite_path}")
726
+
727
+
728
+ # class HTMLFileStore(FileStore):
729
+ # @classmethod
730
+ # def example(cls):
731
+ # import tempfile
732
+
733
+ # with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as f:
734
+ # f.write("<html><body><h1>Test</h1></body></html>".encode())
735
+
736
+ # return cls(f.name)
737
+
738
+ # def view(self):
739
+ # import webbrowser
740
+
741
+ # html_path = self.to_tempfile()
742
+ # webbrowser.open("file://" + html_path)
645
743
 
646
744
 
647
745
  if __name__ == "__main__":
648
746
  import doctest
649
-
650
747
  doctest.testmod()
651
748
 
652
- # fs = FileStore.example("pdf")
653
- # fs.view()
654
-
655
- formats = FileMethods.supported_file_types()
656
- for file_type in formats:
657
- print("Now testinging", file_type)
658
- fs = FileStore.example(file_type)
659
- fs.view()
660
- input("Press Enter to continue...")
661
-
662
- # pdf_example.view()
663
- # FileStore(pdf_example).view()
664
-
665
- # pdf_methods = methods.get("pdf")
666
- # file = pdf_methods().example()
667
- # pdf_methods(file).view()
749
+ # formats = FileMethods.supported_file_types()
750
+ # for file_type in formats:
751
+ # print("Now testinging", file_type)
752
+ # fs = FileStore.example(file_type)
753
+ # fs.view()
754
+ # input("Press Enter to continue...")
668
755
 
669
- # print(FileMethods._handlers)
@@ -1,14 +1,14 @@
1
- from .pdf import PdfMethods
2
- from .docx import DocxMethods
3
- from .png import PngMethods
4
- from .txt import TxtMethods
5
- from .html import HtmlMethods
6
- from .md import MarkdownMethods
7
- from .csv import CsvMethods
8
- from .json import JsonMethods
9
- from .sql import SqlMethods
10
- from .pptx import PptxMethods
11
- from .latex import LaTeXMethods
12
- from .py import PyMethods
13
- from .sqlite import SQLiteMethods
14
- from .jpeg import JpegMethods
1
+ from .pdf_file_store import PdfMethods
2
+ from .docx_file_store import DocxMethods
3
+ from .png_file_store import PngMethods
4
+ from .txt_file_store import TxtMethods
5
+ from .html_file_store import HtmlMethods
6
+ from .md_file_store import MarkdownMethods
7
+ from .csv_file_store import CsvMethods
8
+ from .json_file_store import JsonMethods
9
+ from .sql_file_store import SqlMethods
10
+ from .pptx_file_store import PptxMethods
11
+ from .latex_file_store import LaTeXMethods
12
+ from .py_file_store import PyMethods
13
+ from .sqlite_file_store import SQLiteMethods
14
+ from .jpeg_file_store import JpegMethods
@@ -1,6 +1,5 @@
1
1
  import tempfile
2
- from edsl.scenarios.file_methods import FileMethods
3
-
2
+ from ..file_methods import FileMethods
4
3
 
5
4
  class CsvMethods(FileMethods):
6
5
  suffix = "csv"
@@ -1,7 +1,10 @@
1
- from edsl.scenarios.file_methods import FileMethods
2
1
  import os
3
2
  import tempfile
4
3
 
4
+ from ..file_methods import FileMethods
5
+ from ..scenario import Scenario
6
+ from ..scenario_list import ScenarioList
7
+ from ..file_store import FileStore
5
8
 
6
9
  class DocxMethods(FileMethods):
7
10
  suffix = "docx"
@@ -56,8 +59,8 @@ class DocxMethods(FileMethods):
56
59
 
57
60
  def example(self):
58
61
  from docx import Document
59
- from edsl.scenarios.Scenario import Scenario
60
- from edsl.scenarios.ScenarioList import ScenarioList
62
+ from ..scenario import Scenario
63
+ from ..scenario_list import ScenarioList
61
64
 
62
65
  os.makedirs("test_dir", exist_ok=True)
63
66
  doc1 = Document()
@@ -74,7 +77,5 @@ class DocxMethods(FileMethods):
74
77
 
75
78
 
76
79
  if __name__ == "__main__":
77
- docx_temp = DocxMethods.example()
78
- from edsl.scenarios.FileStore import FileStore
79
-
80
- fs = FileStore(docx_temp)
80
+ import doctest
81
+ doctest.testmod()
@@ -1,6 +1,5 @@
1
- from edsl.scenarios.file_methods import FileMethods
2
1
  import tempfile
3
-
2
+ from ..file_methods import FileMethods
4
3
 
5
4
  class HtmlMethods(FileMethods):
6
5
  suffix = "html"
@@ -1,5 +1,5 @@
1
1
  import tempfile
2
- from edsl.scenarios.file_methods import FileMethods
2
+ from ..file_methods import FileMethods
3
3
 
4
4
 
5
5
  class JpegMethods(FileMethods):
@@ -1,8 +1,8 @@
1
- from edsl.scenarios.file_methods import FileMethods
2
1
  import tempfile
3
2
  import json
4
3
  from typing import Optional, Dict, Any
5
4
 
5
+ from ..file_methods import FileMethods
6
6
 
7
7
  class JsonMethods(FileMethods):
8
8
  suffix = "json"
@@ -0,0 +1,5 @@
1
+ from ..handlers.txt_file_store import TxtMethods
2
+
3
+
4
+ class LaTeXMethods(TxtMethods):
5
+ pass
@@ -1,5 +1,5 @@
1
- from edsl.scenarios.file_methods import FileMethods
2
1
  import tempfile
2
+ from ..file_methods import FileMethods
3
3
 
4
4
 
5
5
  class MarkdownMethods(FileMethods):
@@ -1,7 +1,7 @@
1
1
  import os
2
2
  import base64
3
3
 
4
- from edsl.scenarios.file_methods import FileMethods
4
+ from ..file_methods import FileMethods
5
5
 
6
6
 
7
7
  class PdfMethods(FileMethods):
@@ -61,7 +61,7 @@ class PdfMethods(FileMethods):
61
61
  return
62
62
 
63
63
  def example(self):
64
- from edsl.results.Results import Results
64
+ from ...results import Results
65
65
 
66
66
  return (
67
67
  Results.example().select("answer.how_feeling").first().pdf().to_tempfile()