edsl 0.1.47__py3-none-any.whl → 0.1.49__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__init__.py +44 -39
- edsl/__version__.py +1 -1
- edsl/agents/__init__.py +4 -2
- edsl/agents/{Agent.py → agent.py} +442 -152
- edsl/agents/{AgentList.py → agent_list.py} +220 -162
- edsl/agents/descriptors.py +46 -7
- edsl/{exceptions/agents.py → agents/exceptions.py} +3 -12
- edsl/base/__init__.py +75 -0
- edsl/base/base_class.py +1303 -0
- edsl/base/data_transfer_models.py +114 -0
- edsl/base/enums.py +215 -0
- edsl/base.py +8 -0
- edsl/buckets/__init__.py +25 -0
- edsl/buckets/bucket_collection.py +324 -0
- edsl/buckets/model_buckets.py +206 -0
- edsl/buckets/token_bucket.py +502 -0
- edsl/{jobs/buckets/TokenBucketAPI.py → buckets/token_bucket_api.py} +1 -1
- edsl/buckets/token_bucket_client.py +509 -0
- edsl/caching/__init__.py +20 -0
- edsl/caching/cache.py +814 -0
- edsl/caching/cache_entry.py +427 -0
- edsl/{data/CacheHandler.py → caching/cache_handler.py} +14 -15
- edsl/caching/exceptions.py +24 -0
- edsl/caching/orm.py +30 -0
- edsl/{data/RemoteCacheSync.py → caching/remote_cache_sync.py} +3 -3
- edsl/caching/sql_dict.py +441 -0
- edsl/config/__init__.py +8 -0
- edsl/config/config_class.py +177 -0
- edsl/config.py +4 -176
- edsl/conversation/Conversation.py +7 -7
- edsl/conversation/car_buying.py +4 -4
- edsl/conversation/chips.py +6 -6
- edsl/coop/__init__.py +25 -2
- edsl/coop/coop.py +311 -75
- edsl/coop/{ExpectedParrotKeyHandler.py → ep_key_handling.py} +86 -10
- edsl/coop/exceptions.py +62 -0
- edsl/coop/price_fetcher.py +126 -0
- edsl/coop/utils.py +89 -24
- edsl/data_transfer_models.py +5 -72
- edsl/dataset/__init__.py +10 -0
- edsl/{results/Dataset.py → dataset/dataset.py} +116 -36
- edsl/{results/DatasetExportMixin.py → dataset/dataset_operations_mixin.py} +606 -122
- edsl/{results/DatasetTree.py → dataset/dataset_tree.py} +156 -75
- edsl/{results/TableDisplay.py → dataset/display/table_display.py} +18 -7
- edsl/{results → dataset/display}/table_renderers.py +58 -2
- edsl/{results → dataset}/file_exports.py +4 -5
- edsl/{results → dataset}/smart_objects.py +2 -2
- edsl/enums.py +5 -205
- edsl/inference_services/__init__.py +5 -0
- edsl/inference_services/{AvailableModelCacheHandler.py → available_model_cache_handler.py} +2 -3
- edsl/inference_services/{AvailableModelFetcher.py → available_model_fetcher.py} +8 -14
- edsl/inference_services/data_structures.py +3 -2
- edsl/{exceptions/inference_services.py → inference_services/exceptions.py} +1 -1
- edsl/inference_services/{InferenceServiceABC.py → inference_service_abc.py} +1 -1
- edsl/inference_services/{InferenceServicesCollection.py → inference_services_collection.py} +8 -7
- edsl/inference_services/registry.py +4 -41
- edsl/inference_services/{ServiceAvailability.py → service_availability.py} +5 -25
- edsl/inference_services/services/__init__.py +31 -0
- edsl/inference_services/{AnthropicService.py → services/anthropic_service.py} +3 -3
- edsl/inference_services/{AwsBedrock.py → services/aws_bedrock.py} +2 -2
- edsl/inference_services/{AzureAI.py → services/azure_ai.py} +2 -2
- edsl/inference_services/{DeepInfraService.py → services/deep_infra_service.py} +1 -3
- edsl/inference_services/{DeepSeekService.py → services/deep_seek_service.py} +2 -4
- edsl/inference_services/{GoogleService.py → services/google_service.py} +5 -4
- edsl/inference_services/{GroqService.py → services/groq_service.py} +1 -1
- edsl/inference_services/{MistralAIService.py → services/mistral_ai_service.py} +3 -3
- edsl/inference_services/{OllamaService.py → services/ollama_service.py} +1 -7
- edsl/inference_services/{OpenAIService.py → services/open_ai_service.py} +5 -6
- edsl/inference_services/{PerplexityService.py → services/perplexity_service.py} +3 -7
- edsl/inference_services/{TestService.py → services/test_service.py} +7 -6
- edsl/inference_services/{TogetherAIService.py → services/together_ai_service.py} +2 -6
- edsl/inference_services/{XAIService.py → services/xai_service.py} +1 -1
- edsl/inference_services/write_available.py +1 -2
- edsl/instructions/__init__.py +6 -0
- edsl/{surveys/instructions/Instruction.py → instructions/instruction.py} +11 -6
- edsl/{surveys/instructions/InstructionCollection.py → instructions/instruction_collection.py} +10 -5
- edsl/{surveys/InstructionHandler.py → instructions/instruction_handler.py} +3 -3
- edsl/{jobs/interviews → interviews}/ReportErrors.py +2 -2
- edsl/interviews/__init__.py +4 -0
- edsl/{jobs/AnswerQuestionFunctionConstructor.py → interviews/answering_function.py} +45 -18
- edsl/{jobs/interviews/InterviewExceptionEntry.py → interviews/exception_tracking.py} +107 -22
- edsl/interviews/interview.py +638 -0
- edsl/{jobs/interviews/InterviewStatusDictionary.py → interviews/interview_status_dictionary.py} +21 -12
- edsl/{jobs/interviews/InterviewStatusLog.py → interviews/interview_status_log.py} +16 -7
- edsl/{jobs/InterviewTaskManager.py → interviews/interview_task_manager.py} +12 -7
- edsl/{jobs/RequestTokenEstimator.py → interviews/request_token_estimator.py} +8 -3
- edsl/{jobs/interviews/InterviewStatistic.py → interviews/statistics.py} +36 -10
- edsl/invigilators/__init__.py +38 -0
- edsl/invigilators/invigilator_base.py +477 -0
- edsl/{agents/Invigilator.py → invigilators/invigilators.py} +263 -10
- edsl/invigilators/prompt_constructor.py +476 -0
- edsl/{agents → invigilators}/prompt_helpers.py +2 -1
- edsl/{agents/QuestionInstructionPromptBuilder.py → invigilators/question_instructions_prompt_builder.py} +18 -13
- edsl/{agents → invigilators}/question_option_processor.py +96 -21
- edsl/{agents/QuestionTemplateReplacementsBuilder.py → invigilators/question_template_replacements_builder.py} +64 -12
- edsl/jobs/__init__.py +7 -1
- edsl/jobs/async_interview_runner.py +99 -35
- edsl/jobs/check_survey_scenario_compatibility.py +7 -5
- edsl/jobs/data_structures.py +153 -22
- edsl/{exceptions/jobs.py → jobs/exceptions.py} +2 -1
- edsl/jobs/{FetchInvigilator.py → fetch_invigilator.py} +4 -4
- edsl/jobs/{loggers/HTMLTableJobLogger.py → html_table_job_logger.py} +6 -2
- edsl/jobs/{Jobs.py → jobs.py} +313 -167
- edsl/jobs/{JobsChecks.py → jobs_checks.py} +15 -7
- edsl/jobs/{JobsComponentConstructor.py → jobs_component_constructor.py} +19 -17
- edsl/jobs/{InterviewsConstructor.py → jobs_interview_constructor.py} +10 -5
- edsl/jobs/jobs_pricing_estimation.py +347 -0
- edsl/jobs/{JobsRemoteInferenceLogger.py → jobs_remote_inference_logger.py} +4 -3
- edsl/jobs/jobs_runner_asyncio.py +282 -0
- edsl/jobs/{JobsRemoteInferenceHandler.py → remote_inference.py} +19 -22
- edsl/jobs/results_exceptions_handler.py +2 -2
- edsl/key_management/__init__.py +28 -0
- edsl/key_management/key_lookup.py +161 -0
- edsl/{language_models/key_management/KeyLookupBuilder.py → key_management/key_lookup_builder.py} +118 -47
- edsl/key_management/key_lookup_collection.py +82 -0
- edsl/key_management/models.py +218 -0
- edsl/language_models/__init__.py +7 -2
- edsl/language_models/{ComputeCost.py → compute_cost.py} +18 -3
- edsl/{exceptions/language_models.py → language_models/exceptions.py} +2 -1
- edsl/language_models/language_model.py +1080 -0
- edsl/language_models/model.py +10 -25
- edsl/language_models/{ModelList.py → model_list.py} +9 -14
- edsl/language_models/{RawResponseHandler.py → raw_response_handler.py} +1 -1
- edsl/language_models/{RegisterLanguageModelsMeta.py → registry.py} +1 -1
- edsl/language_models/repair.py +4 -4
- edsl/language_models/utilities.py +4 -4
- edsl/notebooks/__init__.py +3 -1
- edsl/notebooks/{Notebook.py → notebook.py} +7 -8
- edsl/prompts/__init__.py +1 -1
- edsl/{exceptions/prompts.py → prompts/exceptions.py} +3 -1
- edsl/prompts/{Prompt.py → prompt.py} +101 -95
- edsl/questions/HTMLQuestion.py +1 -1
- edsl/questions/__init__.py +154 -25
- edsl/questions/answer_validator_mixin.py +1 -1
- edsl/questions/compose_questions.py +4 -3
- edsl/questions/derived/question_likert_five.py +166 -0
- edsl/questions/derived/{QuestionLinearScale.py → question_linear_scale.py} +4 -4
- edsl/questions/derived/{QuestionTopK.py → question_top_k.py} +4 -4
- edsl/questions/derived/{QuestionYesNo.py → question_yes_no.py} +4 -5
- edsl/questions/descriptors.py +24 -30
- edsl/questions/loop_processor.py +65 -19
- edsl/questions/question_base.py +881 -0
- edsl/questions/question_base_gen_mixin.py +15 -16
- edsl/questions/{QuestionBasePromptsMixin.py → question_base_prompts_mixin.py} +2 -2
- edsl/questions/{QuestionBudget.py → question_budget.py} +3 -4
- edsl/questions/{QuestionCheckBox.py → question_check_box.py} +16 -16
- edsl/questions/{QuestionDict.py → question_dict.py} +39 -5
- edsl/questions/{QuestionExtract.py → question_extract.py} +9 -9
- edsl/questions/question_free_text.py +282 -0
- edsl/questions/{QuestionFunctional.py → question_functional.py} +6 -5
- edsl/questions/{QuestionList.py → question_list.py} +6 -7
- edsl/questions/{QuestionMatrix.py → question_matrix.py} +6 -5
- edsl/questions/{QuestionMultipleChoice.py → question_multiple_choice.py} +126 -21
- edsl/questions/{QuestionNumerical.py → question_numerical.py} +5 -5
- edsl/questions/{QuestionRank.py → question_rank.py} +6 -6
- edsl/questions/question_registry.py +4 -9
- edsl/questions/register_questions_meta.py +8 -4
- edsl/questions/response_validator_abc.py +17 -16
- edsl/results/__init__.py +4 -1
- edsl/{exceptions/results.py → results/exceptions.py} +1 -1
- edsl/results/report.py +197 -0
- edsl/results/{Result.py → result.py} +131 -45
- edsl/results/{Results.py → results.py} +365 -220
- edsl/results/results_selector.py +344 -25
- edsl/scenarios/__init__.py +30 -3
- edsl/scenarios/{ConstructDownloadLink.py → construct_download_link.py} +7 -0
- edsl/scenarios/directory_scanner.py +156 -13
- edsl/scenarios/document_chunker.py +186 -0
- edsl/scenarios/exceptions.py +101 -0
- edsl/scenarios/file_methods.py +2 -3
- edsl/scenarios/{FileStore.py → file_store.py} +275 -189
- edsl/scenarios/handlers/__init__.py +14 -14
- edsl/scenarios/handlers/{csv.py → csv_file_store.py} +1 -2
- edsl/scenarios/handlers/{docx.py → docx_file_store.py} +8 -7
- edsl/scenarios/handlers/{html.py → html_file_store.py} +1 -2
- edsl/scenarios/handlers/{jpeg.py → jpeg_file_store.py} +1 -1
- edsl/scenarios/handlers/{json.py → json_file_store.py} +1 -1
- edsl/scenarios/handlers/latex_file_store.py +5 -0
- edsl/scenarios/handlers/{md.py → md_file_store.py} +1 -1
- edsl/scenarios/handlers/{pdf.py → pdf_file_store.py} +2 -2
- edsl/scenarios/handlers/{png.py → png_file_store.py} +1 -1
- edsl/scenarios/handlers/{pptx.py → pptx_file_store.py} +8 -7
- edsl/scenarios/handlers/{py.py → py_file_store.py} +1 -3
- edsl/scenarios/handlers/{sql.py → sql_file_store.py} +2 -1
- edsl/scenarios/handlers/{sqlite.py → sqlite_file_store.py} +2 -3
- edsl/scenarios/handlers/{txt.py → txt_file_store.py} +1 -1
- edsl/scenarios/scenario.py +928 -0
- edsl/scenarios/scenario_join.py +18 -5
- edsl/scenarios/{ScenarioList.py → scenario_list.py} +294 -106
- edsl/scenarios/{ScenarioListPdfMixin.py → scenario_list_pdf_tools.py} +16 -15
- edsl/scenarios/scenario_selector.py +5 -1
- edsl/study/ObjectEntry.py +2 -2
- edsl/study/SnapShot.py +5 -5
- edsl/study/Study.py +18 -19
- edsl/study/__init__.py +6 -4
- edsl/surveys/__init__.py +7 -4
- edsl/surveys/dag/__init__.py +2 -0
- edsl/surveys/{ConstructDAG.py → dag/construct_dag.py} +3 -3
- edsl/surveys/{DAG.py → dag/dag.py} +13 -10
- edsl/surveys/descriptors.py +1 -1
- edsl/surveys/{EditSurvey.py → edit_survey.py} +9 -9
- edsl/{exceptions/surveys.py → surveys/exceptions.py} +1 -2
- edsl/surveys/memory/__init__.py +3 -0
- edsl/surveys/{MemoryPlan.py → memory/memory_plan.py} +10 -9
- edsl/surveys/rules/__init__.py +3 -0
- edsl/surveys/{Rule.py → rules/rule.py} +103 -43
- edsl/surveys/{RuleCollection.py → rules/rule_collection.py} +21 -30
- edsl/surveys/{RuleManager.py → rules/rule_manager.py} +19 -13
- edsl/surveys/survey.py +1743 -0
- edsl/surveys/{SurveyExportMixin.py → survey_export.py} +22 -27
- edsl/surveys/{SurveyFlowVisualization.py → survey_flow_visualization.py} +11 -2
- edsl/surveys/{Simulator.py → survey_simulator.py} +10 -3
- edsl/tasks/__init__.py +32 -0
- edsl/{jobs/tasks/QuestionTaskCreator.py → tasks/question_task_creator.py} +115 -57
- edsl/tasks/task_creators.py +135 -0
- edsl/{jobs/tasks/TaskHistory.py → tasks/task_history.py} +86 -47
- edsl/{jobs/tasks → tasks}/task_status_enum.py +91 -7
- edsl/tasks/task_status_log.py +85 -0
- edsl/tokens/__init__.py +2 -0
- edsl/tokens/interview_token_usage.py +53 -0
- edsl/utilities/PrettyList.py +1 -1
- edsl/utilities/SystemInfo.py +25 -22
- edsl/utilities/__init__.py +29 -21
- edsl/utilities/gcp_bucket/__init__.py +2 -0
- edsl/utilities/gcp_bucket/cloud_storage.py +99 -96
- edsl/utilities/interface.py +44 -536
- edsl/{results/MarkdownToPDF.py → utilities/markdown_to_pdf.py} +13 -5
- edsl/utilities/repair_functions.py +1 -1
- {edsl-0.1.47.dist-info → edsl-0.1.49.dist-info}/METADATA +1 -1
- edsl-0.1.49.dist-info/RECORD +347 -0
- edsl/Base.py +0 -493
- edsl/BaseDiff.py +0 -260
- edsl/agents/InvigilatorBase.py +0 -260
- edsl/agents/PromptConstructor.py +0 -318
- edsl/coop/PriceFetcher.py +0 -54
- edsl/data/Cache.py +0 -582
- edsl/data/CacheEntry.py +0 -238
- edsl/data/SQLiteDict.py +0 -292
- edsl/data/__init__.py +0 -5
- edsl/data/orm.py +0 -10
- edsl/exceptions/cache.py +0 -5
- edsl/exceptions/coop.py +0 -14
- edsl/exceptions/data.py +0 -14
- edsl/exceptions/scenarios.py +0 -29
- edsl/jobs/Answers.py +0 -43
- edsl/jobs/JobsPrompts.py +0 -354
- edsl/jobs/buckets/BucketCollection.py +0 -134
- edsl/jobs/buckets/ModelBuckets.py +0 -65
- edsl/jobs/buckets/TokenBucket.py +0 -283
- edsl/jobs/buckets/TokenBucketClient.py +0 -191
- edsl/jobs/interviews/Interview.py +0 -395
- edsl/jobs/interviews/InterviewExceptionCollection.py +0 -99
- edsl/jobs/interviews/InterviewStatisticsCollection.py +0 -25
- edsl/jobs/runners/JobsRunnerAsyncio.py +0 -163
- edsl/jobs/runners/JobsRunnerStatusData.py +0 -0
- edsl/jobs/tasks/TaskCreators.py +0 -64
- edsl/jobs/tasks/TaskStatusLog.py +0 -23
- edsl/jobs/tokens/InterviewTokenUsage.py +0 -27
- edsl/language_models/LanguageModel.py +0 -635
- edsl/language_models/ServiceDataSources.py +0 -0
- edsl/language_models/key_management/KeyLookup.py +0 -63
- edsl/language_models/key_management/KeyLookupCollection.py +0 -38
- edsl/language_models/key_management/models.py +0 -137
- edsl/questions/QuestionBase.py +0 -544
- edsl/questions/QuestionFreeText.py +0 -130
- edsl/questions/derived/QuestionLikertFive.py +0 -76
- edsl/results/ResultsExportMixin.py +0 -45
- edsl/results/TextEditor.py +0 -50
- edsl/results/results_fetch_mixin.py +0 -33
- edsl/results/results_tools_mixin.py +0 -98
- edsl/scenarios/DocumentChunker.py +0 -104
- edsl/scenarios/Scenario.py +0 -548
- edsl/scenarios/ScenarioHtmlMixin.py +0 -65
- edsl/scenarios/ScenarioListExportMixin.py +0 -45
- edsl/scenarios/handlers/latex.py +0 -5
- edsl/shared.py +0 -1
- edsl/surveys/Survey.py +0 -1301
- edsl/surveys/SurveyQualtricsImport.py +0 -284
- edsl/surveys/SurveyToApp.py +0 -141
- edsl/surveys/instructions/__init__.py +0 -0
- edsl/tools/__init__.py +0 -1
- edsl/tools/clusters.py +0 -192
- edsl/tools/embeddings.py +0 -27
- edsl/tools/embeddings_plotting.py +0 -118
- edsl/tools/plotting.py +0 -112
- edsl/tools/summarize.py +0 -18
- edsl/utilities/data/Registry.py +0 -6
- edsl/utilities/data/__init__.py +0 -1
- edsl/utilities/data/scooter_results.json +0 -1
- edsl-0.1.47.dist-info/RECORD +0 -354
- /edsl/coop/{CoopFunctionsMixin.py → coop_functions.py} +0 -0
- /edsl/{results → dataset/display}/CSSParameterizer.py +0 -0
- /edsl/{language_models/key_management → dataset/display}/__init__.py +0 -0
- /edsl/{results → dataset/display}/table_data_class.py +0 -0
- /edsl/{results → dataset/display}/table_display.css +0 -0
- /edsl/{results/ResultsGGMixin.py → dataset/r/ggplot.py} +0 -0
- /edsl/{results → dataset}/tree_explore.py +0 -0
- /edsl/{surveys/instructions/ChangeInstruction.py → instructions/change_instruction.py} +0 -0
- /edsl/{jobs/interviews → interviews}/interview_status_enum.py +0 -0
- /edsl/jobs/{runners/JobsRunnerStatus.py → jobs_runner_status.py} +0 -0
- /edsl/language_models/{PriceManager.py → price_manager.py} +0 -0
- /edsl/language_models/{fake_openai_call.py → unused/fake_openai_call.py} +0 -0
- /edsl/language_models/{fake_openai_service.py → unused/fake_openai_service.py} +0 -0
- /edsl/notebooks/{NotebookToLaTeX.py → notebook_to_latex.py} +0 -0
- /edsl/{exceptions/questions.py → questions/exceptions.py} +0 -0
- /edsl/questions/{SimpleAskMixin.py → simple_ask_mixin.py} +0 -0
- /edsl/surveys/{Memory.py → memory/memory.py} +0 -0
- /edsl/surveys/{MemoryManagement.py → memory/memory_management.py} +0 -0
- /edsl/surveys/{SurveyCSS.py → survey_css.py} +0 -0
- /edsl/{jobs/tokens/TokenUsage.py → tokens/token_usage.py} +0 -0
- /edsl/{results/MarkdownToDocx.py → utilities/markdown_to_docx.py} +0 -0
- /edsl/{TemplateLoader.py → utilities/template_loader.py} +0 -0
- {edsl-0.1.47.dist-info → edsl-0.1.49.dist-info}/LICENSE +0 -0
- {edsl-0.1.47.dist-info → edsl-0.1.49.dist-info}/WHEEL +0 -0
@@ -2,21 +2,61 @@ import base64
|
|
2
2
|
import io
|
3
3
|
import tempfile
|
4
4
|
import mimetypes
|
5
|
+
import asyncio
|
5
6
|
import os
|
6
7
|
from typing import Dict, Any, IO, Optional
|
7
|
-
|
8
|
-
from edsl.scenarios.Scenario import Scenario
|
9
|
-
from edsl.utilities.remove_edsl_version import remove_edsl_version
|
10
|
-
|
11
|
-
from edsl.scenarios.file_methods import FileMethods
|
12
8
|
from typing import Union
|
13
9
|
from uuid import UUID
|
14
10
|
import time
|
15
11
|
from typing import Dict, Any, IO, Optional, List, Union, Literal
|
16
12
|
|
17
|
-
|
13
|
+
from .scenario import Scenario
|
14
|
+
from ..utilities import remove_edsl_version
|
15
|
+
from .file_methods import FileMethods
|
18
16
|
|
19
17
|
class FileStore(Scenario):
|
18
|
+
"""
|
19
|
+
A specialized Scenario subclass for managing file content and metadata.
|
20
|
+
|
21
|
+
FileStore provides functionality for working with files in EDSL, handling various
|
22
|
+
file formats with appropriate encoding, storage, and access methods. It extends
|
23
|
+
Scenario to allow files to be included in surveys, questions, and other EDSL components.
|
24
|
+
|
25
|
+
FileStore supports multiple file formats including text, PDF, Word documents, images,
|
26
|
+
and more. It can load files from local paths or URLs, and provides methods for
|
27
|
+
accessing file content, extracting text, and managing file operations.
|
28
|
+
|
29
|
+
Key features:
|
30
|
+
- Base64 encoding for portability and serialization
|
31
|
+
- Lazy loading through temporary files when needed
|
32
|
+
- Automatic MIME type detection
|
33
|
+
- Text extraction from various file formats
|
34
|
+
- Format-specific operations through specialized handlers
|
35
|
+
|
36
|
+
Attributes:
|
37
|
+
_path (str): The original file path.
|
38
|
+
_temp_path (str): Path to any generated temporary file.
|
39
|
+
suffix (str): File extension.
|
40
|
+
binary (bool): Whether the file is binary.
|
41
|
+
mime_type (str): The file's MIME type.
|
42
|
+
base64_string (str): Base64-encoded file content.
|
43
|
+
external_locations (dict): Dictionary of external locations.
|
44
|
+
extracted_text (str): Text extracted from the file.
|
45
|
+
|
46
|
+
Examples:
|
47
|
+
>>> import tempfile
|
48
|
+
>>> # Create a text file
|
49
|
+
>>> with tempfile.NamedTemporaryFile(suffix=".txt", mode="w") as f:
|
50
|
+
... _ = f.write("Hello World")
|
51
|
+
... _ = f.flush()
|
52
|
+
... fs = FileStore(f.name)
|
53
|
+
|
54
|
+
# The following example works locally but is commented out for CI environments
|
55
|
+
# where dependencies like pandoc may not be available:
|
56
|
+
# >>> # FileStore supports various formats
|
57
|
+
# >>> formats = ["txt", "pdf", "docx", "pptx", "md", "py", "json", "csv", "html", "png", "db"]
|
58
|
+
# >>> _ = [FileStore.example(format) for format in formats]
|
59
|
+
"""
|
20
60
|
__documentation__ = "https://docs.expectedparrot.com/en/latest/filestore.html"
|
21
61
|
|
22
62
|
def __init__(
|
@@ -30,6 +70,31 @@ class FileStore(Scenario):
|
|
30
70
|
extracted_text: Optional[str] = None,
|
31
71
|
**kwargs,
|
32
72
|
):
|
73
|
+
"""
|
74
|
+
Initialize a new FileStore object.
|
75
|
+
|
76
|
+
This constructor creates a FileStore object from either a file path or a base64-encoded
|
77
|
+
string representation of file content. It handles automatic detection of file properties
|
78
|
+
like MIME type, extracts text content when possible, and manages file encoding.
|
79
|
+
|
80
|
+
Args:
|
81
|
+
path: Path to the file to load. Can be a local file path or URL.
|
82
|
+
mime_type: MIME type of the file. If not provided, will be auto-detected.
|
83
|
+
binary: Whether the file is binary. Defaults to False.
|
84
|
+
suffix: File extension. If not provided, will be extracted from the path.
|
85
|
+
base64_string: Base64-encoded file content. If provided, the file content
|
86
|
+
will be loaded from this string instead of the path.
|
87
|
+
external_locations: Dictionary mapping location names to URLs or paths where
|
88
|
+
the file can also be accessed.
|
89
|
+
extracted_text: Pre-extracted text content from the file. If not provided,
|
90
|
+
text will be extracted automatically if possible.
|
91
|
+
**kwargs: Additional keyword arguments. 'filename' can be used as an
|
92
|
+
alternative to 'path'.
|
93
|
+
|
94
|
+
Note:
|
95
|
+
If path is a URL (starts with http:// or https://), the file will be
|
96
|
+
downloaded automatically.
|
97
|
+
"""
|
33
98
|
if path is None and "filename" in kwargs:
|
34
99
|
path = kwargs["filename"]
|
35
100
|
|
@@ -69,8 +134,32 @@ class FileStore(Scenario):
|
|
69
134
|
@property
|
70
135
|
def path(self) -> str:
|
71
136
|
"""
|
72
|
-
|
73
|
-
|
137
|
+
Returns a valid path to the file content, creating a temporary file if needed.
|
138
|
+
|
139
|
+
This property ensures that a valid file path is always available for the file
|
140
|
+
content, even if the original file is no longer accessible or if the FileStore
|
141
|
+
was created from a base64 string without a path. If the original path doesn't
|
142
|
+
exist, it automatically generates a temporary file from the base64 content.
|
143
|
+
|
144
|
+
Returns:
|
145
|
+
A string containing a valid file path to access the file content.
|
146
|
+
|
147
|
+
Examples:
|
148
|
+
>>> import tempfile, os
|
149
|
+
>>> with tempfile.NamedTemporaryFile(suffix=".txt", mode="w") as f:
|
150
|
+
... _ = f.write("Hello World")
|
151
|
+
... _ = f.flush()
|
152
|
+
... fs = FileStore(f.name)
|
153
|
+
... os.path.isfile(fs.path)
|
154
|
+
True
|
155
|
+
|
156
|
+
|
157
|
+
Notes:
|
158
|
+
- The path may point to a temporary file that will be cleaned up when the
|
159
|
+
Python process exits
|
160
|
+
- Accessing this property may create a new temporary file if needed
|
161
|
+
- This property provides a consistent interface regardless of how the
|
162
|
+
FileStore was created (from file or from base64 string)
|
74
163
|
"""
|
75
164
|
# Check if original path exists and is accessible
|
76
165
|
if self._path and os.path.isfile(self._path):
|
@@ -157,7 +246,7 @@ class FileStore(Scenario):
|
|
157
246
|
Returns:
|
158
247
|
ScenarioList containing FileStore objects with their corresponding URLs
|
159
248
|
"""
|
160
|
-
from
|
249
|
+
from .scenario_list import ScenarioList
|
161
250
|
|
162
251
|
try:
|
163
252
|
# Try using get_event_loop first (works in regular Python)
|
@@ -222,10 +311,14 @@ class FileStore(Scenario):
|
|
222
311
|
|
223
312
|
def _repr_html_(self):
|
224
313
|
parent_html = super()._repr_html_()
|
225
|
-
from
|
314
|
+
from .construct_download_link import ConstructDownloadLink
|
226
315
|
|
227
316
|
link = ConstructDownloadLink(self).html_create_link(self.path, style=None)
|
228
317
|
return f"{parent_html}<br>{link}"
|
318
|
+
|
319
|
+
def download_link(self):
|
320
|
+
from .construct_download_link import ConstructDownloadLink
|
321
|
+
return ConstructDownloadLink(self).html_create_link(self.path, style=None)
|
229
322
|
|
230
323
|
def encode_file_to_base64_string(self, file_path: str):
|
231
324
|
try:
|
@@ -340,10 +433,15 @@ class FileStore(Scenario):
|
|
340
433
|
file_like_object = self.base64_to_text_file(self.base64_string)
|
341
434
|
|
342
435
|
# Create a named temporary file
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
436
|
+
# We need different parameters for binary vs text mode
|
437
|
+
if self.binary:
|
438
|
+
temp_file = tempfile.NamedTemporaryFile(
|
439
|
+
delete=False, suffix="." + suffix, mode="wb"
|
440
|
+
)
|
441
|
+
else:
|
442
|
+
temp_file = tempfile.NamedTemporaryFile(
|
443
|
+
delete=False, suffix="." + suffix, encoding="utf-8", mode="w"
|
444
|
+
)
|
347
445
|
|
348
446
|
if self.binary:
|
349
447
|
temp_file.write(file_like_object.read())
|
@@ -449,7 +547,7 @@ class FileStore(Scenario):
|
|
449
547
|
return cls(download_path, mime_type=mime_type)
|
450
548
|
|
451
549
|
def create_link(self, custom_filename=None, style=None):
|
452
|
-
from
|
550
|
+
from .construct_download_link import ConstructDownloadLink
|
453
551
|
|
454
552
|
return ConstructDownloadLink(self).create_link(custom_filename, style)
|
455
553
|
|
@@ -486,184 +584,172 @@ class FileStore(Scenario):
|
|
486
584
|
)
|
487
585
|
|
488
586
|
|
489
|
-
class CSVFileStore(FileStore):
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
class PDFFileStore(FileStore):
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
class PNGFileStore(FileStore):
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
class SQLiteFileStore(FileStore):
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
class HTMLFileStore(FileStore):
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
587
|
+
# class CSVFileStore(FileStore):
|
588
|
+
# @classmethod
|
589
|
+
# def example(cls):
|
590
|
+
# from ..results import Results
|
591
|
+
|
592
|
+
# r = Results.example()
|
593
|
+
# import tempfile
|
594
|
+
|
595
|
+
# with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as f:
|
596
|
+
# r.to_csv(filename=f.name)
|
597
|
+
|
598
|
+
# return cls(f.name)
|
599
|
+
|
600
|
+
# def view(self):
|
601
|
+
# import pandas as pd
|
602
|
+
|
603
|
+
# return pd.read_csv(self.to_tempfile())
|
604
|
+
|
605
|
+
|
606
|
+
# class PDFFileStore(FileStore):
|
607
|
+
# def view(self):
|
608
|
+
# pdf_path = self.to_tempfile()
|
609
|
+
# print(f"PDF path: {pdf_path}") # Print the path to ensure it exists
|
610
|
+
# import os
|
611
|
+
# import subprocess
|
612
|
+
|
613
|
+
# if os.path.exists(pdf_path):
|
614
|
+
# try:
|
615
|
+
# if os.name == "posix":
|
616
|
+
# # for cool kids
|
617
|
+
# subprocess.run(["open", pdf_path], check=True) # macOS
|
618
|
+
# elif os.name == "nt":
|
619
|
+
# os.startfile(pdf_path) # Windows
|
620
|
+
# else:
|
621
|
+
# subprocess.run(["xdg-open", pdf_path], check=True) # Linux
|
622
|
+
# except Exception as e:
|
623
|
+
# print(f"Error opening PDF: {e}")
|
624
|
+
# else:
|
625
|
+
# print("PDF file was not created successfully.")
|
626
|
+
|
627
|
+
# @classmethod
|
628
|
+
# def example(cls):
|
629
|
+
# import textwrap
|
630
|
+
|
631
|
+
# pdf_string = textwrap.dedent(
|
632
|
+
# """\
|
633
|
+
# %PDF-1.4
|
634
|
+
# 1 0 obj
|
635
|
+
# << /Type /Catalog /Pages 2 0 R >>
|
636
|
+
# endobj
|
637
|
+
# 2 0 obj
|
638
|
+
# << /Type /Pages /Kids [3 0 R] /Count 1 >>
|
639
|
+
# endobj
|
640
|
+
# 3 0 obj
|
641
|
+
# << /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R >>
|
642
|
+
# endobj
|
643
|
+
# 4 0 obj
|
644
|
+
# << /Length 44 >>
|
645
|
+
# stream
|
646
|
+
# BT
|
647
|
+
# /F1 24 Tf
|
648
|
+
# 100 700 Td
|
649
|
+
# (Hello, World!) Tj
|
650
|
+
# ET
|
651
|
+
# endstream
|
652
|
+
# endobj
|
653
|
+
# 5 0 obj
|
654
|
+
# << /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>
|
655
|
+
# endobj
|
656
|
+
# 6 0 obj
|
657
|
+
# << /ProcSet [/PDF /Text] /Font << /F1 5 0 R >> >>
|
658
|
+
# endobj
|
659
|
+
# xref
|
660
|
+
# 0 7
|
661
|
+
# 0000000000 65535 f
|
662
|
+
# 0000000010 00000 n
|
663
|
+
# 0000000053 00000 n
|
664
|
+
# 0000000100 00000 n
|
665
|
+
# 0000000173 00000 n
|
666
|
+
# 0000000232 00000 n
|
667
|
+
# 0000000272 00000 n
|
668
|
+
# trailer
|
669
|
+
# << /Size 7 /Root 1 0 R >>
|
670
|
+
# startxref
|
671
|
+
# 318
|
672
|
+
# %%EOF"""
|
673
|
+
# )
|
674
|
+
# import tempfile
|
675
|
+
|
676
|
+
# with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as f:
|
677
|
+
# f.write(pdf_string.encode())
|
678
|
+
|
679
|
+
# return cls(f.name)
|
680
|
+
|
681
|
+
|
682
|
+
# class PNGFileStore(FileStore):
|
683
|
+
# @classmethod
|
684
|
+
# def example(cls):
|
685
|
+
# import textwrap
|
686
|
+
|
687
|
+
# png_string = textwrap.dedent(
|
688
|
+
# """\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x01\x00\x00\x00\x01\x00\x08\x06\x00\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\x0cIDAT\x08\xd7c\x00\x01"""
|
689
|
+
# )
|
690
|
+
# import tempfile
|
691
|
+
|
692
|
+
# with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
|
693
|
+
# f.write(png_string.encode())
|
694
|
+
|
695
|
+
# return cls(f.name)
|
696
|
+
|
697
|
+
# def view(self):
|
698
|
+
# import matplotlib.pyplot as plt
|
699
|
+
# import matplotlib.image as mpimg
|
700
|
+
|
701
|
+
# img = mpimg.imread(self.to_tempfile())
|
702
|
+
# plt.imshow(img)
|
703
|
+
# plt.show()
|
704
|
+
|
705
|
+
|
706
|
+
# class SQLiteFileStore(FileStore):
|
707
|
+
# @classmethod
|
708
|
+
# def example(cls):
|
709
|
+
# import sqlite3
|
710
|
+
# import tempfile
|
711
|
+
|
712
|
+
# with tempfile.NamedTemporaryFile(suffix=".sqlite", delete=False) as f:
|
713
|
+
# conn = sqlite3.connect(f.name)
|
714
|
+
# c = conn.cursor()
|
715
|
+
# c.execute("""CREATE TABLE stocks (date text)""")
|
716
|
+
# conn.commit()
|
717
|
+
|
718
|
+
# return cls(f.name)
|
719
|
+
|
720
|
+
# def view(self):
|
721
|
+
# import subprocess
|
722
|
+
# import os
|
723
|
+
|
724
|
+
# sqlite_path = self.to_tempfile()
|
725
|
+
# os.system(f"sqlite3 {sqlite_path}")
|
726
|
+
|
727
|
+
|
728
|
+
# class HTMLFileStore(FileStore):
|
729
|
+
# @classmethod
|
730
|
+
# def example(cls):
|
731
|
+
# import tempfile
|
732
|
+
|
733
|
+
# with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as f:
|
734
|
+
# f.write("<html><body><h1>Test</h1></body></html>".encode())
|
735
|
+
|
736
|
+
# return cls(f.name)
|
737
|
+
|
738
|
+
# def view(self):
|
739
|
+
# import webbrowser
|
740
|
+
|
741
|
+
# html_path = self.to_tempfile()
|
742
|
+
# webbrowser.open("file://" + html_path)
|
645
743
|
|
646
744
|
|
647
745
|
if __name__ == "__main__":
|
648
746
|
import doctest
|
649
|
-
|
650
747
|
doctest.testmod()
|
651
748
|
|
652
|
-
#
|
653
|
-
#
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
fs = FileStore.example(file_type)
|
659
|
-
fs.view()
|
660
|
-
input("Press Enter to continue...")
|
661
|
-
|
662
|
-
# pdf_example.view()
|
663
|
-
# FileStore(pdf_example).view()
|
664
|
-
|
665
|
-
# pdf_methods = methods.get("pdf")
|
666
|
-
# file = pdf_methods().example()
|
667
|
-
# pdf_methods(file).view()
|
749
|
+
# formats = FileMethods.supported_file_types()
|
750
|
+
# for file_type in formats:
|
751
|
+
# print("Now testinging", file_type)
|
752
|
+
# fs = FileStore.example(file_type)
|
753
|
+
# fs.view()
|
754
|
+
# input("Press Enter to continue...")
|
668
755
|
|
669
|
-
# print(FileMethods._handlers)
|
@@ -1,14 +1,14 @@
|
|
1
|
-
from .
|
2
|
-
from .
|
3
|
-
from .
|
4
|
-
from .
|
5
|
-
from .
|
6
|
-
from .
|
7
|
-
from .
|
8
|
-
from .
|
9
|
-
from .
|
10
|
-
from .
|
11
|
-
from .
|
12
|
-
from .
|
13
|
-
from .
|
14
|
-
from .
|
1
|
+
from .pdf_file_store import PdfMethods
|
2
|
+
from .docx_file_store import DocxMethods
|
3
|
+
from .png_file_store import PngMethods
|
4
|
+
from .txt_file_store import TxtMethods
|
5
|
+
from .html_file_store import HtmlMethods
|
6
|
+
from .md_file_store import MarkdownMethods
|
7
|
+
from .csv_file_store import CsvMethods
|
8
|
+
from .json_file_store import JsonMethods
|
9
|
+
from .sql_file_store import SqlMethods
|
10
|
+
from .pptx_file_store import PptxMethods
|
11
|
+
from .latex_file_store import LaTeXMethods
|
12
|
+
from .py_file_store import PyMethods
|
13
|
+
from .sqlite_file_store import SQLiteMethods
|
14
|
+
from .jpeg_file_store import JpegMethods
|
@@ -1,7 +1,10 @@
|
|
1
|
-
from edsl.scenarios.file_methods import FileMethods
|
2
1
|
import os
|
3
2
|
import tempfile
|
4
3
|
|
4
|
+
from ..file_methods import FileMethods
|
5
|
+
from ..scenario import Scenario
|
6
|
+
from ..scenario_list import ScenarioList
|
7
|
+
from ..file_store import FileStore
|
5
8
|
|
6
9
|
class DocxMethods(FileMethods):
|
7
10
|
suffix = "docx"
|
@@ -56,8 +59,8 @@ class DocxMethods(FileMethods):
|
|
56
59
|
|
57
60
|
def example(self):
|
58
61
|
from docx import Document
|
59
|
-
from
|
60
|
-
from
|
62
|
+
from ..scenario import Scenario
|
63
|
+
from ..scenario_list import ScenarioList
|
61
64
|
|
62
65
|
os.makedirs("test_dir", exist_ok=True)
|
63
66
|
doc1 = Document()
|
@@ -74,7 +77,5 @@ class DocxMethods(FileMethods):
|
|
74
77
|
|
75
78
|
|
76
79
|
if __name__ == "__main__":
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
fs = FileStore(docx_temp)
|
80
|
+
import doctest
|
81
|
+
doctest.testmod()
|
@@ -1,7 +1,7 @@
|
|
1
1
|
import os
|
2
2
|
import base64
|
3
3
|
|
4
|
-
from
|
4
|
+
from ..file_methods import FileMethods
|
5
5
|
|
6
6
|
|
7
7
|
class PdfMethods(FileMethods):
|
@@ -61,7 +61,7 @@ class PdfMethods(FileMethods):
|
|
61
61
|
return
|
62
62
|
|
63
63
|
def example(self):
|
64
|
-
from
|
64
|
+
from ...results import Results
|
65
65
|
|
66
66
|
return (
|
67
67
|
Results.example().select("answer.how_feeling").first().pdf().to_tempfile()
|