edsl 0.1.46__py3-none-any.whl → 0.1.48__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__init__.py +44 -39
- edsl/__version__.py +1 -1
- edsl/agents/__init__.py +4 -2
- edsl/agents/{Agent.py → agent.py} +442 -152
- edsl/agents/{AgentList.py → agent_list.py} +220 -162
- edsl/agents/descriptors.py +46 -7
- edsl/{exceptions/agents.py → agents/exceptions.py} +3 -12
- edsl/base/__init__.py +75 -0
- edsl/base/base_class.py +1303 -0
- edsl/base/data_transfer_models.py +114 -0
- edsl/base/enums.py +215 -0
- edsl/base.py +8 -0
- edsl/buckets/__init__.py +25 -0
- edsl/buckets/bucket_collection.py +324 -0
- edsl/buckets/model_buckets.py +206 -0
- edsl/buckets/token_bucket.py +502 -0
- edsl/{jobs/buckets/TokenBucketAPI.py → buckets/token_bucket_api.py} +1 -1
- edsl/buckets/token_bucket_client.py +509 -0
- edsl/caching/__init__.py +20 -0
- edsl/caching/cache.py +814 -0
- edsl/caching/cache_entry.py +427 -0
- edsl/{data/CacheHandler.py → caching/cache_handler.py} +14 -15
- edsl/caching/exceptions.py +24 -0
- edsl/caching/orm.py +30 -0
- edsl/{data/RemoteCacheSync.py → caching/remote_cache_sync.py} +3 -3
- edsl/caching/sql_dict.py +441 -0
- edsl/config/__init__.py +8 -0
- edsl/config/config_class.py +177 -0
- edsl/config.py +4 -176
- edsl/conversation/Conversation.py +7 -7
- edsl/conversation/car_buying.py +4 -4
- edsl/conversation/chips.py +6 -6
- edsl/coop/__init__.py +25 -2
- edsl/coop/coop.py +430 -113
- edsl/coop/{ExpectedParrotKeyHandler.py → ep_key_handling.py} +86 -10
- edsl/coop/exceptions.py +62 -0
- edsl/coop/price_fetcher.py +126 -0
- edsl/coop/utils.py +89 -24
- edsl/data_transfer_models.py +5 -72
- edsl/dataset/__init__.py +10 -0
- edsl/{results/Dataset.py → dataset/dataset.py} +116 -36
- edsl/dataset/dataset_operations_mixin.py +1492 -0
- edsl/{results/DatasetTree.py → dataset/dataset_tree.py} +156 -75
- edsl/{results/TableDisplay.py → dataset/display/table_display.py} +18 -7
- edsl/{results → dataset/display}/table_renderers.py +58 -2
- edsl/{results → dataset}/file_exports.py +4 -5
- edsl/{results → dataset}/smart_objects.py +2 -2
- edsl/enums.py +5 -205
- edsl/inference_services/__init__.py +5 -0
- edsl/inference_services/{AvailableModelCacheHandler.py → available_model_cache_handler.py} +2 -3
- edsl/inference_services/{AvailableModelFetcher.py → available_model_fetcher.py} +8 -14
- edsl/inference_services/data_structures.py +3 -2
- edsl/{exceptions/inference_services.py → inference_services/exceptions.py} +1 -1
- edsl/inference_services/{InferenceServiceABC.py → inference_service_abc.py} +1 -1
- edsl/inference_services/{InferenceServicesCollection.py → inference_services_collection.py} +8 -7
- edsl/inference_services/registry.py +4 -41
- edsl/inference_services/{ServiceAvailability.py → service_availability.py} +5 -25
- edsl/inference_services/services/__init__.py +31 -0
- edsl/inference_services/{AnthropicService.py → services/anthropic_service.py} +3 -3
- edsl/inference_services/{AwsBedrock.py → services/aws_bedrock.py} +2 -2
- edsl/inference_services/{AzureAI.py → services/azure_ai.py} +2 -2
- edsl/inference_services/{DeepInfraService.py → services/deep_infra_service.py} +1 -3
- edsl/inference_services/{DeepSeekService.py → services/deep_seek_service.py} +2 -4
- edsl/inference_services/{GoogleService.py → services/google_service.py} +5 -4
- edsl/inference_services/{GroqService.py → services/groq_service.py} +1 -1
- edsl/inference_services/{MistralAIService.py → services/mistral_ai_service.py} +3 -3
- edsl/inference_services/{OllamaService.py → services/ollama_service.py} +1 -7
- edsl/inference_services/{OpenAIService.py → services/open_ai_service.py} +5 -6
- edsl/inference_services/{PerplexityService.py → services/perplexity_service.py} +12 -12
- edsl/inference_services/{TestService.py → services/test_service.py} +7 -6
- edsl/inference_services/{TogetherAIService.py → services/together_ai_service.py} +2 -6
- edsl/inference_services/{XAIService.py → services/xai_service.py} +1 -1
- edsl/inference_services/write_available.py +1 -2
- edsl/instructions/__init__.py +6 -0
- edsl/{surveys/instructions/Instruction.py → instructions/instruction.py} +11 -6
- edsl/{surveys/instructions/InstructionCollection.py → instructions/instruction_collection.py} +10 -5
- edsl/{surveys/InstructionHandler.py → instructions/instruction_handler.py} +3 -3
- edsl/{jobs/interviews → interviews}/ReportErrors.py +2 -2
- edsl/interviews/__init__.py +4 -0
- edsl/{jobs/AnswerQuestionFunctionConstructor.py → interviews/answering_function.py} +45 -18
- edsl/{jobs/interviews/InterviewExceptionEntry.py → interviews/exception_tracking.py} +107 -22
- edsl/interviews/interview.py +638 -0
- edsl/{jobs/interviews/InterviewStatusDictionary.py → interviews/interview_status_dictionary.py} +21 -12
- edsl/{jobs/interviews/InterviewStatusLog.py → interviews/interview_status_log.py} +16 -7
- edsl/{jobs/InterviewTaskManager.py → interviews/interview_task_manager.py} +12 -7
- edsl/{jobs/RequestTokenEstimator.py → interviews/request_token_estimator.py} +8 -3
- edsl/{jobs/interviews/InterviewStatistic.py → interviews/statistics.py} +36 -10
- edsl/invigilators/__init__.py +38 -0
- edsl/invigilators/invigilator_base.py +477 -0
- edsl/{agents/Invigilator.py → invigilators/invigilators.py} +263 -10
- edsl/invigilators/prompt_constructor.py +476 -0
- edsl/{agents → invigilators}/prompt_helpers.py +2 -1
- edsl/{agents/QuestionInstructionPromptBuilder.py → invigilators/question_instructions_prompt_builder.py} +18 -13
- edsl/{agents → invigilators}/question_option_processor.py +96 -21
- edsl/{agents/QuestionTemplateReplacementsBuilder.py → invigilators/question_template_replacements_builder.py} +64 -12
- edsl/jobs/__init__.py +7 -1
- edsl/jobs/async_interview_runner.py +99 -35
- edsl/jobs/check_survey_scenario_compatibility.py +7 -5
- edsl/jobs/data_structures.py +153 -22
- edsl/{exceptions/jobs.py → jobs/exceptions.py} +2 -1
- edsl/jobs/{FetchInvigilator.py → fetch_invigilator.py} +4 -4
- edsl/jobs/{loggers/HTMLTableJobLogger.py → html_table_job_logger.py} +6 -2
- edsl/jobs/{Jobs.py → jobs.py} +321 -155
- edsl/jobs/{JobsChecks.py → jobs_checks.py} +15 -7
- edsl/jobs/{JobsComponentConstructor.py → jobs_component_constructor.py} +20 -17
- edsl/jobs/{InterviewsConstructor.py → jobs_interview_constructor.py} +10 -5
- edsl/jobs/jobs_pricing_estimation.py +347 -0
- edsl/jobs/{JobsRemoteInferenceLogger.py → jobs_remote_inference_logger.py} +4 -3
- edsl/jobs/jobs_runner_asyncio.py +282 -0
- edsl/jobs/{JobsRemoteInferenceHandler.py → remote_inference.py} +19 -22
- edsl/jobs/results_exceptions_handler.py +2 -2
- edsl/key_management/__init__.py +28 -0
- edsl/key_management/key_lookup.py +161 -0
- edsl/{language_models/key_management/KeyLookupBuilder.py → key_management/key_lookup_builder.py} +118 -47
- edsl/key_management/key_lookup_collection.py +82 -0
- edsl/key_management/models.py +218 -0
- edsl/language_models/__init__.py +7 -2
- edsl/language_models/{ComputeCost.py → compute_cost.py} +18 -3
- edsl/{exceptions/language_models.py → language_models/exceptions.py} +2 -1
- edsl/language_models/language_model.py +1080 -0
- edsl/language_models/model.py +10 -25
- edsl/language_models/{ModelList.py → model_list.py} +9 -14
- edsl/language_models/{RawResponseHandler.py → raw_response_handler.py} +1 -1
- edsl/language_models/{RegisterLanguageModelsMeta.py → registry.py} +1 -1
- edsl/language_models/repair.py +4 -4
- edsl/language_models/utilities.py +4 -4
- edsl/notebooks/__init__.py +3 -1
- edsl/notebooks/{Notebook.py → notebook.py} +7 -8
- edsl/prompts/__init__.py +1 -1
- edsl/{exceptions/prompts.py → prompts/exceptions.py} +3 -1
- edsl/prompts/{Prompt.py → prompt.py} +101 -95
- edsl/questions/HTMLQuestion.py +1 -1
- edsl/questions/__init__.py +154 -25
- edsl/questions/answer_validator_mixin.py +1 -1
- edsl/questions/compose_questions.py +4 -3
- edsl/questions/derived/question_likert_five.py +166 -0
- edsl/questions/derived/{QuestionLinearScale.py → question_linear_scale.py} +4 -4
- edsl/questions/derived/{QuestionTopK.py → question_top_k.py} +4 -4
- edsl/questions/derived/{QuestionYesNo.py → question_yes_no.py} +4 -5
- edsl/questions/descriptors.py +24 -30
- edsl/questions/loop_processor.py +65 -19
- edsl/questions/question_base.py +881 -0
- edsl/questions/question_base_gen_mixin.py +15 -16
- edsl/questions/{QuestionBasePromptsMixin.py → question_base_prompts_mixin.py} +2 -2
- edsl/questions/{QuestionBudget.py → question_budget.py} +3 -4
- edsl/questions/{QuestionCheckBox.py → question_check_box.py} +16 -16
- edsl/questions/{QuestionDict.py → question_dict.py} +39 -5
- edsl/questions/{QuestionExtract.py → question_extract.py} +9 -9
- edsl/questions/question_free_text.py +282 -0
- edsl/questions/{QuestionFunctional.py → question_functional.py} +6 -5
- edsl/questions/{QuestionList.py → question_list.py} +6 -7
- edsl/questions/{QuestionMatrix.py → question_matrix.py} +6 -5
- edsl/questions/{QuestionMultipleChoice.py → question_multiple_choice.py} +126 -21
- edsl/questions/{QuestionNumerical.py → question_numerical.py} +5 -5
- edsl/questions/{QuestionRank.py → question_rank.py} +6 -6
- edsl/questions/question_registry.py +10 -16
- edsl/questions/register_questions_meta.py +8 -4
- edsl/questions/response_validator_abc.py +17 -16
- edsl/results/__init__.py +4 -1
- edsl/{exceptions/results.py → results/exceptions.py} +1 -1
- edsl/results/report.py +197 -0
- edsl/results/{Result.py → result.py} +131 -45
- edsl/results/{Results.py → results.py} +420 -216
- edsl/results/results_selector.py +344 -25
- edsl/scenarios/__init__.py +30 -3
- edsl/scenarios/{ConstructDownloadLink.py → construct_download_link.py} +7 -0
- edsl/scenarios/directory_scanner.py +156 -13
- edsl/scenarios/document_chunker.py +186 -0
- edsl/scenarios/exceptions.py +101 -0
- edsl/scenarios/file_methods.py +2 -3
- edsl/scenarios/file_store.py +755 -0
- edsl/scenarios/handlers/__init__.py +14 -14
- edsl/scenarios/handlers/{csv.py → csv_file_store.py} +1 -2
- edsl/scenarios/handlers/{docx.py → docx_file_store.py} +8 -7
- edsl/scenarios/handlers/{html.py → html_file_store.py} +1 -2
- edsl/scenarios/handlers/{jpeg.py → jpeg_file_store.py} +1 -1
- edsl/scenarios/handlers/{json.py → json_file_store.py} +1 -1
- edsl/scenarios/handlers/latex_file_store.py +5 -0
- edsl/scenarios/handlers/{md.py → md_file_store.py} +1 -1
- edsl/scenarios/handlers/{pdf.py → pdf_file_store.py} +2 -2
- edsl/scenarios/handlers/{png.py → png_file_store.py} +1 -1
- edsl/scenarios/handlers/{pptx.py → pptx_file_store.py} +8 -7
- edsl/scenarios/handlers/{py.py → py_file_store.py} +1 -3
- edsl/scenarios/handlers/{sql.py → sql_file_store.py} +2 -1
- edsl/scenarios/handlers/{sqlite.py → sqlite_file_store.py} +2 -3
- edsl/scenarios/handlers/{txt.py → txt_file_store.py} +1 -1
- edsl/scenarios/scenario.py +928 -0
- edsl/scenarios/scenario_join.py +18 -5
- edsl/scenarios/{ScenarioList.py → scenario_list.py} +424 -106
- edsl/scenarios/{ScenarioListPdfMixin.py → scenario_list_pdf_tools.py} +16 -15
- edsl/scenarios/scenario_selector.py +5 -1
- edsl/study/ObjectEntry.py +2 -2
- edsl/study/SnapShot.py +5 -5
- edsl/study/Study.py +20 -21
- edsl/study/__init__.py +6 -4
- edsl/surveys/__init__.py +7 -4
- edsl/surveys/dag/__init__.py +2 -0
- edsl/surveys/{ConstructDAG.py → dag/construct_dag.py} +3 -3
- edsl/surveys/{DAG.py → dag/dag.py} +13 -10
- edsl/surveys/descriptors.py +1 -1
- edsl/surveys/{EditSurvey.py → edit_survey.py} +9 -9
- edsl/{exceptions/surveys.py → surveys/exceptions.py} +1 -2
- edsl/surveys/memory/__init__.py +3 -0
- edsl/surveys/{MemoryPlan.py → memory/memory_plan.py} +10 -9
- edsl/surveys/rules/__init__.py +3 -0
- edsl/surveys/{Rule.py → rules/rule.py} +103 -43
- edsl/surveys/{RuleCollection.py → rules/rule_collection.py} +21 -30
- edsl/surveys/{RuleManager.py → rules/rule_manager.py} +19 -13
- edsl/surveys/survey.py +1743 -0
- edsl/surveys/{SurveyExportMixin.py → survey_export.py} +22 -27
- edsl/surveys/{SurveyFlowVisualization.py → survey_flow_visualization.py} +11 -2
- edsl/surveys/{Simulator.py → survey_simulator.py} +10 -3
- edsl/tasks/__init__.py +32 -0
- edsl/{jobs/tasks/QuestionTaskCreator.py → tasks/question_task_creator.py} +115 -57
- edsl/tasks/task_creators.py +135 -0
- edsl/{jobs/tasks/TaskHistory.py → tasks/task_history.py} +86 -47
- edsl/{jobs/tasks → tasks}/task_status_enum.py +91 -7
- edsl/tasks/task_status_log.py +85 -0
- edsl/tokens/__init__.py +2 -0
- edsl/tokens/interview_token_usage.py +53 -0
- edsl/utilities/PrettyList.py +1 -1
- edsl/utilities/SystemInfo.py +25 -22
- edsl/utilities/__init__.py +29 -21
- edsl/utilities/gcp_bucket/__init__.py +2 -0
- edsl/utilities/gcp_bucket/cloud_storage.py +99 -96
- edsl/utilities/interface.py +44 -536
- edsl/{results/MarkdownToPDF.py → utilities/markdown_to_pdf.py} +13 -5
- edsl/utilities/repair_functions.py +1 -1
- {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/METADATA +3 -2
- edsl-0.1.48.dist-info/RECORD +347 -0
- edsl/Base.py +0 -426
- edsl/BaseDiff.py +0 -260
- edsl/agents/InvigilatorBase.py +0 -260
- edsl/agents/PromptConstructor.py +0 -318
- edsl/auto/AutoStudy.py +0 -130
- edsl/auto/StageBase.py +0 -243
- edsl/auto/StageGenerateSurvey.py +0 -178
- edsl/auto/StageLabelQuestions.py +0 -125
- edsl/auto/StagePersona.py +0 -61
- edsl/auto/StagePersonaDimensionValueRanges.py +0 -88
- edsl/auto/StagePersonaDimensionValues.py +0 -74
- edsl/auto/StagePersonaDimensions.py +0 -69
- edsl/auto/StageQuestions.py +0 -74
- edsl/auto/SurveyCreatorPipeline.py +0 -21
- edsl/auto/utilities.py +0 -218
- edsl/base/Base.py +0 -279
- edsl/coop/PriceFetcher.py +0 -54
- edsl/data/Cache.py +0 -580
- edsl/data/CacheEntry.py +0 -230
- edsl/data/SQLiteDict.py +0 -292
- edsl/data/__init__.py +0 -5
- edsl/data/orm.py +0 -10
- edsl/exceptions/cache.py +0 -5
- edsl/exceptions/coop.py +0 -14
- edsl/exceptions/data.py +0 -14
- edsl/exceptions/scenarios.py +0 -29
- edsl/jobs/Answers.py +0 -43
- edsl/jobs/JobsPrompts.py +0 -354
- edsl/jobs/buckets/BucketCollection.py +0 -134
- edsl/jobs/buckets/ModelBuckets.py +0 -65
- edsl/jobs/buckets/TokenBucket.py +0 -283
- edsl/jobs/buckets/TokenBucketClient.py +0 -191
- edsl/jobs/interviews/Interview.py +0 -395
- edsl/jobs/interviews/InterviewExceptionCollection.py +0 -99
- edsl/jobs/interviews/InterviewStatisticsCollection.py +0 -25
- edsl/jobs/runners/JobsRunnerAsyncio.py +0 -163
- edsl/jobs/runners/JobsRunnerStatusData.py +0 -0
- edsl/jobs/tasks/TaskCreators.py +0 -64
- edsl/jobs/tasks/TaskStatusLog.py +0 -23
- edsl/jobs/tokens/InterviewTokenUsage.py +0 -27
- edsl/language_models/LanguageModel.py +0 -635
- edsl/language_models/ServiceDataSources.py +0 -0
- edsl/language_models/key_management/KeyLookup.py +0 -63
- edsl/language_models/key_management/KeyLookupCollection.py +0 -38
- edsl/language_models/key_management/models.py +0 -137
- edsl/questions/QuestionBase.py +0 -539
- edsl/questions/QuestionFreeText.py +0 -130
- edsl/questions/derived/QuestionLikertFive.py +0 -76
- edsl/results/DatasetExportMixin.py +0 -911
- edsl/results/ResultsExportMixin.py +0 -45
- edsl/results/TextEditor.py +0 -50
- edsl/results/results_fetch_mixin.py +0 -33
- edsl/results/results_tools_mixin.py +0 -98
- edsl/scenarios/DocumentChunker.py +0 -104
- edsl/scenarios/FileStore.py +0 -564
- edsl/scenarios/Scenario.py +0 -548
- edsl/scenarios/ScenarioHtmlMixin.py +0 -65
- edsl/scenarios/ScenarioListExportMixin.py +0 -45
- edsl/scenarios/handlers/latex.py +0 -5
- edsl/shared.py +0 -1
- edsl/surveys/Survey.py +0 -1306
- edsl/surveys/SurveyQualtricsImport.py +0 -284
- edsl/surveys/SurveyToApp.py +0 -141
- edsl/surveys/instructions/__init__.py +0 -0
- edsl/tools/__init__.py +0 -1
- edsl/tools/clusters.py +0 -192
- edsl/tools/embeddings.py +0 -27
- edsl/tools/embeddings_plotting.py +0 -118
- edsl/tools/plotting.py +0 -112
- edsl/tools/summarize.py +0 -18
- edsl/utilities/data/Registry.py +0 -6
- edsl/utilities/data/__init__.py +0 -1
- edsl/utilities/data/scooter_results.json +0 -1
- edsl-0.1.46.dist-info/RECORD +0 -366
- /edsl/coop/{CoopFunctionsMixin.py → coop_functions.py} +0 -0
- /edsl/{results → dataset/display}/CSSParameterizer.py +0 -0
- /edsl/{language_models/key_management → dataset/display}/__init__.py +0 -0
- /edsl/{results → dataset/display}/table_data_class.py +0 -0
- /edsl/{results → dataset/display}/table_display.css +0 -0
- /edsl/{results/ResultsGGMixin.py → dataset/r/ggplot.py} +0 -0
- /edsl/{results → dataset}/tree_explore.py +0 -0
- /edsl/{surveys/instructions/ChangeInstruction.py → instructions/change_instruction.py} +0 -0
- /edsl/{jobs/interviews → interviews}/interview_status_enum.py +0 -0
- /edsl/jobs/{runners/JobsRunnerStatus.py → jobs_runner_status.py} +0 -0
- /edsl/language_models/{PriceManager.py → price_manager.py} +0 -0
- /edsl/language_models/{fake_openai_call.py → unused/fake_openai_call.py} +0 -0
- /edsl/language_models/{fake_openai_service.py → unused/fake_openai_service.py} +0 -0
- /edsl/notebooks/{NotebookToLaTeX.py → notebook_to_latex.py} +0 -0
- /edsl/{exceptions/questions.py → questions/exceptions.py} +0 -0
- /edsl/questions/{SimpleAskMixin.py → simple_ask_mixin.py} +0 -0
- /edsl/surveys/{Memory.py → memory/memory.py} +0 -0
- /edsl/surveys/{MemoryManagement.py → memory/memory_management.py} +0 -0
- /edsl/surveys/{SurveyCSS.py → survey_css.py} +0 -0
- /edsl/{jobs/tokens/TokenUsage.py → tokens/token_usage.py} +0 -0
- /edsl/{results/MarkdownToDocx.py → utilities/markdown_to_docx.py} +0 -0
- /edsl/{TemplateLoader.py → utilities/template_loader.py} +0 -0
- {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/LICENSE +0 -0
- {edsl-0.1.46.dist-info → edsl-0.1.48.dist-info}/WHEEL +0 -0
@@ -0,0 +1,755 @@
|
|
1
|
+
import base64
|
2
|
+
import io
|
3
|
+
import tempfile
|
4
|
+
import mimetypes
|
5
|
+
import asyncio
|
6
|
+
import os
|
7
|
+
from typing import Dict, Any, IO, Optional
|
8
|
+
from typing import Union
|
9
|
+
from uuid import UUID
|
10
|
+
import time
|
11
|
+
from typing import Dict, Any, IO, Optional, List, Union, Literal
|
12
|
+
|
13
|
+
from .scenario import Scenario
|
14
|
+
from ..utilities import remove_edsl_version
|
15
|
+
from .file_methods import FileMethods
|
16
|
+
|
17
|
+
class FileStore(Scenario):
|
18
|
+
"""
|
19
|
+
A specialized Scenario subclass for managing file content and metadata.
|
20
|
+
|
21
|
+
FileStore provides functionality for working with files in EDSL, handling various
|
22
|
+
file formats with appropriate encoding, storage, and access methods. It extends
|
23
|
+
Scenario to allow files to be included in surveys, questions, and other EDSL components.
|
24
|
+
|
25
|
+
FileStore supports multiple file formats including text, PDF, Word documents, images,
|
26
|
+
and more. It can load files from local paths or URLs, and provides methods for
|
27
|
+
accessing file content, extracting text, and managing file operations.
|
28
|
+
|
29
|
+
Key features:
|
30
|
+
- Base64 encoding for portability and serialization
|
31
|
+
- Lazy loading through temporary files when needed
|
32
|
+
- Automatic MIME type detection
|
33
|
+
- Text extraction from various file formats
|
34
|
+
- Format-specific operations through specialized handlers
|
35
|
+
|
36
|
+
Attributes:
|
37
|
+
_path (str): The original file path.
|
38
|
+
_temp_path (str): Path to any generated temporary file.
|
39
|
+
suffix (str): File extension.
|
40
|
+
binary (bool): Whether the file is binary.
|
41
|
+
mime_type (str): The file's MIME type.
|
42
|
+
base64_string (str): Base64-encoded file content.
|
43
|
+
external_locations (dict): Dictionary of external locations.
|
44
|
+
extracted_text (str): Text extracted from the file.
|
45
|
+
|
46
|
+
Examples:
|
47
|
+
>>> import tempfile
|
48
|
+
>>> # Create a text file
|
49
|
+
>>> with tempfile.NamedTemporaryFile(suffix=".txt", mode="w") as f:
|
50
|
+
... _ = f.write("Hello World")
|
51
|
+
... _ = f.flush()
|
52
|
+
... fs = FileStore(f.name)
|
53
|
+
|
54
|
+
# The following example works locally but is commented out for CI environments
|
55
|
+
# where dependencies like pandoc may not be available:
|
56
|
+
# >>> # FileStore supports various formats
|
57
|
+
# >>> formats = ["txt", "pdf", "docx", "pptx", "md", "py", "json", "csv", "html", "png", "db"]
|
58
|
+
# >>> _ = [FileStore.example(format) for format in formats]
|
59
|
+
"""
|
60
|
+
__documentation__ = "https://docs.expectedparrot.com/en/latest/filestore.html"
|
61
|
+
|
62
|
+
def __init__(
    self,
    path: Optional[str] = None,
    mime_type: Optional[str] = None,
    binary: Optional[bool] = None,
    suffix: Optional[str] = None,
    base64_string: Optional[str] = None,
    external_locations: Optional[Dict[str, str]] = None,
    extracted_text: Optional[str] = None,
    **kwargs,
):
    """
    Initialize a new FileStore object.

    A FileStore is built either from a file path (local path or URL) or from a
    base64-encoded string holding the file content. Properties that are not
    supplied (suffix, MIME type, base64 content, extracted text) are derived
    from whatever source is available.

    Args:
        path: Path to the file to load. Can be a local file path or URL.
        mime_type: MIME type of the file. If not provided, it is guessed from
            the path, falling back to "application/octet-stream".
        binary: Whether the file is binary. Defaults to False; it is switched
            to True when the file has to be read in binary mode during encoding.
        suffix: File extension. If not provided, it is taken from the path, or
            guessed from the MIME type when there is no path.
        base64_string: Base64-encoded file content. If provided, the content is
            taken from this string instead of being read from the path.
        external_locations: Dictionary mapping location names to URLs or paths
            where the file can also be accessed.
        extracted_text: Pre-extracted text content from the file. If None,
            ``self.extract_text()`` is called.
        **kwargs: Additional keyword arguments. 'filename' can be used as an
            alternative to 'path'.

    Raises:
        ValueError: If neither a path/filename nor a base64_string is given.

    Note:
        If path is a URL (starts with http:// or https://), it is fetched via
        ``self.from_url`` and the resulting local path and MIME type are used.
    """
    if path is None and "filename" in kwargs:
        path = kwargs["filename"]

    # Without a path there must be base64 content; fail early with a clear
    # message instead of an AttributeError/TypeError further down.
    if path is None and base64_string is None:
        raise ValueError(
            "FileStore requires either a path (or 'filename') or a base64_string."
        )

    # Check if path is a URL and handle download
    if path and path.startswith(("http://", "https://")):
        downloaded = self.from_url(path, mime_type=mime_type)
        path = downloaded._path
        mime_type = downloaded.mime_type

    self._path = path  # Store the original path privately
    self._temp_path = None  # Track any generated temporary file

    if suffix:
        self.suffix = suffix
    elif path:
        self.suffix = path.split(".")[-1]
    else:
        # No path to inspect: fall back to the MIME type, else leave it empty.
        guessed_ext = mimetypes.guess_extension(mime_type) if mime_type else None
        self.suffix = guessed_ext.lstrip(".") if guessed_ext else ""

    # Must be set before encoding: the encoder may flip it to True.
    self.binary = binary or False

    guessed_mime = mimetypes.guess_type(path)[0] if path else None
    self.mime_type = mime_type or guessed_mime or "application/octet-stream"

    # Use the supplied content when it is non-empty, or when there is no path
    # to read from (an explicitly empty payload); otherwise read the file.
    if base64_string or path is None:
        self.base64_string = base64_string
    else:
        self.base64_string = self.encode_file_to_base64_string(path)
    self.external_locations = external_locations or {}

    self.extracted_text = (
        self.extract_text() if extracted_text is None else extracted_text
    )

    super().__init__(
        {
            "path": path,
            "base64_string": self.base64_string,
            "binary": self.binary,
            "suffix": self.suffix,
            "mime_type": self.mime_type,
            "external_locations": self.external_locations,
            "extracted_text": self.extracted_text,
        }
    )
|
133
|
+
|
134
|
+
@property
|
135
|
+
def path(self) -> str:
|
136
|
+
"""
|
137
|
+
Returns a valid path to the file content, creating a temporary file if needed.
|
138
|
+
|
139
|
+
This property ensures that a valid file path is always available for the file
|
140
|
+
content, even if the original file is no longer accessible or if the FileStore
|
141
|
+
was created from a base64 string without a path. If the original path doesn't
|
142
|
+
exist, it automatically generates a temporary file from the base64 content.
|
143
|
+
|
144
|
+
Returns:
|
145
|
+
A string containing a valid file path to access the file content.
|
146
|
+
|
147
|
+
Examples:
|
148
|
+
>>> import tempfile, os
|
149
|
+
>>> with tempfile.NamedTemporaryFile(suffix=".txt", mode="w") as f:
|
150
|
+
... _ = f.write("Hello World")
|
151
|
+
... _ = f.flush()
|
152
|
+
... fs = FileStore(f.name)
|
153
|
+
... os.path.isfile(fs.path)
|
154
|
+
True
|
155
|
+
|
156
|
+
|
157
|
+
Notes:
|
158
|
+
- The path may point to a temporary file that will be cleaned up when the
|
159
|
+
Python process exits
|
160
|
+
- Accessing this property may create a new temporary file if needed
|
161
|
+
- This property provides a consistent interface regardless of how the
|
162
|
+
FileStore was created (from file or from base64 string)
|
163
|
+
"""
|
164
|
+
# Check if original path exists and is accessible
|
165
|
+
if self._path and os.path.isfile(self._path):
|
166
|
+
return self._path
|
167
|
+
|
168
|
+
# If we already have a valid temporary file, use it
|
169
|
+
if self._temp_path and os.path.isfile(self._temp_path):
|
170
|
+
return self._temp_path
|
171
|
+
|
172
|
+
# Generate a new temporary file from base64 content
|
173
|
+
self._temp_path = self.to_tempfile(self.suffix)
|
174
|
+
return self._temp_path
|
175
|
+
|
176
|
+
def __str__(self):
|
177
|
+
return "FileStore: self.path"
|
178
|
+
|
179
|
+
@classmethod
def example(cls, example_type="txt"):
    """Build an example FileStore for the given file type.

    Looks up a handler class with ``FileMethods.get_handler(example_type)``
    and constructs the FileStore from what the handler's ``example()``
    returns. When no handler is found, a message is printed and None is
    returned.

    Args:
        example_type: Key used to look up the handler (default "txt").
    """
    handler_cls = FileMethods.get_handler(example_type)
    if not handler_cls:
        print(f"Example for {example_type} is not supported.")
        return None
    example_source = handler_cls().example()
    return cls(example_source)
|
186
|
+
|
187
|
+
@classmethod
async def _async_screenshot(
    cls,
    url: str,
    full_page: bool = True,
    wait_until: Literal[
        "load", "domcontentloaded", "networkidle", "commit"
    ] = "networkidle",
    download_path: Optional[str] = None,
) -> "FileStore":
    """Take a screenshot of ``url`` with Playwright and wrap it in a FileStore.

    Args:
        url: Page to navigate to.
        full_page: Passed to ``page.screenshot``.
        wait_until: Passed to ``page.goto``.
        download_path: Where to save the PNG. When None, a unique file name
            of the form ``screenshot_<timestamp>_<random>.png`` is created in
            the current working directory.

    Returns:
        A FileStore built from the saved file with MIME type "image/png".

    Raises:
        ImportError: If Playwright is not installed.
    """
    try:
        from playwright.async_api import async_playwright
    except ImportError as err:
        raise ImportError(
            "Screenshot functionality requires additional dependencies.\n"
            "Install them with: pip install 'edsl[screenshot]'"
        ) from err

    if download_path is None:
        import uuid

        # A timestamp alone has one-second granularity, so screenshots taken
        # concurrently would all write to the same file; add a random part.
        unique_name = f"screenshot_{int(time.time())}_{uuid.uuid4().hex[:8]}.png"
        download_path = os.path.join(os.getcwd(), unique_name)

    async with async_playwright() as p:
        browser = await p.chromium.launch()
        try:
            page = await browser.new_page()
            await page.goto(url, wait_until=wait_until)
            await page.screenshot(path=download_path, full_page=full_page)
        finally:
            # Close the browser even if navigation or capture fails.
            await browser.close()

    return cls(download_path, mime_type="image/png")
|
219
|
+
|
220
|
+
@classmethod
def from_url_screenshot(cls, url: str, **kwargs) -> "FileStore":
    """Synchronous wrapper around ``_async_screenshot``.

    Args:
        url: Page to screenshot.
        **kwargs: Forwarded to ``_async_screenshot``.

    Returns:
        The FileStore produced by ``_async_screenshot``.
    """
    import asyncio
    from concurrent.futures import ThreadPoolExecutor

    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No event loop is running in this thread: asyncio.run creates a
        # fresh loop and closes it afterwards, so repeated calls keep working
        # (closing the thread's default loop broke every later call).
        return asyncio.run(cls._async_screenshot(url, **kwargs))

    # A loop is already running in this thread (e.g. inside a notebook), and
    # it cannot be re-entered with run_until_complete. Run the coroutine on
    # its own loop in a worker thread and wait for the result.
    with ThreadPoolExecutor(max_workers=1) as pool:
        future = pool.submit(asyncio.run, cls._async_screenshot(url, **kwargs))
        return future.result()
|
238
|
+
|
239
|
+
@classmethod
def batch_screenshots(cls, urls: List[str], **kwargs) -> "ScenarioList":
    """
    Take screenshots of multiple URLs concurrently.

    Args:
        urls: List of URLs to screenshot
        **kwargs: Additional arguments passed to screenshot function (full_page, wait_until, etc.)

    Returns:
        ScenarioList with one Scenario per successful screenshot, holding the
        keys "url" and "screenshot". Failures are printed and skipped.
    """
    from concurrent.futures import ThreadPoolExecutor

    from .scenario_list import ScenarioList

    # Materialise once: the URLs are iterated twice (to start the captures
    # and to pair them with results).
    urls = list(urls)

    async def _capture_all():
        # return_exceptions=True keeps one failing URL from cancelling the rest.
        return await asyncio.gather(
            *(cls._async_screenshot(url, **kwargs) for url in urls),
            return_exceptions=True,
        )

    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop running here: use a fresh loop that is closed afterwards,
        # rather than closing the thread's default loop for later callers.
        results = asyncio.run(_capture_all())
    else:
        # Already inside a running loop (e.g. a notebook): run on a separate
        # loop in a worker thread instead of re-entering the current one.
        with ThreadPoolExecutor(max_workers=1) as pool:
            results = pool.submit(asyncio.run, _capture_all()).result()

    # Filter out any errors and log them
    successful_results = []
    for url, result in zip(urls, results):
        if isinstance(result, BaseException):
            print(f"Failed to screenshot {url}: {result}")
        else:
            successful_results.append(Scenario({"url": url, "screenshot": result}))

    return ScenarioList(successful_results)
|
282
|
+
|
283
|
+
@property
|
284
|
+
def size(self) -> int:
|
285
|
+
if self.base64_string != None:
|
286
|
+
return (len(self.base64_string) / 4.0) * 3 # from base64 to char size
|
287
|
+
return os.path.getsize(self.path)
|
288
|
+
|
289
|
+
def upload_google(self, refresh: bool = False) -> None:
    """Upload the file through ``google.generativeai`` and record the result.

    Configures the client with the ``GOOGLE_API_KEY`` environment variable,
    calls ``genai.upload_file`` with this file's path and MIME type, and
    stores the returned object's ``to_dict()`` under
    ``external_locations["google"]``.

    Args:
        refresh: Currently unused by this method.
    """
    import google.generativeai as genai

    api_key = os.getenv("GOOGLE_API_KEY")
    genai.configure(api_key=api_key)

    uploaded = genai.upload_file(self.path, mime_type=self.mime_type)
    self.external_locations["google"] = uploaded.to_dict()
|
295
|
+
|
296
|
+
@classmethod
@remove_edsl_version
def from_dict(cls, d):
    """Construct a FileStore by passing the dictionary's items as keyword arguments.

    Args:
        d: Mapping whose keys are accepted by the constructor.
    """
    constructor_kwargs = d
    return cls(**constructor_kwargs)
|
301
|
+
|
302
|
+
def __repr__(self):
|
303
|
+
import reprlib
|
304
|
+
|
305
|
+
r = reprlib.Repr()
|
306
|
+
r.maxstring = 20 # Limit strings to 20 chars
|
307
|
+
r.maxother = 30 # Limit other types to 30 chars
|
308
|
+
|
309
|
+
params = ", ".join(f"{key}={r.repr(value)}" for key, value in self.data.items())
|
310
|
+
return f"{self.__class__.__name__}({params})"
|
311
|
+
|
312
|
+
def _repr_html_(self):
    """Return the parent class's HTML representation followed by a download link.

    The link is produced by ``ConstructDownloadLink(self).html_create_link``
    for ``self.path`` and is separated from the parent HTML by ``<br>``.
    """
    base_html = super()._repr_html_()

    from .construct_download_link import ConstructDownloadLink

    link_builder = ConstructDownloadLink(self)
    download_html = link_builder.html_create_link(self.path, style=None)
    return f"{base_html}<br>{download_html}"
|
318
|
+
|
319
|
+
def download_link(self):
    """Return the HTML download link built by ``ConstructDownloadLink`` for ``self.path``."""
    from .construct_download_link import ConstructDownloadLink

    link_builder = ConstructDownloadLink(self)
    return link_builder.html_create_link(self.path, style=None)
|
322
|
+
|
323
|
+
def encode_file_to_base64_string(self, file_path: str):
|
324
|
+
try:
|
325
|
+
# Attempt to open the file in text mode
|
326
|
+
with open(file_path, "r") as text_file:
|
327
|
+
# Read the text data
|
328
|
+
text_data = text_file.read()
|
329
|
+
# Encode the text data to a base64 string
|
330
|
+
base64_encoded_data = base64.b64encode(text_data.encode("utf-8"))
|
331
|
+
except UnicodeDecodeError:
|
332
|
+
# If reading as text fails, open the file in binary mode
|
333
|
+
with open(file_path, "rb") as binary_file:
|
334
|
+
# Read the binary data
|
335
|
+
binary_data = binary_file.read()
|
336
|
+
# Encode the binary data to a base64 string
|
337
|
+
base64_encoded_data = base64.b64encode(binary_data)
|
338
|
+
self.binary = True
|
339
|
+
# Convert the base64 bytes to a string
|
340
|
+
except FileNotFoundError:
|
341
|
+
print(f"File not found: {file_path}")
|
342
|
+
print("Current working directory:", os.getcwd())
|
343
|
+
raise
|
344
|
+
base64_string = base64_encoded_data.decode("utf-8")
|
345
|
+
|
346
|
+
return base64_string
|
347
|
+
|
348
|
+
def open(self) -> "IO":
    """Return an in-memory file object over the decoded file contents.

    Text files are decoded with ``base64_to_text_file``; binary files are
    decoded with ``base64_to_file`` using ``is_binary=True``.
    """
    if not self.binary:
        return self.base64_to_text_file(self.base64_string)
    return self.base64_to_file(self.base64_string, is_binary=True)
|
353
|
+
|
354
|
+
def write(self, filename: Optional[str] = None) -> str:
    """
    Write the file content to disk, either to a specified filename or a temporary file.

    Args:
        filename (Optional[str]): The destination filename. If None, creates a temporary file
            whose suffix is ``"." + self.suffix``.

    Returns:
        str: The path to the written file.

    Raises:
        Exception: Any error raised while opening or writing the file is
            printed and then re-raised unchanged.
    """
    # Binary content is written as bytes; text content as UTF-8, matching the
    # UTF-8 decoding applied to bytes content below.
    mode = "wb" if self.binary else "w"
    encoding = None if self.binary else "utf-8"

    # If no filename provided, reserve a temporary path; it is reopened below
    # in the appropriate mode.
    if filename is None:
        from tempfile import NamedTemporaryFile

        with NamedTemporaryFile(delete=False, suffix="." + self.suffix) as f:
            filename = f.name

    try:
        with open(filename, mode, encoding=encoding) as f:
            content = self.open().read()
            # For text mode, ensure we're writing a string
            if not self.binary and isinstance(content, bytes):
                content = content.decode("utf-8")
            f.write(content)
        print(f"File written to {filename}")
    except Exception as e:
        print(f"Error writing file: {e}")
        raise

    # Honour the documented contract: hand the destination path back.
    return filename
|
388
|
+
|
389
|
+
@staticmethod
def base64_to_text_file(base64_string) -> "IO":
    """Decode a base64 string holding UTF-8 text and wrap it in an ``io.StringIO``."""
    decoded_text = base64.b64decode(base64_string).decode("utf-8")
    return io.StringIO(decoded_text)
|
401
|
+
|
402
|
+
@staticmethod
def base64_to_file(base64_string, is_binary=True):
    """Decode a base64 string into an in-memory file object.

    Returns an ``io.BytesIO`` over the decoded bytes when ``is_binary`` is
    true; otherwise the bytes are decoded as UTF-8 and returned in an
    ``io.StringIO``.
    """
    raw_bytes = base64.b64decode(base64_string)
    if not is_binary:
        return io.StringIO(raw_bytes.decode("utf-8"))
    return io.BytesIO(raw_bytes)
|
415
|
+
|
416
|
+
@property
def text(self):
    """Return the decoded text content of a text file.

    For binary files a warning is issued instead and nothing is returned
    (the property evaluates to ``None``).
    """
    if not self.binary:
        return self.base64_to_text_file(self.base64_string).read()

    import warnings

    warnings.warn("This is a binary file.")
|
424
|
+
|
425
|
+
def to_tempfile(self, suffix=None):
    """Write the decoded file contents to a new named temporary file.

    Args:
        suffix: Extension (without the leading dot) for the temporary file.
            Defaults to ``self.suffix``.

    Returns:
        str: Path of the temporary file. It is created with ``delete=False``,
        so it is not removed automatically when closed.
    """
    if suffix is None:
        suffix = self.suffix

    # Binary content is written as bytes; text content is written as UTF-8.
    if self.binary:
        file_like_object = self.base64_to_file(self.base64_string, is_binary=True)
        open_kwargs = {"mode": "wb"}
    else:
        file_like_object = self.base64_to_text_file(self.base64_string)
        open_kwargs = {"mode": "w", "encoding": "utf-8"}

    # The context manager closes the handle even if the write fails.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix="." + suffix, **open_kwargs
    ) as temp_file:
        temp_file.write(file_like_object.read())

    return temp_file.name
|
454
|
+
|
455
|
+
def view(self) -> None:
    """Display the file using the handler that ``FileMethods`` returns for its suffix.

    When no handler is returned, a message saying that viewing is not
    supported for this suffix is printed instead.
    """
    handler_cls = FileMethods.get_handler(self.suffix)
    if not handler_cls:
        print(f"Viewing of {self.suffix} files is not supported.")
        return
    handler_cls(self.path).view()
|
461
|
+
|
462
|
+
def extract_text(self) -> str:
    """Return the text of the file, or ``None`` when none can be obtained.

    The suffix-specific handler from ``FileMethods`` is used when it provides
    ``extract_text``. Otherwise text files fall back to ``self.text`` and
    binary files yield ``None``.
    """
    handler_cls = FileMethods.get_handler(self.suffix)
    can_extract = handler_cls and hasattr(handler_cls, "extract_text")
    if can_extract:
        return handler_cls(self.path).extract_text()

    return None if self.binary else self.text
|
472
|
+
|
473
|
+
def push(
    self,
    description: Optional[str] = None,
    alias: Optional[str] = None,
    visibility: Optional[str] = "unlisted",
    expected_parrot_url: Optional[str] = None,
) -> dict:
    """
    Push the object to Coop.

    The object is converted to a ``Scenario`` (via ``to_dict``/``from_dict``)
    and that scenario's ``push`` is called with the arguments below.

    :param description: The description of the object to push. Defaults to
        ``"File: " + self.path`` when None.
    :param alias: Forwarded unchanged to ``Scenario.push``.
    :param visibility: The visibility of the object to push.
    :param expected_parrot_url: Forwarded unchanged to ``Scenario.push``.
    :return: Whatever ``Scenario.push`` returns.
    """
    as_scenario = Scenario.from_dict(self.to_dict())

    effective_description = (
        "File: " + self.path if description is None else description
    )
    return as_scenario.push(
        description=effective_description,
        visibility=visibility,
        expected_parrot_url=expected_parrot_url,
        alias=alias,
    )
|
496
|
+
|
497
|
+
@classmethod
def pull(cls, url_or_uuid: Union[str, UUID]) -> "FileStore":
    """
    Pull a FileStore object from Coop.

    The object is fetched with ``Scenario.pull`` and rebuilt through
    ``cls.from_dict`` from the scenario's dictionary form.

    Args:
        url_or_uuid: Either a UUID string or a URL pointing to the object

    Returns:
        FileStore: The pulled FileStore object
    """
    pulled = Scenario.pull(url_or_uuid)
    payload = pulled.to_dict()
    return cls.from_dict(payload)
|
511
|
+
|
512
|
+
@classmethod
def from_url(
    cls,
    url: str,
    download_path: Optional[str] = None,
    mime_type: Optional[str] = None,
) -> "FileStore":
    """
    Download the file at ``url`` to disk and build an instance from it.

    :param url: The URL of the file to download.
    :param download_path: The path to save the downloaded file. When None, the
        file is saved in the current working directory under the last path
        component of the URL, or ``downloaded_file`` if that is empty.
    :param mime_type: The MIME type of the file. If None, it will be guessed from the file extension.
    :return: ``cls(download_path, mime_type=mime_type)``.
    :raises requests.HTTPError: If the server answers with an error status.
    """
    import requests
    from urllib.parse import urlparse

    # Using the response as a context manager releases the streamed
    # connection even when the download fails part-way.
    with requests.get(url, stream=True) as response:
        response.raise_for_status()  # Raises an HTTPError for bad responses

        # Get the filename from the URL if download_path is not provided
        if download_path is None:
            filename = os.path.basename(urlparse(url).path) or "downloaded_file"
            download_path = os.path.join(os.getcwd(), filename)

        # Ensure the directory exists. A bare filename has an empty dirname,
        # and os.makedirs("") raises, so only create it when there is one.
        target_dir = os.path.dirname(download_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        # Write the file
        with open(download_path, "wb") as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)

    # Create and return a new File instance
    return cls(download_path, mime_type=mime_type)
|
548
|
+
|
549
|
+
def create_link(self, custom_filename=None, style=None):
    """Return the result of ``ConstructDownloadLink(self).create_link``.

    ``custom_filename`` and ``style`` are passed through positionally.
    """
    from .construct_download_link import ConstructDownloadLink

    link_builder = ConstructDownloadLink(self)
    return link_builder.create_link(custom_filename, style)
|
553
|
+
|
554
|
+
def to_pandas(self):
    """
    Convert the file content to a pandas DataFrame if supported by the file handler.

    Returns:
        pandas.DataFrame: The data from the file as a DataFrame

    Raises:
        AttributeError: If no handler is found for the suffix or the handler
            has no ``to_pandas`` attribute
    """
    handler_cls = FileMethods.get_handler(self.suffix)
    if not (handler_cls and hasattr(handler_cls, "to_pandas")):
        raise AttributeError(
            f"Converting {self.suffix} files to pandas DataFrame is not supported"
        )
    return handler_cls(self.path).to_pandas()
|
570
|
+
|
571
|
+
def __getattr__(self, name):
    """
    Delegate pandas DataFrame methods to the underlying DataFrame if this is a CSV file

    Only invoked when normal attribute lookup fails. Double-underscore names
    are never delegated.

    Raises:
        AttributeError: If the name is a dunder name, the file is not a CSV,
            or the DataFrame has no such attribute.
    """
    # Dunder lookups are protocol probes (e.g. from copy or pickle), not
    # DataFrame accesses. They can arrive before the instance is populated,
    # where touching ``self.suffix`` could re-enter this method, so reject
    # them without loading the file.
    is_dunder = name.startswith("__") and name.endswith("__")
    if not is_dunder and self.suffix == "csv":
        # Get the pandas DataFrame
        df = self.to_pandas()
        # Single lookup with a sentinel so the attribute is evaluated once.
        missing = object()
        value = getattr(df, name, missing)
        if value is not missing:
            return value
    # If not a CSV or attribute doesn't exist in DataFrame, raise AttributeError
    raise AttributeError(
        f"'{self.__class__.__name__}' object has no attribute '{name}'"
    )
|
585
|
+
|
586
|
+
|
587
|
+
# class CSVFileStore(FileStore):
|
588
|
+
# @classmethod
|
589
|
+
# def example(cls):
|
590
|
+
# from ..results import Results
|
591
|
+
|
592
|
+
# r = Results.example()
|
593
|
+
# import tempfile
|
594
|
+
|
595
|
+
# with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as f:
|
596
|
+
# r.to_csv(filename=f.name)
|
597
|
+
|
598
|
+
# return cls(f.name)
|
599
|
+
|
600
|
+
# def view(self):
|
601
|
+
# import pandas as pd
|
602
|
+
|
603
|
+
# return pd.read_csv(self.to_tempfile())
|
604
|
+
|
605
|
+
|
606
|
+
# class PDFFileStore(FileStore):
|
607
|
+
# def view(self):
|
608
|
+
# pdf_path = self.to_tempfile()
|
609
|
+
# print(f"PDF path: {pdf_path}") # Print the path to ensure it exists
|
610
|
+
# import os
|
611
|
+
# import subprocess
|
612
|
+
|
613
|
+
# if os.path.exists(pdf_path):
|
614
|
+
# try:
|
615
|
+
# if os.name == "posix":
|
616
|
+
# # for cool kids
|
617
|
+
# subprocess.run(["open", pdf_path], check=True) # macOS
|
618
|
+
# elif os.name == "nt":
|
619
|
+
# os.startfile(pdf_path) # Windows
|
620
|
+
# else:
|
621
|
+
# subprocess.run(["xdg-open", pdf_path], check=True) # Linux
|
622
|
+
# except Exception as e:
|
623
|
+
# print(f"Error opening PDF: {e}")
|
624
|
+
# else:
|
625
|
+
# print("PDF file was not created successfully.")
|
626
|
+
|
627
|
+
# @classmethod
|
628
|
+
# def example(cls):
|
629
|
+
# import textwrap
|
630
|
+
|
631
|
+
# pdf_string = textwrap.dedent(
|
632
|
+
# """\
|
633
|
+
# %PDF-1.4
|
634
|
+
# 1 0 obj
|
635
|
+
# << /Type /Catalog /Pages 2 0 R >>
|
636
|
+
# endobj
|
637
|
+
# 2 0 obj
|
638
|
+
# << /Type /Pages /Kids [3 0 R] /Count 1 >>
|
639
|
+
# endobj
|
640
|
+
# 3 0 obj
|
641
|
+
# << /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R >>
|
642
|
+
# endobj
|
643
|
+
# 4 0 obj
|
644
|
+
# << /Length 44 >>
|
645
|
+
# stream
|
646
|
+
# BT
|
647
|
+
# /F1 24 Tf
|
648
|
+
# 100 700 Td
|
649
|
+
# (Hello, World!) Tj
|
650
|
+
# ET
|
651
|
+
# endstream
|
652
|
+
# endobj
|
653
|
+
# 5 0 obj
|
654
|
+
# << /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>
|
655
|
+
# endobj
|
656
|
+
# 6 0 obj
|
657
|
+
# << /ProcSet [/PDF /Text] /Font << /F1 5 0 R >> >>
|
658
|
+
# endobj
|
659
|
+
# xref
|
660
|
+
# 0 7
|
661
|
+
# 0000000000 65535 f
|
662
|
+
# 0000000010 00000 n
|
663
|
+
# 0000000053 00000 n
|
664
|
+
# 0000000100 00000 n
|
665
|
+
# 0000000173 00000 n
|
666
|
+
# 0000000232 00000 n
|
667
|
+
# 0000000272 00000 n
|
668
|
+
# trailer
|
669
|
+
# << /Size 7 /Root 1 0 R >>
|
670
|
+
# startxref
|
671
|
+
# 318
|
672
|
+
# %%EOF"""
|
673
|
+
# )
|
674
|
+
# import tempfile
|
675
|
+
|
676
|
+
# with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as f:
|
677
|
+
# f.write(pdf_string.encode())
|
678
|
+
|
679
|
+
# return cls(f.name)
|
680
|
+
|
681
|
+
|
682
|
+
# class PNGFileStore(FileStore):
|
683
|
+
# @classmethod
|
684
|
+
# def example(cls):
|
685
|
+
# import textwrap
|
686
|
+
|
687
|
+
# png_string = textwrap.dedent(
|
688
|
+
# """\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x01\x00\x00\x00\x01\x00\x08\x06\x00\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\x0cIDAT\x08\xd7c\x00\x01"""
|
689
|
+
# )
|
690
|
+
# import tempfile
|
691
|
+
|
692
|
+
# with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
|
693
|
+
# f.write(png_string.encode())
|
694
|
+
|
695
|
+
# return cls(f.name)
|
696
|
+
|
697
|
+
# def view(self):
|
698
|
+
# import matplotlib.pyplot as plt
|
699
|
+
# import matplotlib.image as mpimg
|
700
|
+
|
701
|
+
# img = mpimg.imread(self.to_tempfile())
|
702
|
+
# plt.imshow(img)
|
703
|
+
# plt.show()
|
704
|
+
|
705
|
+
|
706
|
+
# class SQLiteFileStore(FileStore):
|
707
|
+
# @classmethod
|
708
|
+
# def example(cls):
|
709
|
+
# import sqlite3
|
710
|
+
# import tempfile
|
711
|
+
|
712
|
+
# with tempfile.NamedTemporaryFile(suffix=".sqlite", delete=False) as f:
|
713
|
+
# conn = sqlite3.connect(f.name)
|
714
|
+
# c = conn.cursor()
|
715
|
+
# c.execute("""CREATE TABLE stocks (date text)""")
|
716
|
+
# conn.commit()
|
717
|
+
|
718
|
+
# return cls(f.name)
|
719
|
+
|
720
|
+
# def view(self):
|
721
|
+
# import subprocess
|
722
|
+
# import os
|
723
|
+
|
724
|
+
# sqlite_path = self.to_tempfile()
|
725
|
+
# os.system(f"sqlite3 {sqlite_path}")
|
726
|
+
|
727
|
+
|
728
|
+
# class HTMLFileStore(FileStore):
|
729
|
+
# @classmethod
|
730
|
+
# def example(cls):
|
731
|
+
# import tempfile
|
732
|
+
|
733
|
+
# with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as f:
|
734
|
+
# f.write("<html><body><h1>Test</h1></body></html>".encode())
|
735
|
+
|
736
|
+
# return cls(f.name)
|
737
|
+
|
738
|
+
# def view(self):
|
739
|
+
# import webbrowser
|
740
|
+
|
741
|
+
# html_path = self.to_tempfile()
|
742
|
+
# webbrowser.open("file://" + html_path)
|
743
|
+
|
744
|
+
|
745
|
+
if __name__ == "__main__":
    # Run this module's doctests when it is executed as a script.
    from doctest import testmod

    testmod()
|
748
|
+
|
749
|
+
# formats = FileMethods.supported_file_types()
|
750
|
+
# for file_type in formats:
|
751
|
+
# print("Now testing", file_type)
|
752
|
+
# fs = FileStore.example(file_type)
|
753
|
+
# fs.view()
|
754
|
+
# input("Press Enter to continue...")
|
755
|
+
|