edsl 0.1.39.dev3__py3-none-any.whl → 0.1.39.dev5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +413 -332
- edsl/BaseDiff.py +260 -260
- edsl/TemplateLoader.py +24 -24
- edsl/__init__.py +57 -49
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +1071 -867
- edsl/agents/AgentList.py +551 -413
- edsl/agents/Invigilator.py +284 -233
- edsl/agents/InvigilatorBase.py +257 -270
- edsl/agents/PromptConstructor.py +272 -354
- edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
- edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
- edsl/agents/__init__.py +2 -3
- edsl/agents/descriptors.py +99 -99
- edsl/agents/prompt_helpers.py +129 -129
- edsl/agents/question_option_processor.py +172 -0
- edsl/auto/AutoStudy.py +130 -117
- edsl/auto/StageBase.py +243 -230
- edsl/auto/StageGenerateSurvey.py +178 -178
- edsl/auto/StageLabelQuestions.py +125 -125
- edsl/auto/StagePersona.py +61 -61
- edsl/auto/StagePersonaDimensionValueRanges.py +88 -88
- edsl/auto/StagePersonaDimensionValues.py +74 -74
- edsl/auto/StagePersonaDimensions.py +69 -69
- edsl/auto/StageQuestions.py +74 -73
- edsl/auto/SurveyCreatorPipeline.py +21 -21
- edsl/auto/utilities.py +218 -224
- edsl/base/Base.py +279 -279
- edsl/config.py +177 -157
- edsl/conversation/Conversation.py +290 -290
- edsl/conversation/car_buying.py +59 -58
- edsl/conversation/chips.py +95 -95
- edsl/conversation/mug_negotiation.py +81 -81
- edsl/conversation/next_speaker_utilities.py +93 -93
- edsl/coop/CoopFunctionsMixin.py +15 -0
- edsl/coop/ExpectedParrotKeyHandler.py +125 -0
- edsl/coop/PriceFetcher.py +54 -54
- edsl/coop/__init__.py +2 -2
- edsl/coop/coop.py +1106 -1028
- edsl/coop/utils.py +131 -131
- edsl/data/Cache.py +573 -555
- edsl/data/CacheEntry.py +230 -233
- edsl/data/CacheHandler.py +168 -149
- edsl/data/RemoteCacheSync.py +186 -78
- edsl/data/SQLiteDict.py +292 -292
- edsl/data/__init__.py +5 -4
- edsl/data/orm.py +10 -10
- edsl/data_transfer_models.py +74 -73
- edsl/enums.py +202 -175
- edsl/exceptions/BaseException.py +21 -21
- edsl/exceptions/__init__.py +54 -54
- edsl/exceptions/agents.py +54 -42
- edsl/exceptions/cache.py +5 -5
- edsl/exceptions/configuration.py +16 -16
- edsl/exceptions/coop.py +10 -10
- edsl/exceptions/data.py +14 -14
- edsl/exceptions/general.py +34 -34
- edsl/exceptions/inference_services.py +5 -0
- edsl/exceptions/jobs.py +33 -33
- edsl/exceptions/language_models.py +63 -63
- edsl/exceptions/prompts.py +15 -15
- edsl/exceptions/questions.py +109 -91
- edsl/exceptions/results.py +29 -29
- edsl/exceptions/scenarios.py +29 -22
- edsl/exceptions/surveys.py +37 -37
- edsl/inference_services/AnthropicService.py +106 -87
- edsl/inference_services/AvailableModelCacheHandler.py +184 -0
- edsl/inference_services/AvailableModelFetcher.py +215 -0
- edsl/inference_services/AwsBedrock.py +118 -120
- edsl/inference_services/AzureAI.py +215 -217
- edsl/inference_services/DeepInfraService.py +18 -18
- edsl/inference_services/GoogleService.py +143 -148
- edsl/inference_services/GroqService.py +20 -20
- edsl/inference_services/InferenceServiceABC.py +80 -147
- edsl/inference_services/InferenceServicesCollection.py +138 -97
- edsl/inference_services/MistralAIService.py +120 -123
- edsl/inference_services/OllamaService.py +18 -18
- edsl/inference_services/OpenAIService.py +236 -224
- edsl/inference_services/PerplexityService.py +160 -163
- edsl/inference_services/ServiceAvailability.py +135 -0
- edsl/inference_services/TestService.py +90 -89
- edsl/inference_services/TogetherAIService.py +172 -170
- edsl/inference_services/data_structures.py +134 -0
- edsl/inference_services/models_available_cache.py +118 -118
- edsl/inference_services/rate_limits_cache.py +25 -25
- edsl/inference_services/registry.py +41 -41
- edsl/inference_services/write_available.py +10 -10
- edsl/jobs/AnswerQuestionFunctionConstructor.py +223 -0
- edsl/jobs/Answers.py +43 -56
- edsl/jobs/FetchInvigilator.py +47 -0
- edsl/jobs/InterviewTaskManager.py +98 -0
- edsl/jobs/InterviewsConstructor.py +50 -0
- edsl/jobs/Jobs.py +823 -898
- edsl/jobs/JobsChecks.py +172 -147
- edsl/jobs/JobsComponentConstructor.py +189 -0
- edsl/jobs/JobsPrompts.py +270 -268
- edsl/jobs/JobsRemoteInferenceHandler.py +311 -239
- edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
- edsl/jobs/RequestTokenEstimator.py +30 -0
- edsl/jobs/__init__.py +1 -1
- edsl/jobs/async_interview_runner.py +138 -0
- edsl/jobs/buckets/BucketCollection.py +104 -63
- edsl/jobs/buckets/ModelBuckets.py +65 -65
- edsl/jobs/buckets/TokenBucket.py +283 -251
- edsl/jobs/buckets/TokenBucketAPI.py +211 -0
- edsl/jobs/buckets/TokenBucketClient.py +191 -0
- edsl/jobs/check_survey_scenario_compatibility.py +85 -0
- edsl/jobs/data_structures.py +120 -0
- edsl/jobs/decorators.py +35 -0
- edsl/jobs/interviews/Interview.py +396 -661
- edsl/jobs/interviews/InterviewExceptionCollection.py +99 -99
- edsl/jobs/interviews/InterviewExceptionEntry.py +186 -186
- edsl/jobs/interviews/InterviewStatistic.py +63 -63
- edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -25
- edsl/jobs/interviews/InterviewStatusDictionary.py +78 -78
- edsl/jobs/interviews/InterviewStatusLog.py +92 -92
- edsl/jobs/interviews/ReportErrors.py +66 -66
- edsl/jobs/interviews/interview_status_enum.py +9 -9
- edsl/jobs/jobs_status_enums.py +9 -0
- edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
- edsl/jobs/results_exceptions_handler.py +98 -0
- edsl/jobs/runners/JobsRunnerAsyncio.py +151 -466
- edsl/jobs/runners/JobsRunnerStatus.py +297 -330
- edsl/jobs/tasks/QuestionTaskCreator.py +244 -242
- edsl/jobs/tasks/TaskCreators.py +64 -64
- edsl/jobs/tasks/TaskHistory.py +470 -450
- edsl/jobs/tasks/TaskStatusLog.py +23 -23
- edsl/jobs/tasks/task_status_enum.py +161 -163
- edsl/jobs/tokens/InterviewTokenUsage.py +27 -27
- edsl/jobs/tokens/TokenUsage.py +34 -34
- edsl/language_models/ComputeCost.py +63 -0
- edsl/language_models/LanguageModel.py +626 -668
- edsl/language_models/ModelList.py +164 -155
- edsl/language_models/PriceManager.py +127 -0
- edsl/language_models/RawResponseHandler.py +106 -0
- edsl/language_models/RegisterLanguageModelsMeta.py +184 -184
- edsl/language_models/ServiceDataSources.py +0 -0
- edsl/language_models/__init__.py +2 -3
- edsl/language_models/fake_openai_call.py +15 -15
- edsl/language_models/fake_openai_service.py +61 -61
- edsl/language_models/key_management/KeyLookup.py +63 -0
- edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
- edsl/language_models/key_management/KeyLookupCollection.py +38 -0
- edsl/language_models/key_management/__init__.py +0 -0
- edsl/language_models/key_management/models.py +131 -0
- edsl/language_models/model.py +256 -0
- edsl/language_models/repair.py +156 -156
- edsl/language_models/utilities.py +65 -64
- edsl/notebooks/Notebook.py +263 -258
- edsl/notebooks/NotebookToLaTeX.py +142 -0
- edsl/notebooks/__init__.py +1 -1
- edsl/prompts/Prompt.py +352 -362
- edsl/prompts/__init__.py +2 -2
- edsl/questions/ExceptionExplainer.py +77 -0
- edsl/questions/HTMLQuestion.py +103 -0
- edsl/questions/QuestionBase.py +518 -664
- edsl/questions/QuestionBasePromptsMixin.py +221 -217
- edsl/questions/QuestionBudget.py +227 -227
- edsl/questions/QuestionCheckBox.py +359 -359
- edsl/questions/QuestionExtract.py +180 -182
- edsl/questions/QuestionFreeText.py +113 -114
- edsl/questions/QuestionFunctional.py +166 -166
- edsl/questions/QuestionList.py +223 -231
- edsl/questions/QuestionMatrix.py +265 -0
- edsl/questions/QuestionMultipleChoice.py +330 -286
- edsl/questions/QuestionNumerical.py +151 -153
- edsl/questions/QuestionRank.py +314 -324
- edsl/questions/Quick.py +41 -41
- edsl/questions/SimpleAskMixin.py +74 -73
- edsl/questions/__init__.py +27 -26
- edsl/questions/{AnswerValidatorMixin.py → answer_validator_mixin.py} +334 -289
- edsl/questions/compose_questions.py +98 -98
- edsl/questions/data_structures.py +20 -0
- edsl/questions/decorators.py +21 -21
- edsl/questions/derived/QuestionLikertFive.py +76 -76
- edsl/questions/derived/QuestionLinearScale.py +90 -87
- edsl/questions/derived/QuestionTopK.py +93 -93
- edsl/questions/derived/QuestionYesNo.py +82 -82
- edsl/questions/descriptors.py +427 -413
- edsl/questions/loop_processor.py +149 -0
- edsl/questions/prompt_templates/question_budget.jinja +13 -13
- edsl/questions/prompt_templates/question_checkbox.jinja +32 -32
- edsl/questions/prompt_templates/question_extract.jinja +11 -11
- edsl/questions/prompt_templates/question_free_text.jinja +3 -3
- edsl/questions/prompt_templates/question_linear_scale.jinja +11 -11
- edsl/questions/prompt_templates/question_list.jinja +17 -17
- edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -33
- edsl/questions/prompt_templates/question_numerical.jinja +36 -36
- edsl/questions/{QuestionBaseGenMixin.py → question_base_gen_mixin.py} +168 -161
- edsl/questions/question_registry.py +177 -177
- edsl/questions/{RegisterQuestionsMeta.py → register_questions_meta.py} +71 -71
- edsl/questions/{ResponseValidatorABC.py → response_validator_abc.py} +188 -174
- edsl/questions/response_validator_factory.py +34 -0
- edsl/questions/settings.py +12 -12
- edsl/questions/templates/budget/answering_instructions.jinja +7 -7
- edsl/questions/templates/budget/question_presentation.jinja +7 -7
- edsl/questions/templates/checkbox/answering_instructions.jinja +10 -10
- edsl/questions/templates/checkbox/question_presentation.jinja +22 -22
- edsl/questions/templates/extract/answering_instructions.jinja +7 -7
- edsl/questions/templates/likert_five/answering_instructions.jinja +10 -10
- edsl/questions/templates/likert_five/question_presentation.jinja +11 -11
- edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -5
- edsl/questions/templates/linear_scale/question_presentation.jinja +5 -5
- edsl/questions/templates/list/answering_instructions.jinja +3 -3
- edsl/questions/templates/list/question_presentation.jinja +5 -5
- edsl/questions/templates/matrix/__init__.py +1 -0
- edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
- edsl/questions/templates/matrix/question_presentation.jinja +20 -0
- edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -9
- edsl/questions/templates/multiple_choice/question_presentation.jinja +11 -11
- edsl/questions/templates/numerical/answering_instructions.jinja +6 -6
- edsl/questions/templates/numerical/question_presentation.jinja +6 -6
- edsl/questions/templates/rank/answering_instructions.jinja +11 -11
- edsl/questions/templates/rank/question_presentation.jinja +15 -15
- edsl/questions/templates/top_k/answering_instructions.jinja +8 -8
- edsl/questions/templates/top_k/question_presentation.jinja +22 -22
- edsl/questions/templates/yes_no/answering_instructions.jinja +6 -6
- edsl/questions/templates/yes_no/question_presentation.jinja +11 -11
- edsl/results/CSSParameterizer.py +108 -108
- edsl/results/Dataset.py +587 -424
- edsl/results/DatasetExportMixin.py +594 -731
- edsl/results/DatasetTree.py +295 -275
- edsl/results/MarkdownToDocx.py +122 -0
- edsl/results/MarkdownToPDF.py +111 -0
- edsl/results/Result.py +557 -465
- edsl/results/Results.py +1183 -1165
- edsl/results/ResultsExportMixin.py +45 -43
- edsl/results/ResultsGGMixin.py +121 -121
- edsl/results/TableDisplay.py +125 -198
- edsl/results/TextEditor.py +50 -0
- edsl/results/__init__.py +2 -2
- edsl/results/file_exports.py +252 -0
- edsl/results/{ResultsFetchMixin.py → results_fetch_mixin.py} +33 -33
- edsl/results/{Selector.py → results_selector.py} +145 -135
- edsl/results/{ResultsToolsMixin.py → results_tools_mixin.py} +98 -98
- edsl/results/smart_objects.py +96 -0
- edsl/results/table_data_class.py +12 -0
- edsl/results/table_display.css +77 -77
- edsl/results/table_renderers.py +118 -0
- edsl/results/tree_explore.py +115 -115
- edsl/scenarios/ConstructDownloadLink.py +109 -0
- edsl/scenarios/DocumentChunker.py +102 -0
- edsl/scenarios/DocxScenario.py +16 -0
- edsl/scenarios/FileStore.py +511 -632
- edsl/scenarios/PdfExtractor.py +40 -0
- edsl/scenarios/Scenario.py +498 -601
- edsl/scenarios/ScenarioHtmlMixin.py +65 -64
- edsl/scenarios/ScenarioList.py +1458 -1287
- edsl/scenarios/ScenarioListExportMixin.py +45 -52
- edsl/scenarios/ScenarioListPdfMixin.py +239 -261
- edsl/scenarios/__init__.py +3 -4
- edsl/scenarios/directory_scanner.py +96 -0
- edsl/scenarios/file_methods.py +85 -0
- edsl/scenarios/handlers/__init__.py +13 -0
- edsl/scenarios/handlers/csv.py +38 -0
- edsl/scenarios/handlers/docx.py +76 -0
- edsl/scenarios/handlers/html.py +37 -0
- edsl/scenarios/handlers/json.py +111 -0
- edsl/scenarios/handlers/latex.py +5 -0
- edsl/scenarios/handlers/md.py +51 -0
- edsl/scenarios/handlers/pdf.py +68 -0
- edsl/scenarios/handlers/png.py +39 -0
- edsl/scenarios/handlers/pptx.py +105 -0
- edsl/scenarios/handlers/py.py +294 -0
- edsl/scenarios/handlers/sql.py +313 -0
- edsl/scenarios/handlers/sqlite.py +149 -0
- edsl/scenarios/handlers/txt.py +33 -0
- edsl/scenarios/{ScenarioJoin.py → scenario_join.py} +131 -127
- edsl/scenarios/scenario_selector.py +156 -0
- edsl/shared.py +1 -1
- edsl/study/ObjectEntry.py +173 -173
- edsl/study/ProofOfWork.py +113 -113
- edsl/study/SnapShot.py +80 -80
- edsl/study/Study.py +521 -528
- edsl/study/__init__.py +4 -4
- edsl/surveys/ConstructDAG.py +92 -0
- edsl/surveys/DAG.py +148 -148
- edsl/surveys/EditSurvey.py +221 -0
- edsl/surveys/InstructionHandler.py +100 -0
- edsl/surveys/Memory.py +31 -31
- edsl/surveys/MemoryManagement.py +72 -0
- edsl/surveys/MemoryPlan.py +244 -244
- edsl/surveys/Rule.py +327 -326
- edsl/surveys/RuleCollection.py +385 -387
- edsl/surveys/RuleManager.py +172 -0
- edsl/surveys/Simulator.py +75 -0
- edsl/surveys/Survey.py +1280 -1801
- edsl/surveys/SurveyCSS.py +273 -261
- edsl/surveys/SurveyExportMixin.py +259 -259
- edsl/surveys/{SurveyFlowVisualizationMixin.py → SurveyFlowVisualization.py} +181 -179
- edsl/surveys/SurveyQualtricsImport.py +284 -284
- edsl/surveys/SurveyToApp.py +141 -0
- edsl/surveys/__init__.py +5 -3
- edsl/surveys/base.py +53 -53
- edsl/surveys/descriptors.py +60 -56
- edsl/surveys/instructions/ChangeInstruction.py +48 -49
- edsl/surveys/instructions/Instruction.py +56 -65
- edsl/surveys/instructions/InstructionCollection.py +82 -77
- edsl/templates/error_reporting/base.html +23 -23
- edsl/templates/error_reporting/exceptions_by_model.html +34 -34
- edsl/templates/error_reporting/exceptions_by_question_name.html +16 -16
- edsl/templates/error_reporting/exceptions_by_type.html +16 -16
- edsl/templates/error_reporting/interview_details.html +115 -115
- edsl/templates/error_reporting/interviews.html +19 -19
- edsl/templates/error_reporting/overview.html +4 -4
- edsl/templates/error_reporting/performance_plot.html +1 -1
- edsl/templates/error_reporting/report.css +73 -73
- edsl/templates/error_reporting/report.html +117 -117
- edsl/templates/error_reporting/report.js +25 -25
- edsl/tools/__init__.py +1 -1
- edsl/tools/clusters.py +192 -192
- edsl/tools/embeddings.py +27 -27
- edsl/tools/embeddings_plotting.py +118 -118
- edsl/tools/plotting.py +112 -112
- edsl/tools/summarize.py +18 -18
- edsl/utilities/PrettyList.py +56 -0
- edsl/utilities/SystemInfo.py +28 -28
- edsl/utilities/__init__.py +22 -22
- edsl/utilities/ast_utilities.py +25 -25
- edsl/utilities/data/Registry.py +6 -6
- edsl/utilities/data/__init__.py +1 -1
- edsl/utilities/data/scooter_results.json +1 -1
- edsl/utilities/decorators.py +77 -77
- edsl/utilities/gcp_bucket/cloud_storage.py +96 -96
- edsl/utilities/interface.py +627 -627
- edsl/utilities/is_notebook.py +18 -0
- edsl/utilities/is_valid_variable_name.py +11 -0
- edsl/utilities/naming_utilities.py +263 -263
- edsl/utilities/remove_edsl_version.py +24 -0
- edsl/utilities/repair_functions.py +28 -28
- edsl/utilities/restricted_python.py +70 -70
- edsl/utilities/utilities.py +436 -424
- {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev5.dist-info}/LICENSE +21 -21
- {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev5.dist-info}/METADATA +13 -11
- edsl-0.1.39.dev5.dist-info/RECORD +358 -0
- {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev5.dist-info}/WHEEL +1 -1
- edsl/language_models/KeyLookup.py +0 -30
- edsl/language_models/registry.py +0 -190
- edsl/language_models/unused/ReplicateBase.py +0 -83
- edsl/results/ResultsDBMixin.py +0 -238
- edsl-0.1.39.dev3.dist-info/RECORD +0 -277
@@ -0,0 +1,96 @@
|
|
1
|
+
# directory_scanner.py
|
2
|
+
from dataclasses import dataclass
|
3
|
+
from typing import Optional, List, Iterator, TypeVar, Generic, Callable, Any
|
4
|
+
import os
|
5
|
+
|
6
|
+
T = TypeVar("T")


@dataclass
class DirectoryScanner:
    """
    Scanner for finding files in a directory based on various criteria.

    Each matching file path is passed to a caller-supplied ``factory`` and the
    factory's return value is what the scanner produces.
    """

    # Directory whose files are scanned (the root of the walk when recursive).
    directory_path: str

    def scan(
        self,
        factory: Callable[[str], T],
        recursive: bool = False,
        suffix_allow_list: Optional[List[str]] = None,
        suffix_exclude_list: Optional[List[str]] = None,
        example_suffix: Optional[str] = None,
        include_no_extension: bool = True,
    ) -> List[T]:
        """
        Eagerly scan directory and return list of objects created by factory.

        Args:
            factory: Callable that creates objects from file paths
            recursive: If True, recursively traverse subdirectories
            suffix_allow_list: List of allowed file extensions (a leading dot is optional)
            suffix_exclude_list: List of excluded file extensions (takes precedence over allow list;
                a leading dot is optional)
            example_suffix: If provided, only include files whose path ends with this string
            include_no_extension: Whether to include files without extensions

        Returns:
            A list with one ``factory(path)`` result per matching file.
        """
        return list(
            self.iter_scan(
                factory,
                recursive=recursive,
                suffix_allow_list=suffix_allow_list,
                suffix_exclude_list=suffix_exclude_list,
                example_suffix=example_suffix,
                include_no_extension=include_no_extension,
            )
        )

    def iter_scan(
        self,
        factory: Callable[[str], T],
        recursive: bool = False,
        suffix_allow_list: Optional[List[str]] = None,
        suffix_exclude_list: Optional[List[str]] = None,
        example_suffix: Optional[str] = None,
        include_no_extension: bool = True,
    ) -> Iterator[T]:
        """
        Lazily scan directory and yield objects created by factory.

        Takes the same arguments as :meth:`scan`. Nothing is read from disk
        until the returned iterator is consumed.

        Filtering rules, applied in this order:
            1. A file without an extension is included or skipped purely
               according to ``include_no_extension``; no other filter applies.
            2. An extension in ``suffix_exclude_list`` rejects the file.
            3. If ``example_suffix`` is given, the full path must end with it.
            4. If ``suffix_allow_list`` is non-empty, the extension must be in it.
        """
        # Normalise once up front: extensions are compared without their dot,
        # so an entry such as ".csv" would otherwise never match anything.
        allowed = {suffix.lstrip(".") for suffix in suffix_allow_list or ()}
        excluded = {suffix.lstrip(".") for suffix in suffix_exclude_list or ()}

        def should_include_file(filepath: str) -> bool:
            # splitext returns the extension with its leading dot; drop it.
            ext = os.path.splitext(filepath)[1][1:]

            # Handle no extension case
            if not ext:
                return include_no_extension

            # Check exclusions first (they take precedence)
            if ext in excluded:
                return False

            # Check example suffix if specified
            if example_suffix and not filepath.endswith(example_suffix):
                return False

            # Check allowed suffixes if specified
            if allowed and ext not in allowed:
                return False

            return True

        def iter_files() -> Iterator[str]:
            if recursive:
                for root, _, files in os.walk(self.directory_path):
                    for file in files:
                        yield os.path.join(root, file)
            else:
                for file in os.listdir(self.directory_path):
                    file_path = os.path.join(self.directory_path, file)
                    # listdir also returns sub-directories; keep regular files only.
                    if os.path.isfile(file_path):
                        yield file_path

        for file_path in iter_files():
            if should_include_file(file_path):
                yield factory(file_path)
|
@@ -0,0 +1,85 @@
|
|
1
|
+
from typing import Optional, Dict, Type
|
2
|
+
from abc import ABC, abstractmethod
|
3
|
+
import importlib.metadata
|
4
|
+
import importlib.util
|
5
|
+
|
6
|
+
from edsl.utilities.is_notebook import is_notebook
|
7
|
+
|
8
|
+
|
9
|
+
class FileMethods(ABC):
    """Abstract base class and registry for per-file-type handlers.

    A subclass that defines a class attribute ``suffix`` is registered
    automatically under that suffix (lower-cased) when the class is created.
    Handlers are then looked up by suffix or by file path.
    """

    # Registry shared by all subclasses: lower-cased suffix -> handler class.
    _handlers: Dict[str, Type["FileMethods"]] = {}

    def __init__(self, path: Optional[str] = None):
        """Store the path of the file this handler operates on."""
        self.path = path

    def __init_subclass__(cls) -> None:
        """Register subclasses automatically when they're defined."""
        super().__init_subclass__()
        if hasattr(cls, "suffix"):
            # Lookups lower-case the requested suffix, so the key must be
            # lower-cased too or a handler declared as "CSV" is unreachable.
            FileMethods._handlers[cls.suffix.lower()] = cls

    @classmethod
    def get_handler(cls, suffix: str) -> Optional[Type["FileMethods"]]:
        """Get the appropriate handler class for a given suffix.

        Args:
            suffix: File extension without the dot; matched case-insensitively.

        Returns:
            The registered handler class, or None if no handler is registered.
        """
        # Load plugins if they haven't been loaded yet
        if not cls._handlers:
            cls.load_plugins()
        return cls._handlers.get(suffix.lower())

    @classmethod
    def load_plugins(cls):
        """Load all file handler plugins including built-ins and external plugins."""
        # Imported only for its side effect: importing the package defines the
        # built-in handler classes, which register via __init_subclass__.
        from edsl.scenarios import handlers  # noqa: F401

        # Then load any external plugins
        try:
            entries = importlib.metadata.entry_points(group="file_handlers")
        except TypeError:
            # Python < 3.10: entry_points() accepts no selection arguments and
            # returns a mapping of group name -> entry points.
            entries = importlib.metadata.entry_points().get("file_handlers", [])

        for ep in entries:
            try:
                # Registration happens automatically via __init_subclass__
                ep.load()
            except Exception as e:
                # Best effort: one broken plugin must not block the others.
                print(f"Failed to load external handler {ep.name}: {e}")

    @classmethod
    def get_handler_for_path(cls, path: str) -> Optional[Type["FileMethods"]]:
        """Get the appropriate handler class for a file path.

        The suffix is taken from the final path component only, so a dot in
        a directory name is not mistaken for an extension.
        """
        import os

        _, dot, ext = os.path.basename(path).rpartition(".")
        suffix = ext.lower() if dot else ""
        return cls.get_handler(suffix)

    @classmethod
    def create(cls, path: str) -> Optional["FileMethods"]:
        """Create an appropriate handler instance for the given path.

        Returns None when no handler is registered for the path's suffix.
        """
        handler_class = cls.get_handler_for_path(path)
        if handler_class:
            return handler_class(path)
        return None

    @classmethod
    def supported_file_types(cls):
        """Return the list of suffixes that currently have a registered handler."""
        if not cls._handlers:
            cls.load_plugins()
        return list(cls._handlers.keys())

    @abstractmethod
    def view_system(self):
        """Show the file outside a notebook; implemented by each handler."""
        ...

    @abstractmethod
    def view_notebook(self):
        """Show the file inside a notebook; implemented by each handler."""
        ...

    def view(self):
        """Dispatch to view_notebook() or view_system() based on is_notebook()."""
        if is_notebook():
            self.view_notebook()
        else:
            self.view_system()

    @abstractmethod
    def example(self):
        """Produce an example for this file type; implemented by each handler."""
        ...
|
@@ -0,0 +1,13 @@
|
|
1
|
+
from .pdf import PdfMethods
|
2
|
+
from .docx import DocxMethods
|
3
|
+
from .png import PngMethods
|
4
|
+
from .txt import TxtMethods
|
5
|
+
from .html import HtmlMethods
|
6
|
+
from .md import MarkdownMethods
|
7
|
+
from .csv import CsvMethods
|
8
|
+
from .json import JsonMethods
|
9
|
+
from .sql import SqlMethods
|
10
|
+
from .pptx import PptxMethods
|
11
|
+
from .latex import LaTeXMethods
|
12
|
+
from .py import PyMethods
|
13
|
+
from .sqlite import SQLiteMethods
|
@@ -0,0 +1,38 @@
|
|
1
|
+
import tempfile
|
2
|
+
from edsl.scenarios.file_methods import FileMethods
|
3
|
+
|
4
|
+
|
5
|
+
class CsvMethods(FileMethods):
    """File handler for ``.csv`` files."""

    suffix = "csv"

    def view_system(self):
        """Open the CSV file with the operating system's default application.

        Prints a message instead of raising if the file is missing or the
        opener fails.
        """
        import os
        import subprocess
        import sys

        if not os.path.exists(self.path):
            print("CSV file was not found.")
            return

        try:
            # os.name is "posix" on both macOS and Linux, so it cannot tell
            # them apart; sys.platform can.
            if sys.platform == "darwin":
                subprocess.run(["open", self.path], check=True)  # macOS
            elif os.name == "nt":
                os.startfile(self.path)  # Windows
            else:
                subprocess.run(["xdg-open", self.path], check=True)  # Linux
        except Exception as e:
            print(f"Error opening CSV: {e}")

    def view_notebook(self):
        """Load the CSV into a pandas DataFrame and display it in the notebook."""
        import pandas as pd
        from IPython.display import display

        df = pd.read_csv(self.path)
        display(df)

    def example(self):
        """Write a small two-column example CSV to a temp file and return its path.

        The file is created with ``delete=False``; the caller owns cleanup.
        """
        import pandas as pd

        df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
        # Reserve a unique path, then let pandas write to it once our own
        # handle has been closed.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as f:
            csv_path = f.name
        df.to_csv(csv_path, index=False)
        return csv_path
|
@@ -0,0 +1,76 @@
|
|
1
|
+
from edsl.scenarios.file_methods import FileMethods
|
2
|
+
import os
|
3
|
+
import tempfile
|
4
|
+
|
5
|
+
|
6
|
+
class DocxMethods(FileMethods):
    """File handler for ``.docx`` (Word) files."""

    suffix = "docx"

    def extract_text(self):
        """Return the text of every paragraph in the document, joined by newlines.

        The loaded document is also kept on ``self.doc``.
        """
        from docx import Document

        self.doc = Document(self.path)

        # Extract all text
        return "\n".join(para.text for para in self.doc.paragraphs)

    def view_system(self):
        """Open the DOCX file with the operating system's default application.

        Prints a message instead of raising if the file is missing or the
        opener fails.
        """
        import os
        import subprocess
        import sys

        if not os.path.exists(self.path):
            print("DOCX file was not found.")
            return

        try:
            # os.name is "posix" on both macOS and Linux, so it cannot tell
            # them apart; sys.platform can.
            if sys.platform == "darwin":
                subprocess.run(["open", self.path], check=True)  # macOS
            elif os.name == "nt":
                os.startfile(self.path)  # Windows
            else:
                subprocess.run(["xdg-open", self.path], check=True)  # Linux
        except Exception as e:
            print(f"Error opening DOCX: {e}")

    def view_notebook(self):
        """Convert the document to HTML with mammoth and display it in a scrollable box."""
        import mammoth
        from IPython.display import HTML, display

        with open(self.path, "rb") as docx_file:
            result = mammoth.convert_to_html(docx_file)
            html = f"""
<div style="width: 800px; height: 800px; padding: 20px;
border: 1px solid #ccc; overflow-y: auto;">
{result.value}
</div>
"""
            display(HTML(html))

    def example(self):
        """Create example documents and return the path of the temporary one.

        Returns:
            Path of a temporary ``.docx`` file containing a single heading.
            The file is created with ``delete=False``; the caller owns cleanup.
        """
        from docx import Document

        # NOTE(review): this also writes test_dir/test1.docx relative to the
        # current working directory. Kept as-is in case something relies on
        # it, but it looks like leftover scaffolding -- confirm and remove.
        os.makedirs("test_dir", exist_ok=True)
        doc1 = Document()
        _ = doc1.add_heading("First Survey")
        doc1.save("test_dir/test1.docx")

        doc2 = Document()
        _ = doc2.add_heading("Second Survey")

        # The context manager closes the handle; no explicit close() needed.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as tmp:
            doc2.save(tmp.name)

        return tmp.name
|
70
|
+
|
71
|
+
|
72
|
+
if __name__ == "__main__":
    # example() is an instance method, so it has to be called on an instance;
    # calling it on the class raises TypeError (missing 'self').
    docx_temp = DocxMethods().example()
    from edsl.scenarios.FileStore import FileStore

    fs = FileStore(docx_temp)
|
@@ -0,0 +1,37 @@
|
|
1
|
+
from edsl.scenarios.file_methods import FileMethods
|
2
|
+
import tempfile
|
3
|
+
|
4
|
+
|
5
|
+
class HtmlMethods(FileMethods):
    """File handler for ``.html`` files."""

    suffix = "html"

    def view_system(self):
        """Open the HTML file in the default web browser."""
        import webbrowser
        from pathlib import Path

        # Build a proper file:// URL: the path must be absolute and
        # percent-encoded, which plain string concatenation does not ensure.
        webbrowser.open(Path(self.path).resolve().as_uri())

    def view_notebook(self):
        """Display the HTML file in an 800x800 IFrame inside the notebook."""
        from IPython.display import IFrame, display

        display(IFrame(self.path, width=800, height=800))

    def example(self):
        """Write a minimal example HTML page to a temp file and return its path.

        The file is created with ``delete=False``; the caller owns cleanup.
        """
        html_string = b"""
<html>
<head>
<title>Test</title>
</head>
<body>
<h1>Hello, World!</h1>
</body>
</html>
"""

        with tempfile.NamedTemporaryFile(delete=False, suffix=".html") as f:
            f.write(html_string)
        return f.name
|
@@ -0,0 +1,111 @@
|
|
1
|
+
from edsl.scenarios.file_methods import FileMethods
|
2
|
+
import tempfile
|
3
|
+
import json
|
4
|
+
from typing import Optional, Dict, Any
|
5
|
+
|
6
|
+
|
7
|
+
class JsonMethods(FileMethods):
    """File handler for ``.json`` files."""

    suffix = "json"

    def view_system(self):
        """Open the JSON file with the operating system's default application.

        Prints a message instead of raising if the file is missing or the
        opener fails.
        """
        import os
        import subprocess
        import sys

        if not os.path.exists(self.path):
            print("JSON file was not found.")
            return

        try:
            # os.name is "posix" on both macOS and Linux, so it cannot tell
            # them apart; sys.platform can.
            if sys.platform == "darwin":
                subprocess.run(["open", self.path], check=True)  # macOS
            elif os.name == "nt":
                os.startfile(self.path)  # Windows
            else:
                subprocess.run(["xdg-open", self.path], check=True)  # Linux
        except Exception as e:
            print(f"Error opening JSON: {e}")

    def view_notebook(self):
        """Display the parsed JSON and a link to the file inside the notebook.

        Parse and read errors are printed rather than raised.
        """
        from IPython.display import FileLink, JSON, display

        # Read and parse the JSON file
        try:
            with open(self.path, "r", encoding="utf-8") as f:
                content = json.load(f)

            # Display formatted JSON
            display(JSON(content))

            # Provide download link
            display(FileLink(self.path))
        except json.JSONDecodeError as e:
            print(f"Error parsing JSON: {e}")
        except Exception as e:
            print(f"Error reading file: {e}")

    def validate_json(self, schema: Optional[Dict[str, Any]] = None) -> bool:
        """
        Validate the JSON file against a schema if provided,
        or check if it's valid JSON if no schema is provided.

        Args:
            schema: Optional schema passed to ``jsonschema.validate``.

        Returns:
            True if the file parses (and satisfies ``schema`` when given);
            False otherwise, after printing the reason.
        """
        try:
            with open(self.path, "r", encoding="utf-8") as f:
                content = json.load(f)

            if schema is not None:
                # Imported lazily so jsonschema is only needed when a schema is used.
                from jsonschema import validate

                validate(instance=content, schema=schema)

            return True
        except json.JSONDecodeError as e:
            print(f"Invalid JSON format: {e}")
            return False
        except Exception as e:
            print(f"Validation error: {e}")
            return False

    def pretty_print(self):
        """Pretty print the JSON content with proper indentation."""
        try:
            with open(self.path, "r", encoding="utf-8") as f:
                content = json.load(f)

            print(json.dumps(content, indent=2, sort_keys=True))
        except Exception as e:
            print(f"Error pretty printing JSON: {e}")

    def example(self):
        """Write a sample JSON document to a temp file and return its path.

        The file is created with ``delete=False``; the caller owns cleanup.
        """
        sample_json = {
            "person": {
                "name": "John Doe",
                "age": 30,
                "contact": {"email": "john@example.com", "phone": "+1-555-555-5555"},
                "interests": ["programming", "data science", "machine learning"],
                "active": True,
                "metadata": {"last_updated": "2024-01-01", "version": 1.0},
            }
        }

        with tempfile.NamedTemporaryFile(
            delete=False, suffix=".json", mode="w", encoding="utf-8"
        ) as f:
            json.dump(sample_json, f, indent=2)
        return f.name

    def format_file(self):
        """Read, format, and write back the JSON with consistent formatting.

        Returns:
            True on success; False (after printing the error) on any failure.
        """
        try:
            # Read the current content
            with open(self.path, "r", encoding="utf-8") as f:
                content = json.load(f)

            # Write back with consistent formatting
            with open(self.path, "w", encoding="utf-8") as f:
                json.dump(content, f, indent=2, sort_keys=True)

            return True
        except Exception as e:
            print(f"Error formatting JSON file: {e}")
            return False
|
@@ -0,0 +1,51 @@
|
|
1
|
+
from edsl.scenarios.file_methods import FileMethods
|
2
|
+
import tempfile
|
3
|
+
|
4
|
+
|
5
|
+
class MarkdownMethods(FileMethods):
    """File handler for ``.md`` (Markdown) files."""

    suffix = "md"

    def view_system(self):
        """Open the Markdown file with the operating system's default application.

        Prints a message instead of raising if the file is missing or the
        opener fails.
        """
        import os
        import subprocess
        import sys

        if not os.path.exists(self.path):
            print("Markdown file was not found.")
            return

        try:
            # os.name is "posix" on both macOS and Linux, so it cannot tell
            # them apart; sys.platform can.
            if sys.platform == "darwin":
                subprocess.run(["open", self.path], check=True)  # macOS
            elif os.name == "nt":
                os.startfile(self.path)  # Windows
            else:
                subprocess.run(["xdg-open", self.path], check=True)  # Linux
        except Exception as e:
            print(f"Error opening Markdown: {e}")

    def view_notebook(self):
        """Render the Markdown in the notebook, followed by a link to the file."""
        from IPython.display import FileLink, Markdown, display

        # First display the content of the markdown file
        with open(self.path, "r", encoding="utf-8") as f:
            content = f.read()
        display(Markdown(content))

        # Then provide a download link
        display(FileLink(self.path))

    def example(self):
        """Write a sample Markdown document to a temp file and return its path.

        The file is created with ``delete=False``; the caller owns cleanup.
        """
        markdown_content = """# Sample Markdown

## Features
- **Bold text** demonstration
- *Italic text* demonstration
- Code block example:
```python
print("Hello, World!")
```
"""
        with tempfile.NamedTemporaryFile(
            delete=False, suffix=".md", mode="w", encoding="utf-8"
        ) as f:
            f.write(markdown_content)
        return f.name
|
@@ -0,0 +1,68 @@
|
|
1
|
+
import os
|
2
|
+
import base64
|
3
|
+
|
4
|
+
from edsl.scenarios.file_methods import FileMethods
|
5
|
+
|
6
|
+
|
7
|
+
class PdfMethods(FileMethods):
    """Text extraction, viewing and sample-generation helpers for PDF files.

    The methods read the file location from ``self.path``.
    """

    suffix = "pdf"

    def extract_text(self):
        """Return the text of every page of the PDF, concatenated in page order.

        Requires the ``PyPDF2`` package, which is imported on first use.
        """
        from PyPDF2 import PdfReader

        reader = PdfReader(self.path)
        # Join once instead of growing a string page by page; a page that
        # yields no text contributes an empty string.
        return "".join(page.extract_text() or "" for page in reader.pages)

    def view_system(self):
        """Open the PDF with the operating system's default viewer.

        Nothing is raised: a missing file or a failing opener command is
        reported with a printed message.
        """
        import os
        import subprocess
        import sys

        if os.path.exists(self.path):
            try:
                # os.name is "posix" on both macOS and Linux, so it cannot
                # choose between `open` and `xdg-open`; sys.platform can.
                if sys.platform == "darwin":
                    subprocess.run(["open", self.path], check=True)  # macOS
                elif os.name == "nt":
                    os.startfile(self.path)  # Windows
                else:
                    subprocess.run(["xdg-open", self.path], check=True)  # Linux
            except Exception as e:
                print(f"Error opening PDF: {e}")
        else:
            print("PDF file was not found.")

    def view_notebook(self):
        """Embed the PDF in a notebook cell as a base64 ``data:`` URI iframe."""
        from IPython.display import HTML, display

        # The whole file is inlined into the page, so no file server is needed.
        with open(self.path, "rb") as f:
            base64_pdf = base64.b64encode(f.read()).decode("utf-8")

        html = f"""
<iframe
src="data:application/pdf;base64,{base64_pdf}"
width="800px"
height="800px"
type="application/pdf"
></iframe>
"""
        display(HTML(html))

    def example(self):
        """Return the path of a temporary PDF built from the example ``Results``."""
        from edsl.results.Results import Results

        return (
            Results.example().select("answer.how_feeling").first().pdf().to_tempfile()
        )
|
@@ -0,0 +1,39 @@
|
|
1
|
+
import tempfile
|
2
|
+
from edsl.scenarios.file_methods import FileMethods
|
3
|
+
|
4
|
+
|
5
|
+
class PngMethods(FileMethods):
    """Viewing and sample-generation helpers for PNG image files.

    The methods read the file location from ``self.path``.
    """

    suffix = "png"

    def view_system(self):
        """Open the PNG with the operating system's default viewer.

        Nothing is raised: a missing file or a failing opener command is
        reported with a printed message.
        """
        import os
        import subprocess
        import sys

        if os.path.exists(self.path):
            try:
                # os.name is "posix" on both macOS and Linux, so it cannot
                # choose between `open` and `xdg-open`; sys.platform can.
                if sys.platform == "darwin":
                    subprocess.run(["open", self.path], check=True)  # macOS
                elif os.name == "nt":
                    os.startfile(self.path)  # Windows
                else:
                    subprocess.run(["xdg-open", self.path], check=True)  # Linux
            except Exception as e:
                print(f"Error opening PNG: {e}")
        else:
            print("PNG file was not found.")

    def view_notebook(self):
        """Display the PNG inline in a notebook cell."""
        from IPython.display import Image, display

        display(Image(filename=self.path))

    def example(self):
        """Plot a sine wave and save it as a temporary PNG file.

        Requires ``matplotlib`` and ``numpy``, imported on first use. The
        file is created with ``delete=False`` so it remains on disk.

        Returns:
            The path of the temporary ``.png`` file, as a string.
        """
        import matplotlib.pyplot as plt
        import numpy as np

        x = np.linspace(0, 10, 100)
        y = np.sin(x)

        # Reserve the path and close the handle before saving: a file that
        # is still open cannot be reopened by name on Windows.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as f:
            png_path = f.name

        # Draw on a dedicated figure and close it afterwards, so repeated
        # calls neither pile lines onto the global figure nor leak figures.
        fig, ax = plt.subplots()
        try:
            ax.plot(x, y)
            fig.savefig(png_path)
        finally:
            plt.close(fig)
        return png_path
|