PyPI - edsl - Versions diffs - 0.1.39.dev3__py3-none-any.whl → 0.1.39.dev5__py3-none-any.whl - Mend

edsl 0.1.39.dev3__py3-none-any.whl → 0.1.39.dev5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (341) hide show

edsl/Base.py +413 -332
edsl/BaseDiff.py +260 -260
edsl/TemplateLoader.py +24 -24
edsl/__init__.py +57 -49
edsl/__version__.py +1 -1
edsl/agents/Agent.py +1071 -867
edsl/agents/AgentList.py +551 -413
edsl/agents/Invigilator.py +284 -233
edsl/agents/InvigilatorBase.py +257 -270
edsl/agents/PromptConstructor.py +272 -354
edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
edsl/agents/__init__.py +2 -3
edsl/agents/descriptors.py +99 -99
edsl/agents/prompt_helpers.py +129 -129
edsl/agents/question_option_processor.py +172 -0
edsl/auto/AutoStudy.py +130 -117
edsl/auto/StageBase.py +243 -230
edsl/auto/StageGenerateSurvey.py +178 -178
edsl/auto/StageLabelQuestions.py +125 -125
edsl/auto/StagePersona.py +61 -61
edsl/auto/StagePersonaDimensionValueRanges.py +88 -88
edsl/auto/StagePersonaDimensionValues.py +74 -74
edsl/auto/StagePersonaDimensions.py +69 -69
edsl/auto/StageQuestions.py +74 -73
edsl/auto/SurveyCreatorPipeline.py +21 -21
edsl/auto/utilities.py +218 -224
edsl/base/Base.py +279 -279
edsl/config.py +177 -157
edsl/conversation/Conversation.py +290 -290
edsl/conversation/car_buying.py +59 -58
edsl/conversation/chips.py +95 -95
edsl/conversation/mug_negotiation.py +81 -81
edsl/conversation/next_speaker_utilities.py +93 -93
edsl/coop/CoopFunctionsMixin.py +15 -0
edsl/coop/ExpectedParrotKeyHandler.py +125 -0
edsl/coop/PriceFetcher.py +54 -54
edsl/coop/__init__.py +2 -2
edsl/coop/coop.py +1106 -1028
edsl/coop/utils.py +131 -131
edsl/data/Cache.py +573 -555
edsl/data/CacheEntry.py +230 -233
edsl/data/CacheHandler.py +168 -149
edsl/data/RemoteCacheSync.py +186 -78
edsl/data/SQLiteDict.py +292 -292
edsl/data/__init__.py +5 -4
edsl/data/orm.py +10 -10
edsl/data_transfer_models.py +74 -73
edsl/enums.py +202 -175
edsl/exceptions/BaseException.py +21 -21
edsl/exceptions/__init__.py +54 -54
edsl/exceptions/agents.py +54 -42
edsl/exceptions/cache.py +5 -5
edsl/exceptions/configuration.py +16 -16
edsl/exceptions/coop.py +10 -10
edsl/exceptions/data.py +14 -14
edsl/exceptions/general.py +34 -34
edsl/exceptions/inference_services.py +5 -0
edsl/exceptions/jobs.py +33 -33
edsl/exceptions/language_models.py +63 -63
edsl/exceptions/prompts.py +15 -15
edsl/exceptions/questions.py +109 -91
edsl/exceptions/results.py +29 -29
edsl/exceptions/scenarios.py +29 -22
edsl/exceptions/surveys.py +37 -37
edsl/inference_services/AnthropicService.py +106 -87
edsl/inference_services/AvailableModelCacheHandler.py +184 -0
edsl/inference_services/AvailableModelFetcher.py +215 -0
edsl/inference_services/AwsBedrock.py +118 -120
edsl/inference_services/AzureAI.py +215 -217
edsl/inference_services/DeepInfraService.py +18 -18
edsl/inference_services/GoogleService.py +143 -148
edsl/inference_services/GroqService.py +20 -20
edsl/inference_services/InferenceServiceABC.py +80 -147
edsl/inference_services/InferenceServicesCollection.py +138 -97
edsl/inference_services/MistralAIService.py +120 -123
edsl/inference_services/OllamaService.py +18 -18
edsl/inference_services/OpenAIService.py +236 -224
edsl/inference_services/PerplexityService.py +160 -163
edsl/inference_services/ServiceAvailability.py +135 -0
edsl/inference_services/TestService.py +90 -89
edsl/inference_services/TogetherAIService.py +172 -170
edsl/inference_services/data_structures.py +134 -0
edsl/inference_services/models_available_cache.py +118 -118
edsl/inference_services/rate_limits_cache.py +25 -25
edsl/inference_services/registry.py +41 -41
edsl/inference_services/write_available.py +10 -10
edsl/jobs/AnswerQuestionFunctionConstructor.py +223 -0
edsl/jobs/Answers.py +43 -56
edsl/jobs/FetchInvigilator.py +47 -0
edsl/jobs/InterviewTaskManager.py +98 -0
edsl/jobs/InterviewsConstructor.py +50 -0
edsl/jobs/Jobs.py +823 -898
edsl/jobs/JobsChecks.py +172 -147
edsl/jobs/JobsComponentConstructor.py +189 -0
edsl/jobs/JobsPrompts.py +270 -268
edsl/jobs/JobsRemoteInferenceHandler.py +311 -239
edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
edsl/jobs/RequestTokenEstimator.py +30 -0
edsl/jobs/__init__.py +1 -1
edsl/jobs/async_interview_runner.py +138 -0
edsl/jobs/buckets/BucketCollection.py +104 -63
edsl/jobs/buckets/ModelBuckets.py +65 -65
edsl/jobs/buckets/TokenBucket.py +283 -251
edsl/jobs/buckets/TokenBucketAPI.py +211 -0
edsl/jobs/buckets/TokenBucketClient.py +191 -0
edsl/jobs/check_survey_scenario_compatibility.py +85 -0
edsl/jobs/data_structures.py +120 -0
edsl/jobs/decorators.py +35 -0
edsl/jobs/interviews/Interview.py +396 -661
edsl/jobs/interviews/InterviewExceptionCollection.py +99 -99
edsl/jobs/interviews/InterviewExceptionEntry.py +186 -186
edsl/jobs/interviews/InterviewStatistic.py +63 -63
edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -25
edsl/jobs/interviews/InterviewStatusDictionary.py +78 -78
edsl/jobs/interviews/InterviewStatusLog.py +92 -92
edsl/jobs/interviews/ReportErrors.py +66 -66
edsl/jobs/interviews/interview_status_enum.py +9 -9
edsl/jobs/jobs_status_enums.py +9 -0
edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
edsl/jobs/results_exceptions_handler.py +98 -0
edsl/jobs/runners/JobsRunnerAsyncio.py +151 -466
edsl/jobs/runners/JobsRunnerStatus.py +297 -330
edsl/jobs/tasks/QuestionTaskCreator.py +244 -242
edsl/jobs/tasks/TaskCreators.py +64 -64
edsl/jobs/tasks/TaskHistory.py +470 -450
edsl/jobs/tasks/TaskStatusLog.py +23 -23
edsl/jobs/tasks/task_status_enum.py +161 -163
edsl/jobs/tokens/InterviewTokenUsage.py +27 -27
edsl/jobs/tokens/TokenUsage.py +34 -34
edsl/language_models/ComputeCost.py +63 -0
edsl/language_models/LanguageModel.py +626 -668
edsl/language_models/ModelList.py +164 -155
edsl/language_models/PriceManager.py +127 -0
edsl/language_models/RawResponseHandler.py +106 -0
edsl/language_models/RegisterLanguageModelsMeta.py +184 -184
edsl/language_models/ServiceDataSources.py +0 -0
edsl/language_models/__init__.py +2 -3
edsl/language_models/fake_openai_call.py +15 -15
edsl/language_models/fake_openai_service.py +61 -61
edsl/language_models/key_management/KeyLookup.py +63 -0
edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
edsl/language_models/key_management/KeyLookupCollection.py +38 -0
edsl/language_models/key_management/__init__.py +0 -0
edsl/language_models/key_management/models.py +131 -0
edsl/language_models/model.py +256 -0
edsl/language_models/repair.py +156 -156
edsl/language_models/utilities.py +65 -64
edsl/notebooks/Notebook.py +263 -258
edsl/notebooks/NotebookToLaTeX.py +142 -0
edsl/notebooks/__init__.py +1 -1
edsl/prompts/Prompt.py +352 -362
edsl/prompts/__init__.py +2 -2
edsl/questions/ExceptionExplainer.py +77 -0
edsl/questions/HTMLQuestion.py +103 -0
edsl/questions/QuestionBase.py +518 -664
edsl/questions/QuestionBasePromptsMixin.py +221 -217
edsl/questions/QuestionBudget.py +227 -227
edsl/questions/QuestionCheckBox.py +359 -359
edsl/questions/QuestionExtract.py +180 -182
edsl/questions/QuestionFreeText.py +113 -114
edsl/questions/QuestionFunctional.py +166 -166
edsl/questions/QuestionList.py +223 -231
edsl/questions/QuestionMatrix.py +265 -0
edsl/questions/QuestionMultipleChoice.py +330 -286
edsl/questions/QuestionNumerical.py +151 -153
edsl/questions/QuestionRank.py +314 -324
edsl/questions/Quick.py +41 -41
edsl/questions/SimpleAskMixin.py +74 -73
edsl/questions/__init__.py +27 -26
edsl/questions/{AnswerValidatorMixin.py → answer_validator_mixin.py} +334 -289
edsl/questions/compose_questions.py +98 -98
edsl/questions/data_structures.py +20 -0
edsl/questions/decorators.py +21 -21
edsl/questions/derived/QuestionLikertFive.py +76 -76
edsl/questions/derived/QuestionLinearScale.py +90 -87
edsl/questions/derived/QuestionTopK.py +93 -93
edsl/questions/derived/QuestionYesNo.py +82 -82
edsl/questions/descriptors.py +427 -413
edsl/questions/loop_processor.py +149 -0
edsl/questions/prompt_templates/question_budget.jinja +13 -13
edsl/questions/prompt_templates/question_checkbox.jinja +32 -32
edsl/questions/prompt_templates/question_extract.jinja +11 -11
edsl/questions/prompt_templates/question_free_text.jinja +3 -3
edsl/questions/prompt_templates/question_linear_scale.jinja +11 -11
edsl/questions/prompt_templates/question_list.jinja +17 -17
edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -33
edsl/questions/prompt_templates/question_numerical.jinja +36 -36
edsl/questions/{QuestionBaseGenMixin.py → question_base_gen_mixin.py} +168 -161
edsl/questions/question_registry.py +177 -177
edsl/questions/{RegisterQuestionsMeta.py → register_questions_meta.py} +71 -71
edsl/questions/{ResponseValidatorABC.py → response_validator_abc.py} +188 -174
edsl/questions/response_validator_factory.py +34 -0
edsl/questions/settings.py +12 -12
edsl/questions/templates/budget/answering_instructions.jinja +7 -7
edsl/questions/templates/budget/question_presentation.jinja +7 -7
edsl/questions/templates/checkbox/answering_instructions.jinja +10 -10
edsl/questions/templates/checkbox/question_presentation.jinja +22 -22
edsl/questions/templates/extract/answering_instructions.jinja +7 -7
edsl/questions/templates/likert_five/answering_instructions.jinja +10 -10
edsl/questions/templates/likert_five/question_presentation.jinja +11 -11
edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -5
edsl/questions/templates/linear_scale/question_presentation.jinja +5 -5
edsl/questions/templates/list/answering_instructions.jinja +3 -3
edsl/questions/templates/list/question_presentation.jinja +5 -5
edsl/questions/templates/matrix/__init__.py +1 -0
edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
edsl/questions/templates/matrix/question_presentation.jinja +20 -0
edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -9
edsl/questions/templates/multiple_choice/question_presentation.jinja +11 -11
edsl/questions/templates/numerical/answering_instructions.jinja +6 -6
edsl/questions/templates/numerical/question_presentation.jinja +6 -6
edsl/questions/templates/rank/answering_instructions.jinja +11 -11
edsl/questions/templates/rank/question_presentation.jinja +15 -15
edsl/questions/templates/top_k/answering_instructions.jinja +8 -8
edsl/questions/templates/top_k/question_presentation.jinja +22 -22
edsl/questions/templates/yes_no/answering_instructions.jinja +6 -6
edsl/questions/templates/yes_no/question_presentation.jinja +11 -11
edsl/results/CSSParameterizer.py +108 -108
edsl/results/Dataset.py +587 -424
edsl/results/DatasetExportMixin.py +594 -731
edsl/results/DatasetTree.py +295 -275
edsl/results/MarkdownToDocx.py +122 -0
edsl/results/MarkdownToPDF.py +111 -0
edsl/results/Result.py +557 -465
edsl/results/Results.py +1183 -1165
edsl/results/ResultsExportMixin.py +45 -43
edsl/results/ResultsGGMixin.py +121 -121
edsl/results/TableDisplay.py +125 -198
edsl/results/TextEditor.py +50 -0
edsl/results/__init__.py +2 -2
edsl/results/file_exports.py +252 -0
edsl/results/{ResultsFetchMixin.py → results_fetch_mixin.py} +33 -33
edsl/results/{Selector.py → results_selector.py} +145 -135
edsl/results/{ResultsToolsMixin.py → results_tools_mixin.py} +98 -98
edsl/results/smart_objects.py +96 -0
edsl/results/table_data_class.py +12 -0
edsl/results/table_display.css +77 -77
edsl/results/table_renderers.py +118 -0
edsl/results/tree_explore.py +115 -115
edsl/scenarios/ConstructDownloadLink.py +109 -0
edsl/scenarios/DocumentChunker.py +102 -0
edsl/scenarios/DocxScenario.py +16 -0
edsl/scenarios/FileStore.py +511 -632
edsl/scenarios/PdfExtractor.py +40 -0
edsl/scenarios/Scenario.py +498 -601
edsl/scenarios/ScenarioHtmlMixin.py +65 -64
edsl/scenarios/ScenarioList.py +1458 -1287
edsl/scenarios/ScenarioListExportMixin.py +45 -52
edsl/scenarios/ScenarioListPdfMixin.py +239 -261
edsl/scenarios/__init__.py +3 -4
edsl/scenarios/directory_scanner.py +96 -0
edsl/scenarios/file_methods.py +85 -0
edsl/scenarios/handlers/__init__.py +13 -0
edsl/scenarios/handlers/csv.py +38 -0
edsl/scenarios/handlers/docx.py +76 -0
edsl/scenarios/handlers/html.py +37 -0
edsl/scenarios/handlers/json.py +111 -0
edsl/scenarios/handlers/latex.py +5 -0
edsl/scenarios/handlers/md.py +51 -0
edsl/scenarios/handlers/pdf.py +68 -0
edsl/scenarios/handlers/png.py +39 -0
edsl/scenarios/handlers/pptx.py +105 -0
edsl/scenarios/handlers/py.py +294 -0
edsl/scenarios/handlers/sql.py +313 -0
edsl/scenarios/handlers/sqlite.py +149 -0
edsl/scenarios/handlers/txt.py +33 -0
edsl/scenarios/{ScenarioJoin.py → scenario_join.py} +131 -127
edsl/scenarios/scenario_selector.py +156 -0
edsl/shared.py +1 -1
edsl/study/ObjectEntry.py +173 -173
edsl/study/ProofOfWork.py +113 -113
edsl/study/SnapShot.py +80 -80
edsl/study/Study.py +521 -528
edsl/study/__init__.py +4 -4
edsl/surveys/ConstructDAG.py +92 -0
edsl/surveys/DAG.py +148 -148
edsl/surveys/EditSurvey.py +221 -0
edsl/surveys/InstructionHandler.py +100 -0
edsl/surveys/Memory.py +31 -31
edsl/surveys/MemoryManagement.py +72 -0
edsl/surveys/MemoryPlan.py +244 -244
edsl/surveys/Rule.py +327 -326
edsl/surveys/RuleCollection.py +385 -387
edsl/surveys/RuleManager.py +172 -0
edsl/surveys/Simulator.py +75 -0
edsl/surveys/Survey.py +1280 -1801
edsl/surveys/SurveyCSS.py +273 -261
edsl/surveys/SurveyExportMixin.py +259 -259
edsl/surveys/{SurveyFlowVisualizationMixin.py → SurveyFlowVisualization.py} +181 -179
edsl/surveys/SurveyQualtricsImport.py +284 -284
edsl/surveys/SurveyToApp.py +141 -0
edsl/surveys/__init__.py +5 -3
edsl/surveys/base.py +53 -53
edsl/surveys/descriptors.py +60 -56
edsl/surveys/instructions/ChangeInstruction.py +48 -49
edsl/surveys/instructions/Instruction.py +56 -65
edsl/surveys/instructions/InstructionCollection.py +82 -77
edsl/templates/error_reporting/base.html +23 -23
edsl/templates/error_reporting/exceptions_by_model.html +34 -34
edsl/templates/error_reporting/exceptions_by_question_name.html +16 -16
edsl/templates/error_reporting/exceptions_by_type.html +16 -16
edsl/templates/error_reporting/interview_details.html +115 -115
edsl/templates/error_reporting/interviews.html +19 -19
edsl/templates/error_reporting/overview.html +4 -4
edsl/templates/error_reporting/performance_plot.html +1 -1
edsl/templates/error_reporting/report.css +73 -73
edsl/templates/error_reporting/report.html +117 -117
edsl/templates/error_reporting/report.js +25 -25
edsl/tools/__init__.py +1 -1
edsl/tools/clusters.py +192 -192
edsl/tools/embeddings.py +27 -27
edsl/tools/embeddings_plotting.py +118 -118
edsl/tools/plotting.py +112 -112
edsl/tools/summarize.py +18 -18
edsl/utilities/PrettyList.py +56 -0
edsl/utilities/SystemInfo.py +28 -28
edsl/utilities/__init__.py +22 -22
edsl/utilities/ast_utilities.py +25 -25
edsl/utilities/data/Registry.py +6 -6
edsl/utilities/data/__init__.py +1 -1
edsl/utilities/data/scooter_results.json +1 -1
edsl/utilities/decorators.py +77 -77
edsl/utilities/gcp_bucket/cloud_storage.py +96 -96
edsl/utilities/interface.py +627 -627
edsl/utilities/is_notebook.py +18 -0
edsl/utilities/is_valid_variable_name.py +11 -0
edsl/utilities/naming_utilities.py +263 -263
edsl/utilities/remove_edsl_version.py +24 -0
edsl/utilities/repair_functions.py +28 -28
edsl/utilities/restricted_python.py +70 -70
edsl/utilities/utilities.py +436 -424
{edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev5.dist-info}/LICENSE +21 -21
{edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev5.dist-info}/METADATA +13 -11
edsl-0.1.39.dev5.dist-info/RECORD +358 -0
{edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev5.dist-info}/WHEEL +1 -1
edsl/language_models/KeyLookup.py +0 -30
edsl/language_models/registry.py +0 -190
edsl/language_models/unused/ReplicateBase.py +0 -83
edsl/results/ResultsDBMixin.py +0 -238
edsl-0.1.39.dev3.dist-info/RECORD +0 -277

edsl/scenarios/ScenarioListExportMixin.py CHANGED Viewed

@@ -1,52 +1,45 @@
-"""Mixin class for exporting results."""
-from functools import wraps
-from edsl.results.DatasetExportMixin import DatasetExportMixin
-def to_dataset(func):
-    """Convert the object to a Dataset object before calling the function."""
-    @wraps(func)
-    def wrapper(self, *args, **kwargs):
-        """Return the function with the Results object converted to a Dataset object."""
-        if self.__class__.__name__ == "ScenarioList":
-            return func(self.to_dataset(), *args, **kwargs)
-        else:
-            raise Exception(
-                f"Class {self.__class__.__name__} not recognized as a Results or Dataset object."
-            )
-    return wrapper
-def decorate_methods_from_mixin(cls, mixin_cls):
-    for attr_name, attr_value in mixin_cls.__dict__.items():
-        if callable(attr_value) and not attr_name.startswith("__"):
-            setattr(cls, attr_name, to_dataset(attr_value))
-    return cls
-# def decorate_all_methods(cls):
-#     for attr_name, attr_value in cls.__dict__.items():
-#         if callable(attr_value):
-#             setattr(cls, attr_name, to_dataset(attr_value))
-#     return cls
-# @decorate_all_methods
-class ScenarioListExportMixin(DatasetExportMixin):
-    """Mixin class for exporting Results objects."""
-    def __init_subclass__(cls, **kwargs):
-        super().__init_subclass__(**kwargs)
-        decorate_methods_from_mixin(cls, DatasetExportMixin)
-    def to_docx(self, filename: str):
-        """Export the ScenarioList to a .docx file."""
-        dataset = self.to_dataset()
-        from edsl.results.DatasetTree import Tree
-        tree = Tree(dataset)
-        tree.construct_tree()
-        tree.to_docx(filename)
+"""Mixin class for exporting results."""
+from functools import wraps
+from edsl.results.DatasetExportMixin import DatasetExportMixin
+def to_dataset(func):
+    """Convert the object to a Dataset object before calling the function."""
+    @wraps(func)
+    def wrapper(self, *args, **kwargs):
+        """Return the function with the Results object converted to a Dataset object."""
+        if self.__class__.__name__ == "ScenarioList":
+            return func(self.to_dataset(), *args, **kwargs)
+        else:
+            raise Exception(
+                f"Class {self.__class__.__name__} not recognized as a Results or Dataset object."
+            )
+    return wrapper
+def decorate_methods_from_mixin(cls, mixin_cls):
+    for attr_name, attr_value in mixin_cls.__dict__.items():
+        if callable(attr_value) and not attr_name.startswith("__"):
+            setattr(cls, attr_name, to_dataset(attr_value))
+    return cls
+# @decorate_all_methods
+class ScenarioListExportMixin(DatasetExportMixin):
+    """Mixin class for exporting Results objects."""
+    def __init_subclass__(cls, **kwargs):
+        super().__init_subclass__(**kwargs)
+        decorate_methods_from_mixin(cls, DatasetExportMixin)
+    def to_docx(self, filename: str):
+        """Export the ScenarioList to a .docx file."""
+        dataset = self.to_dataset()
+        from edsl.results.DatasetTree import Tree
+        tree = Tree(dataset)
+        tree.construct_tree()
+        tree.to_docx(filename)

edsl/scenarios/ScenarioListPdfMixin.py CHANGED Viewed

@@ -1,261 +1,239 @@
-import fitz  # PyMuPDF
-import os
-import copy
-import subprocess
-import requests
-import tempfile
-import os
-# import urllib.parse as urlparse
-from urllib.parse import urlparse
-# from edsl import Scenario
-import requests
-import re
-import tempfile
-import os
-import atexit
-from urllib.parse import urlparse, parse_qs
-class GoogleDriveDownloader:
-    _temp_dir = None
-    _temp_file_path = None
-    @classmethod
-    def fetch_from_drive(cls, url, filename=None):
-        # Extract file ID from the URL
-        file_id = cls._extract_file_id(url)
-        if not file_id:
-            raise ValueError("Invalid Google Drive URL")
-        # Construct the download URL
-        download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
-        # Send a GET request to the URL
-        session = requests.Session()
-        response = session.get(download_url, stream=True)
-        response.raise_for_status()
-        # Check for large file download prompt
-        for key, value in response.cookies.items():
-            if key.startswith("download_warning"):
-                params = {"id": file_id, "confirm": value}
-                response = session.get(download_url, params=params, stream=True)
-                break
-        # Create a temporary file to save the download
-        if not filename:
-            filename = "downloaded_file"
-        if cls._temp_dir is None:
-            cls._temp_dir = tempfile.TemporaryDirectory()
-            atexit.register(cls._cleanup)
-        cls._temp_file_path = os.path.join(cls._temp_dir.name, filename)
-        # Write the content to the temporary file
-        with open(cls._temp_file_path, "wb") as f:
-            for chunk in response.iter_content(32768):
-                if chunk:
-                    f.write(chunk)
-        print(f"File saved to: {cls._temp_file_path}")
-        return cls._temp_file_path
-    @staticmethod
-    def _extract_file_id(url):
-        # Try to extract file ID from '/file/d/' format
-        file_id_match = re.search(r"/d/([a-zA-Z0-9-_]+)", url)
-        if file_id_match:
-            return file_id_match.group(1)
-        # If not found, try to extract from 'open?id=' format
-        parsed_url = urlparse(url)
-        query_params = parse_qs(parsed_url.query)
-        if "id" in query_params:
-            return query_params["id"][0]
-        return None
-    @classmethod
-    def _cleanup(cls):
-        if cls._temp_dir:
-            cls._temp_dir.cleanup()
-    @classmethod
-    def get_temp_file_path(cls):
-        return cls._temp_file_path
-def fetch_and_save_pdf(url, filename):
-    # Send a GET request to the URL
-    response = requests.get(url)
-    # Check if the request was successful
-    response.raise_for_status()
-    # Create a temporary directory
-    with tempfile.TemporaryDirectory() as temp_dir:
-        # Construct the full path for the file
-        temp_file_path = os.path.join(temp_dir, filename)
-        # Write the content to the temporary file
-        with open(temp_file_path, "wb") as file:
-            file.write(response.content)
-        print(f"PDF saved to: {temp_file_path}")
-        # Here you can perform operations with the file
-        # The file will be automatically deleted when you exit this block
-    return temp_file_path
-# Example usage:
-# url = "https://example.com/sample.pdf"
-# fetch_and_save_pdf(url, "sample.pdf")
-class ScenarioListPdfMixin:
-    @classmethod
-    def from_pdf(cls, filename_or_url, collapse_pages=False):
-        # Check if the input is a URL
-        if cls.is_url(filename_or_url):
-            # Check if it's a Google Drive URL
-            if "drive.google.com" in filename_or_url:
-                temp_filename = GoogleDriveDownloader.fetch_from_drive(
-                    filename_or_url, "temp_pdf.pdf"
-                )
-            else:
-                # For other URLs, use the previous fetch_and_save_pdf function
-                temp_filename = fetch_and_save_pdf(filename_or_url, "temp_pdf.pdf")
-            scenarios = list(cls.extract_text_from_pdf(temp_filename))
-        else:
-            # If it's not a URL, assume it's a local file path
-            scenarios = list(cls.extract_text_from_pdf(filename_or_url))
-        if not collapse_pages:
-            return cls(scenarios)
-        else:
-            txt = ""
-            for scenario in scenarios:
-                txt += scenario["text"]
-            from edsl.scenarios import Scenario
-            base_scenario = copy.copy(scenarios[0])
-            base_scenario["text"] = txt
-        return base_scenario
-    @staticmethod
-    def is_url(string):
-        try:
-            result = urlparse(string)
-            return all([result.scheme, result.netloc])
-        except ValueError:
-            return False
-    @classmethod
-    def _from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
-        """
-        Convert each page of a PDF into an image and create Scenario instances.
-        :param pdf_path: Path to the PDF file.
-        :param image_format: Format of the output images (default is 'jpeg').
-        :return: ScenarioList instance containing the Scenario instances.
-        """
-        import tempfile
-        from pdf2image import convert_from_path
-        from edsl.scenarios import Scenario
-        with tempfile.TemporaryDirectory() as output_folder:
-            # Convert PDF to images
-            images = convert_from_path(pdf_path)
-            scenarios = []
-            # Save each page as an image and create Scenario instances
-            for i, image in enumerate(images):
-                image_path = os.path.join(output_folder, f"page_{i+1}.{image_format}")
-                image.save(image_path, image_format.upper())
-                scenario = Scenario._from_filepath_image(image_path)
-                scenarios.append(scenario)
-            # print(f"Saved {len(images)} pages as images in {output_folder}")
-            return cls(scenarios)
-    @staticmethod
-    def extract_text_from_pdf(pdf_path):
-        from edsl import Scenario
-        # TODO: Add test case
-        # Ensure the file exists
-        if not os.path.exists(pdf_path):
-            raise FileNotFoundError(f"The file {pdf_path} does not exist.")
-        # Open the PDF file
-        document = fitz.open(pdf_path)
-        # Get the filename from the path
-        filename = os.path.basename(pdf_path)
-        # Iterate through each page and extract text
-        for page_num in range(len(document)):
-            page = document.load_page(page_num)
-            text = page.get_text()
-            # Create a dictionary for the current page
-            page_info = {"filename": filename, "page": page_num + 1, "text": text}
-            yield Scenario(page_info)
-    def create_hello_world_pdf(pdf_path):
-        # LaTeX content
-        latex_content = r"""
-        \documentclass{article}
-        \title{Hello World}
-        \author{John}
-        \date{\today}
-        \begin{document}
-        \maketitle
-        \section{Hello, World!}
-        This is a simple hello world example created with LaTeX and Python.
-        \end{document}
-        """
-        # Create a .tex file
-        tex_filename = pdf_path + ".tex"
-        with open(tex_filename, "w") as tex_file:
-            tex_file.write(latex_content)
-        # Compile the .tex file to PDF
-        subprocess.run(["pdflatex", tex_filename], check=True)
-        # Optionally, clean up auxiliary files generated by pdflatex
-        aux_files = [pdf_path + ext for ext in [".aux", ".log"]]
-        for aux_file in aux_files:
-            try:
-                os.remove(aux_file)
-            except FileNotFoundError:
-                pass
-if __name__ == "__main__":
-    pass
-    # from edsl import ScenarioList
-    # class ScenarioListNew(ScenarioList, ScenaroListPdfMixin):
-    #     pass
-    # #ScenarioListNew.create_hello_world_pdf('hello_world')
-    # #scenarios = ScenarioListNew.from_pdf('hello_world.pdf')
-    # #print(scenarios)
-    # from edsl import ScenarioList, QuestionFreeText
-    # homo_silicus = ScenarioList.from_pdf('w31122.pdf')
-    # q = QuestionFreeText(question_text = "What is the key point of the text in {{ text }}?", question_name = "key_point")
-    # results = q.by(homo_silicus).run(progress_bar = True)
-    # results.select('scenario.page', 'answer.key_point').order_by('page').print()
+import os
+import re
+import copy
+import atexit
+import tempfile
+import subprocess
+class GoogleDriveDownloader:
+    _temp_dir = None
+    _temp_file_path = None
+    @classmethod
+    def fetch_from_drive(cls, url, filename=None):
+        import requests
+        # Extract file ID from the URL
+        file_id = cls._extract_file_id(url)
+        if not file_id:
+            raise ValueError("Invalid Google Drive URL")
+        # Construct the download URL
+        download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
+        # Send a GET request to the URL
+        session = requests.Session()
+        response = session.get(download_url, stream=True)
+        response.raise_for_status()
+        # Check for large file download prompt
+        for key, value in response.cookies.items():
+            if key.startswith("download_warning"):
+                params = {"id": file_id, "confirm": value}
+                response = session.get(download_url, params=params, stream=True)
+                break
+        # Create a temporary file to save the download
+        if not filename:
+            filename = "downloaded_file"
+        if cls._temp_dir is None:
+            cls._temp_dir = tempfile.TemporaryDirectory()
+            atexit.register(cls._cleanup)
+        cls._temp_file_path = os.path.join(cls._temp_dir.name, filename)
+        # Write the content to the temporary file
+        with open(cls._temp_file_path, "wb") as f:
+            for chunk in response.iter_content(32768):
+                if chunk:
+                    f.write(chunk)
+        print(f"File saved to: {cls._temp_file_path}")
+        return cls._temp_file_path
+    @staticmethod
+    def _extract_file_id(url):
+        from urllib.parse import urlparse, parse_qs
+        # Try to extract file ID from '/file/d/' format
+        file_id_match = re.search(r"/d/([a-zA-Z0-9-_]+)", url)
+        if file_id_match:
+            return file_id_match.group(1)
+        # If not found, try to extract from 'open?id=' format
+        parsed_url = urlparse(url)
+        query_params = parse_qs(parsed_url.query)
+        if "id" in query_params:
+            return query_params["id"][0]
+        return None
+    @classmethod
+    def _cleanup(cls):
+        if cls._temp_dir:
+            cls._temp_dir.cleanup()
+    @classmethod
+    def get_temp_file_path(cls):
+        return cls._temp_file_path
+def fetch_and_save_pdf(url, filename):
+    # Send a GET request to the URL
+    import requests
+    response = requests.get(url)
+    # Check if the request was successful
+    response.raise_for_status()
+    # Create a temporary directory
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Construct the full path for the file
+        temp_file_path = os.path.join(temp_dir, filename)
+        # Write the content to the temporary file
+        with open(temp_file_path, "wb") as file:
+            file.write(response.content)
+        print(f"PDF saved to: {temp_file_path}")
+        # Here you can perform operations with the file
+        # The file will be automatically deleted when you exit this block
+    return temp_file_path
+class ScenarioListPdfMixin:
+    @classmethod
+    def from_pdf(cls, filename_or_url, collapse_pages=False):
+        # Check if the input is a URL
+        if cls.is_url(filename_or_url):
+            # Check if it's a Google Drive URL
+            if "drive.google.com" in filename_or_url:
+                temp_filename = GoogleDriveDownloader.fetch_from_drive(
+                    filename_or_url, "temp_pdf.pdf"
+                )
+            else:
+                # For other URLs, use the previous fetch_and_save_pdf function
+                temp_filename = fetch_and_save_pdf(filename_or_url, "temp_pdf.pdf")
+            scenarios = list(cls.extract_text_from_pdf(temp_filename))
+        else:
+            # If it's not a URL, assume it's a local file path
+            scenarios = list(cls.extract_text_from_pdf(filename_or_url))
+        if not collapse_pages:
+            return cls(scenarios)
+        else:
+            txt = ""
+            for scenario in scenarios:
+                txt += scenario["text"]
+            from edsl.scenarios import Scenario
+            base_scenario = copy.copy(scenarios[0])
+            base_scenario["text"] = txt
+        return base_scenario
+    @staticmethod
+    def is_url(string):
+        from urllib.parse import urlparse
+        try:
+            result = urlparse(string)
+            return all([result.scheme, result.netloc])
+        except ValueError:
+            return False
+    @classmethod
+    def _from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
+        """
+        Convert each page of a PDF into an image and create Scenario instances.
+        :param pdf_path: Path to the PDF file.
+        :param image_format: Format of the output images (default is 'jpeg').
+        :return: ScenarioList instance containing the Scenario instances.
+        """
+        import tempfile
+        from pdf2image import convert_from_path
+        from edsl.scenarios import Scenario
+        with tempfile.TemporaryDirectory() as output_folder:
+            # Convert PDF to images
+            images = convert_from_path(pdf_path)
+            scenarios = []
+            # Save each page as an image and create Scenario instances
+            for i, image in enumerate(images):
+                image_path = os.path.join(output_folder, f"page_{i+1}.{image_format}")
+                image.save(image_path, image_format.upper())
+                scenario = Scenario._from_filepath_image(image_path)
+                scenarios.append(scenario)
+            # print(f"Saved {len(images)} pages as images in {output_folder}")
+            return cls(scenarios)
+    @staticmethod
+    def extract_text_from_pdf(pdf_path):
+        from edsl.scenarios.Scenario import Scenario
+        import fitz  # PyMuPDF
+        # TODO: Add test case
+        # Ensure the file exists
+        if not os.path.exists(pdf_path):
+            raise FileNotFoundError(f"The file {pdf_path} does not exist.")
+        # Open the PDF file
+        document = fitz.open(pdf_path)
+        # Get the filename from the path
+        filename = os.path.basename(pdf_path)
+        # Iterate through each page and extract text
+        for page_num in range(len(document)):
+            page = document.load_page(page_num)
+            text = page.get_text()
+            # Create a dictionary for the current page
+            page_info = {"filename": filename, "page": page_num + 1, "text": text}
+            yield Scenario(page_info)
+    def create_hello_world_pdf(pdf_path):
+        # LaTeX content
+        latex_content = r"""
+        \documentclass{article}
+        \title{Hello World}
+        \author{John}
+        \date{\today}
+        \begin{document}
+        \maketitle
+        \section{Hello, World!}
+        This is a simple hello world example created with LaTeX and Python.
+        \end{document}
+        """
+        # Create a .tex file
+        tex_filename = pdf_path + ".tex"
+        with open(tex_filename, "w") as tex_file:
+            tex_file.write(latex_content)
+        # Compile the .tex file to PDF
+        subprocess.run(["pdflatex", tex_filename], check=True)
+        # Optionally, clean up auxiliary files generated by pdflatex
+        aux_files = [pdf_path + ext for ext in [".aux", ".log"]]
+        for aux_file in aux_files:
+            try:
+                os.remove(aux_file)
+            except FileNotFoundError:
+                pass
+if __name__ == "__main__":
+    import doctest
+    doctest.testmod()

edsl/scenarios/__init__.py CHANGED Viewed

@@ -1,4 +1,3 @@
-from edsl.scenarios.Scenario import Scenario
-from edsl.scenarios.ScenarioList import ScenarioList
-# from edsl.scenarios.FileStore import FileStore
+from edsl.scenarios.Scenario import Scenario
+from edsl.scenarios.ScenarioList import ScenarioList
+from edsl.scenarios.FileStore import FileStore

edsl 0.1.39.dev3__py3-none-any.whl → 0.1.39.dev5__py3-none-any.whl

edsl 0.1.39.dev3__py3-none-any.whl → 0.1.39.dev5__py3-none-any.whl