PyPI - edsl - Versions diffs - 0.1.39.dev2__py3-none-any.whl → 0.1.39.dev3__py3-none-any.whl - Mend

edsl 0.1.39.dev2__py3-none-any.whl → 0.1.39.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (334) hide show

edsl/Base.py +332 -385
edsl/BaseDiff.py +260 -260
edsl/TemplateLoader.py +24 -24
edsl/__init__.py +49 -57
edsl/__version__.py +1 -1
edsl/agents/Agent.py +867 -1079
edsl/agents/AgentList.py +413 -551
edsl/agents/Invigilator.py +233 -285
edsl/agents/InvigilatorBase.py +270 -254
edsl/agents/PromptConstructor.py +354 -252
edsl/agents/__init__.py +3 -2
edsl/agents/descriptors.py +99 -99
edsl/agents/prompt_helpers.py +129 -129
edsl/auto/AutoStudy.py +117 -117
edsl/auto/StageBase.py +230 -230
edsl/auto/StageGenerateSurvey.py +178 -178
edsl/auto/StageLabelQuestions.py +125 -125
edsl/auto/StagePersona.py +61 -61
edsl/auto/StagePersonaDimensionValueRanges.py +88 -88
edsl/auto/StagePersonaDimensionValues.py +74 -74
edsl/auto/StagePersonaDimensions.py +69 -69
edsl/auto/StageQuestions.py +73 -73
edsl/auto/SurveyCreatorPipeline.py +21 -21
edsl/auto/utilities.py +224 -224
edsl/base/Base.py +279 -279
edsl/config.py +157 -177
edsl/conversation/Conversation.py +290 -290
edsl/conversation/car_buying.py +58 -59
edsl/conversation/chips.py +95 -95
edsl/conversation/mug_negotiation.py +81 -81
edsl/conversation/next_speaker_utilities.py +93 -93
edsl/coop/PriceFetcher.py +54 -54
edsl/coop/__init__.py +2 -2
edsl/coop/coop.py +1028 -1090
edsl/coop/utils.py +131 -131
edsl/data/Cache.py +555 -562
edsl/data/CacheEntry.py +233 -230
edsl/data/CacheHandler.py +149 -170
edsl/data/RemoteCacheSync.py +78 -78
edsl/data/SQLiteDict.py +292 -292
edsl/data/__init__.py +4 -5
edsl/data/orm.py +10 -10
edsl/data_transfer_models.py +73 -74
edsl/enums.py +175 -195
edsl/exceptions/BaseException.py +21 -21
edsl/exceptions/__init__.py +54 -54
edsl/exceptions/agents.py +42 -54
edsl/exceptions/cache.py +5 -5
edsl/exceptions/configuration.py +16 -16
edsl/exceptions/coop.py +10 -10
edsl/exceptions/data.py +14 -14
edsl/exceptions/general.py +34 -34
edsl/exceptions/jobs.py +33 -33
edsl/exceptions/language_models.py +63 -63
edsl/exceptions/prompts.py +15 -15
edsl/exceptions/questions.py +91 -109
edsl/exceptions/results.py +29 -29
edsl/exceptions/scenarios.py +22 -29
edsl/exceptions/surveys.py +37 -37
edsl/inference_services/AnthropicService.py +87 -84
edsl/inference_services/AwsBedrock.py +120 -118
edsl/inference_services/AzureAI.py +217 -215
edsl/inference_services/DeepInfraService.py +18 -18
edsl/inference_services/GoogleService.py +148 -139
edsl/inference_services/GroqService.py +20 -20
edsl/inference_services/InferenceServiceABC.py +147 -80
edsl/inference_services/InferenceServicesCollection.py +97 -122
edsl/inference_services/MistralAIService.py +123 -120
edsl/inference_services/OllamaService.py +18 -18
edsl/inference_services/OpenAIService.py +224 -221
edsl/inference_services/PerplexityService.py +163 -160
edsl/inference_services/TestService.py +89 -92
edsl/inference_services/TogetherAIService.py +170 -170
edsl/inference_services/models_available_cache.py +118 -118
edsl/inference_services/rate_limits_cache.py +25 -25
edsl/inference_services/registry.py +41 -41
edsl/inference_services/write_available.py +10 -10
edsl/jobs/Answers.py +56 -43
edsl/jobs/Jobs.py +898 -757
edsl/jobs/JobsChecks.py +147 -172
edsl/jobs/JobsPrompts.py +268 -270
edsl/jobs/JobsRemoteInferenceHandler.py +239 -287
edsl/jobs/__init__.py +1 -1
edsl/jobs/buckets/BucketCollection.py +63 -104
edsl/jobs/buckets/ModelBuckets.py +65 -65
edsl/jobs/buckets/TokenBucket.py +251 -283
edsl/jobs/interviews/Interview.py +661 -358
edsl/jobs/interviews/InterviewExceptionCollection.py +99 -99
edsl/jobs/interviews/InterviewExceptionEntry.py +186 -186
edsl/jobs/interviews/InterviewStatistic.py +63 -63
edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -25
edsl/jobs/interviews/InterviewStatusDictionary.py +78 -78
edsl/jobs/interviews/InterviewStatusLog.py +92 -92
edsl/jobs/interviews/ReportErrors.py +66 -66
edsl/jobs/interviews/interview_status_enum.py +9 -9
edsl/jobs/runners/JobsRunnerAsyncio.py +466 -421
edsl/jobs/runners/JobsRunnerStatus.py +330 -330
edsl/jobs/tasks/QuestionTaskCreator.py +242 -244
edsl/jobs/tasks/TaskCreators.py +64 -64
edsl/jobs/tasks/TaskHistory.py +450 -449
edsl/jobs/tasks/TaskStatusLog.py +23 -23
edsl/jobs/tasks/task_status_enum.py +163 -161
edsl/jobs/tokens/InterviewTokenUsage.py +27 -27
edsl/jobs/tokens/TokenUsage.py +34 -34
edsl/language_models/KeyLookup.py +30 -0
edsl/language_models/LanguageModel.py +668 -571
edsl/language_models/ModelList.py +155 -153
edsl/language_models/RegisterLanguageModelsMeta.py +184 -184
edsl/language_models/__init__.py +3 -2
edsl/language_models/fake_openai_call.py +15 -15
edsl/language_models/fake_openai_service.py +61 -61
edsl/language_models/registry.py +190 -180
edsl/language_models/repair.py +156 -156
edsl/language_models/unused/ReplicateBase.py +83 -0
edsl/language_models/utilities.py +64 -65
edsl/notebooks/Notebook.py +258 -263
edsl/notebooks/__init__.py +1 -1
edsl/prompts/Prompt.py +362 -352
edsl/prompts/__init__.py +2 -2
edsl/questions/AnswerValidatorMixin.py +289 -334
edsl/questions/QuestionBase.py +664 -509
edsl/questions/QuestionBaseGenMixin.py +161 -165
edsl/questions/QuestionBasePromptsMixin.py +217 -221
edsl/questions/QuestionBudget.py +227 -227
edsl/questions/QuestionCheckBox.py +359 -359
edsl/questions/QuestionExtract.py +182 -182
edsl/questions/QuestionFreeText.py +114 -113
edsl/questions/QuestionFunctional.py +166 -166
edsl/questions/QuestionList.py +231 -229
edsl/questions/QuestionMultipleChoice.py +286 -330
edsl/questions/QuestionNumerical.py +153 -151
edsl/questions/QuestionRank.py +324 -314
edsl/questions/Quick.py +41 -41
edsl/questions/RegisterQuestionsMeta.py +71 -71
edsl/questions/ResponseValidatorABC.py +174 -200
edsl/questions/SimpleAskMixin.py +73 -74
edsl/questions/__init__.py +26 -27
edsl/questions/compose_questions.py +98 -98
edsl/questions/decorators.py +21 -21
edsl/questions/derived/QuestionLikertFive.py +76 -76
edsl/questions/derived/QuestionLinearScale.py +87 -90
edsl/questions/derived/QuestionTopK.py +93 -93
edsl/questions/derived/QuestionYesNo.py +82 -82
edsl/questions/descriptors.py +413 -427
edsl/questions/prompt_templates/question_budget.jinja +13 -13
edsl/questions/prompt_templates/question_checkbox.jinja +32 -32
edsl/questions/prompt_templates/question_extract.jinja +11 -11
edsl/questions/prompt_templates/question_free_text.jinja +3 -3
edsl/questions/prompt_templates/question_linear_scale.jinja +11 -11
edsl/questions/prompt_templates/question_list.jinja +17 -17
edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -33
edsl/questions/prompt_templates/question_numerical.jinja +36 -36
edsl/questions/question_registry.py +177 -177
edsl/questions/settings.py +12 -12
edsl/questions/templates/budget/answering_instructions.jinja +7 -7
edsl/questions/templates/budget/question_presentation.jinja +7 -7
edsl/questions/templates/checkbox/answering_instructions.jinja +10 -10
edsl/questions/templates/checkbox/question_presentation.jinja +22 -22
edsl/questions/templates/extract/answering_instructions.jinja +7 -7
edsl/questions/templates/likert_five/answering_instructions.jinja +10 -10
edsl/questions/templates/likert_five/question_presentation.jinja +11 -11
edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -5
edsl/questions/templates/linear_scale/question_presentation.jinja +5 -5
edsl/questions/templates/list/answering_instructions.jinja +3 -3
edsl/questions/templates/list/question_presentation.jinja +5 -5
edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -9
edsl/questions/templates/multiple_choice/question_presentation.jinja +11 -11
edsl/questions/templates/numerical/answering_instructions.jinja +6 -6
edsl/questions/templates/numerical/question_presentation.jinja +6 -6
edsl/questions/templates/rank/answering_instructions.jinja +11 -11
edsl/questions/templates/rank/question_presentation.jinja +15 -15
edsl/questions/templates/top_k/answering_instructions.jinja +8 -8
edsl/questions/templates/top_k/question_presentation.jinja +22 -22
edsl/questions/templates/yes_no/answering_instructions.jinja +6 -6
edsl/questions/templates/yes_no/question_presentation.jinja +11 -11
edsl/results/CSSParameterizer.py +108 -108
edsl/results/Dataset.py +424 -587
edsl/results/DatasetExportMixin.py +731 -653
edsl/results/DatasetTree.py +275 -295
edsl/results/Result.py +465 -451
edsl/results/Results.py +1165 -1172
edsl/results/ResultsDBMixin.py +238 -0
edsl/results/ResultsExportMixin.py +43 -45
edsl/results/ResultsFetchMixin.py +33 -33
edsl/results/ResultsGGMixin.py +121 -121
edsl/results/ResultsToolsMixin.py +98 -98
edsl/results/Selector.py +135 -145
edsl/results/TableDisplay.py +198 -125
edsl/results/__init__.py +2 -2
edsl/results/table_display.css +77 -77
edsl/results/tree_explore.py +115 -115
edsl/scenarios/FileStore.py +632 -511
edsl/scenarios/Scenario.py +601 -498
edsl/scenarios/ScenarioHtmlMixin.py +64 -65
edsl/scenarios/ScenarioJoin.py +127 -131
edsl/scenarios/ScenarioList.py +1287 -1430
edsl/scenarios/ScenarioListExportMixin.py +52 -45
edsl/scenarios/ScenarioListPdfMixin.py +261 -239
edsl/scenarios/__init__.py +4 -3
edsl/shared.py +1 -1
edsl/study/ObjectEntry.py +173 -173
edsl/study/ProofOfWork.py +113 -113
edsl/study/SnapShot.py +80 -80
edsl/study/Study.py +528 -521
edsl/study/__init__.py +4 -4
edsl/surveys/DAG.py +148 -148
edsl/surveys/Memory.py +31 -31
edsl/surveys/MemoryPlan.py +244 -244
edsl/surveys/Rule.py +326 -327
edsl/surveys/RuleCollection.py +387 -385
edsl/surveys/Survey.py +1801 -1229
edsl/surveys/SurveyCSS.py +261 -273
edsl/surveys/SurveyExportMixin.py +259 -259
edsl/surveys/{SurveyFlowVisualization.py → SurveyFlowVisualizationMixin.py} +179 -181
edsl/surveys/SurveyQualtricsImport.py +284 -284
edsl/surveys/__init__.py +3 -5
edsl/surveys/base.py +53 -53
edsl/surveys/descriptors.py +56 -60
edsl/surveys/instructions/ChangeInstruction.py +49 -48
edsl/surveys/instructions/Instruction.py +65 -56
edsl/surveys/instructions/InstructionCollection.py +77 -82
edsl/templates/error_reporting/base.html +23 -23
edsl/templates/error_reporting/exceptions_by_model.html +34 -34
edsl/templates/error_reporting/exceptions_by_question_name.html +16 -16
edsl/templates/error_reporting/exceptions_by_type.html +16 -16
edsl/templates/error_reporting/interview_details.html +115 -115
edsl/templates/error_reporting/interviews.html +19 -19
edsl/templates/error_reporting/overview.html +4 -4
edsl/templates/error_reporting/performance_plot.html +1 -1
edsl/templates/error_reporting/report.css +73 -73
edsl/templates/error_reporting/report.html +117 -117
edsl/templates/error_reporting/report.js +25 -25
edsl/tools/__init__.py +1 -1
edsl/tools/clusters.py +192 -192
edsl/tools/embeddings.py +27 -27
edsl/tools/embeddings_plotting.py +118 -118
edsl/tools/plotting.py +112 -112
edsl/tools/summarize.py +18 -18
edsl/utilities/SystemInfo.py +28 -28
edsl/utilities/__init__.py +22 -22
edsl/utilities/ast_utilities.py +25 -25
edsl/utilities/data/Registry.py +6 -6
edsl/utilities/data/__init__.py +1 -1
edsl/utilities/data/scooter_results.json +1 -1
edsl/utilities/decorators.py +77 -77
edsl/utilities/gcp_bucket/cloud_storage.py +96 -96
edsl/utilities/interface.py +627 -627
edsl/utilities/naming_utilities.py +263 -263
edsl/utilities/repair_functions.py +28 -28
edsl/utilities/restricted_python.py +70 -70
edsl/utilities/utilities.py +424 -436
{edsl-0.1.39.dev2.dist-info → edsl-0.1.39.dev3.dist-info}/LICENSE +21 -21
{edsl-0.1.39.dev2.dist-info → edsl-0.1.39.dev3.dist-info}/METADATA +10 -12
edsl-0.1.39.dev3.dist-info/RECORD +277 -0
edsl/agents/QuestionInstructionPromptBuilder.py +0 -128
edsl/agents/QuestionOptionProcessor.py +0 -172
edsl/agents/QuestionTemplateReplacementsBuilder.py +0 -137
edsl/coop/CoopFunctionsMixin.py +0 -15
edsl/coop/ExpectedParrotKeyHandler.py +0 -125
edsl/exceptions/inference_services.py +0 -5
edsl/inference_services/AvailableModelCacheHandler.py +0 -184
edsl/inference_services/AvailableModelFetcher.py +0 -209
edsl/inference_services/ServiceAvailability.py +0 -135
edsl/inference_services/data_structures.py +0 -62
edsl/jobs/AnswerQuestionFunctionConstructor.py +0 -188
edsl/jobs/FetchInvigilator.py +0 -40
edsl/jobs/InterviewTaskManager.py +0 -98
edsl/jobs/InterviewsConstructor.py +0 -48
edsl/jobs/JobsComponentConstructor.py +0 -189
edsl/jobs/JobsRemoteInferenceLogger.py +0 -239
edsl/jobs/RequestTokenEstimator.py +0 -30
edsl/jobs/buckets/TokenBucketAPI.py +0 -211
edsl/jobs/buckets/TokenBucketClient.py +0 -191
edsl/jobs/decorators.py +0 -35
edsl/jobs/jobs_status_enums.py +0 -9
edsl/jobs/loggers/HTMLTableJobLogger.py +0 -304
edsl/language_models/ComputeCost.py +0 -63
edsl/language_models/PriceManager.py +0 -127
edsl/language_models/RawResponseHandler.py +0 -106
edsl/language_models/ServiceDataSources.py +0 -0
edsl/language_models/key_management/KeyLookup.py +0 -63
edsl/language_models/key_management/KeyLookupBuilder.py +0 -273
edsl/language_models/key_management/KeyLookupCollection.py +0 -38
edsl/language_models/key_management/__init__.py +0 -0
edsl/language_models/key_management/models.py +0 -131
edsl/notebooks/NotebookToLaTeX.py +0 -142
edsl/questions/ExceptionExplainer.py +0 -77
edsl/questions/HTMLQuestion.py +0 -103
edsl/questions/LoopProcessor.py +0 -149
edsl/questions/QuestionMatrix.py +0 -265
edsl/questions/ResponseValidatorFactory.py +0 -28
edsl/questions/templates/matrix/__init__.py +0 -1
edsl/questions/templates/matrix/answering_instructions.jinja +0 -5
edsl/questions/templates/matrix/question_presentation.jinja +0 -20
edsl/results/MarkdownToDocx.py +0 -122
edsl/results/MarkdownToPDF.py +0 -111
edsl/results/TextEditor.py +0 -50
edsl/results/smart_objects.py +0 -96
edsl/results/table_data_class.py +0 -12
edsl/results/table_renderers.py +0 -118
edsl/scenarios/ConstructDownloadLink.py +0 -109
edsl/scenarios/DirectoryScanner.py +0 -96
edsl/scenarios/DocumentChunker.py +0 -102
edsl/scenarios/DocxScenario.py +0 -16
edsl/scenarios/PdfExtractor.py +0 -40
edsl/scenarios/ScenarioSelector.py +0 -156
edsl/scenarios/file_methods.py +0 -85
edsl/scenarios/handlers/__init__.py +0 -13
edsl/scenarios/handlers/csv.py +0 -38
edsl/scenarios/handlers/docx.py +0 -76
edsl/scenarios/handlers/html.py +0 -37
edsl/scenarios/handlers/json.py +0 -111
edsl/scenarios/handlers/latex.py +0 -5
edsl/scenarios/handlers/md.py +0 -51
edsl/scenarios/handlers/pdf.py +0 -68
edsl/scenarios/handlers/png.py +0 -39
edsl/scenarios/handlers/pptx.py +0 -105
edsl/scenarios/handlers/py.py +0 -294
edsl/scenarios/handlers/sql.py +0 -313
edsl/scenarios/handlers/sqlite.py +0 -149
edsl/scenarios/handlers/txt.py +0 -33
edsl/surveys/ConstructDAG.py +0 -92
edsl/surveys/EditSurvey.py +0 -221
edsl/surveys/InstructionHandler.py +0 -100
edsl/surveys/MemoryManagement.py +0 -72
edsl/surveys/RuleManager.py +0 -172
edsl/surveys/Simulator.py +0 -75
edsl/surveys/SurveyToApp.py +0 -141
edsl/utilities/PrettyList.py +0 -56
edsl/utilities/is_notebook.py +0 -18
edsl/utilities/is_valid_variable_name.py +0 -11
edsl/utilities/remove_edsl_version.py +0 -24
edsl-0.1.39.dev2.dist-info/RECORD +0 -352
{edsl-0.1.39.dev2.dist-info → edsl-0.1.39.dev3.dist-info}/WHEEL +0 -0

edsl/scenarios/ScenarioListExportMixin.py CHANGED Viewed

@@ -1,45 +1,52 @@
-"""Mixin class for exporting results."""
-from functools import wraps
-from edsl.results.DatasetExportMixin import DatasetExportMixin
-def to_dataset(func):
-    """Convert the object to a Dataset object before calling the function."""
-    @wraps(func)
-    def wrapper(self, *args, **kwargs):
-        """Return the function with the Results object converted to a Dataset object."""
-        if self.__class__.__name__ == "ScenarioList":
-            return func(self.to_dataset(), *args, **kwargs)
-        else:
-            raise Exception(
-                f"Class {self.__class__.__name__} not recognized as a Results or Dataset object."
-            )
-    return wrapper
-def decorate_methods_from_mixin(cls, mixin_cls):
-    for attr_name, attr_value in mixin_cls.__dict__.items():
-        if callable(attr_value) and not attr_name.startswith("__"):
-            setattr(cls, attr_name, to_dataset(attr_value))
-    return cls
-# @decorate_all_methods
-class ScenarioListExportMixin(DatasetExportMixin):
-    """Mixin class for exporting Results objects."""
-    def __init_subclass__(cls, **kwargs):
-        super().__init_subclass__(**kwargs)
-        decorate_methods_from_mixin(cls, DatasetExportMixin)
-    def to_docx(self, filename: str):
-        """Export the ScenarioList to a .docx file."""
-        dataset = self.to_dataset()
-        from edsl.results.DatasetTree import Tree
-        tree = Tree(dataset)
-        tree.construct_tree()
-        tree.to_docx(filename)
+"""Mixin class for exporting results."""
+from functools import wraps
+from edsl.results.DatasetExportMixin import DatasetExportMixin
+def to_dataset(func):
+    """Convert the object to a Dataset object before calling the function."""
+    @wraps(func)
+    def wrapper(self, *args, **kwargs):
+        """Return the function with the Results object converted to a Dataset object."""
+        if self.__class__.__name__ == "ScenarioList":
+            return func(self.to_dataset(), *args, **kwargs)
+        else:
+            raise Exception(
+                f"Class {self.__class__.__name__} not recognized as a Results or Dataset object."
+            )
+    return wrapper
+def decorate_methods_from_mixin(cls, mixin_cls):
+    for attr_name, attr_value in mixin_cls.__dict__.items():
+        if callable(attr_value) and not attr_name.startswith("__"):
+            setattr(cls, attr_name, to_dataset(attr_value))
+    return cls
+# def decorate_all_methods(cls):
+#     for attr_name, attr_value in cls.__dict__.items():
+#         if callable(attr_value):
+#             setattr(cls, attr_name, to_dataset(attr_value))
+#     return cls
+# @decorate_all_methods
+class ScenarioListExportMixin(DatasetExportMixin):
+    """Mixin class for exporting Results objects."""
+    def __init_subclass__(cls, **kwargs):
+        super().__init_subclass__(**kwargs)
+        decorate_methods_from_mixin(cls, DatasetExportMixin)
+    def to_docx(self, filename: str):
+        """Export the ScenarioList to a .docx file."""
+        dataset = self.to_dataset()
+        from edsl.results.DatasetTree import Tree
+        tree = Tree(dataset)
+        tree.construct_tree()
+        tree.to_docx(filename)

edsl/scenarios/ScenarioListPdfMixin.py CHANGED Viewed

@@ -1,239 +1,261 @@
-import os
-import re
-import copy
-import atexit
-import tempfile
-import subprocess
-class GoogleDriveDownloader:
-    _temp_dir = None
-    _temp_file_path = None
-    @classmethod
-    def fetch_from_drive(cls, url, filename=None):
-        import requests
-        # Extract file ID from the URL
-        file_id = cls._extract_file_id(url)
-        if not file_id:
-            raise ValueError("Invalid Google Drive URL")
-        # Construct the download URL
-        download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
-        # Send a GET request to the URL
-        session = requests.Session()
-        response = session.get(download_url, stream=True)
-        response.raise_for_status()
-        # Check for large file download prompt
-        for key, value in response.cookies.items():
-            if key.startswith("download_warning"):
-                params = {"id": file_id, "confirm": value}
-                response = session.get(download_url, params=params, stream=True)
-                break
-        # Create a temporary file to save the download
-        if not filename:
-            filename = "downloaded_file"
-        if cls._temp_dir is None:
-            cls._temp_dir = tempfile.TemporaryDirectory()
-            atexit.register(cls._cleanup)
-        cls._temp_file_path = os.path.join(cls._temp_dir.name, filename)
-        # Write the content to the temporary file
-        with open(cls._temp_file_path, "wb") as f:
-            for chunk in response.iter_content(32768):
-                if chunk:
-                    f.write(chunk)
-        print(f"File saved to: {cls._temp_file_path}")
-        return cls._temp_file_path
-    @staticmethod
-    def _extract_file_id(url):
-        from urllib.parse import urlparse, parse_qs
-        # Try to extract file ID from '/file/d/' format
-        file_id_match = re.search(r"/d/([a-zA-Z0-9-_]+)", url)
-        if file_id_match:
-            return file_id_match.group(1)
-        # If not found, try to extract from 'open?id=' format
-        parsed_url = urlparse(url)
-        query_params = parse_qs(parsed_url.query)
-        if "id" in query_params:
-            return query_params["id"][0]
-        return None
-    @classmethod
-    def _cleanup(cls):
-        if cls._temp_dir:
-            cls._temp_dir.cleanup()
-    @classmethod
-    def get_temp_file_path(cls):
-        return cls._temp_file_path
-def fetch_and_save_pdf(url, filename):
-    # Send a GET request to the URL
-    import requests
-    response = requests.get(url)
-    # Check if the request was successful
-    response.raise_for_status()
-    # Create a temporary directory
-    with tempfile.TemporaryDirectory() as temp_dir:
-        # Construct the full path for the file
-        temp_file_path = os.path.join(temp_dir, filename)
-        # Write the content to the temporary file
-        with open(temp_file_path, "wb") as file:
-            file.write(response.content)
-        print(f"PDF saved to: {temp_file_path}")
-        # Here you can perform operations with the file
-        # The file will be automatically deleted when you exit this block
-    return temp_file_path
-class ScenarioListPdfMixin:
-    @classmethod
-    def from_pdf(cls, filename_or_url, collapse_pages=False):
-        # Check if the input is a URL
-        if cls.is_url(filename_or_url):
-            # Check if it's a Google Drive URL
-            if "drive.google.com" in filename_or_url:
-                temp_filename = GoogleDriveDownloader.fetch_from_drive(
-                    filename_or_url, "temp_pdf.pdf"
-                )
-            else:
-                # For other URLs, use the previous fetch_and_save_pdf function
-                temp_filename = fetch_and_save_pdf(filename_or_url, "temp_pdf.pdf")
-            scenarios = list(cls.extract_text_from_pdf(temp_filename))
-        else:
-            # If it's not a URL, assume it's a local file path
-            scenarios = list(cls.extract_text_from_pdf(filename_or_url))
-        if not collapse_pages:
-            return cls(scenarios)
-        else:
-            txt = ""
-            for scenario in scenarios:
-                txt += scenario["text"]
-            from edsl.scenarios import Scenario
-            base_scenario = copy.copy(scenarios[0])
-            base_scenario["text"] = txt
-        return base_scenario
-    @staticmethod
-    def is_url(string):
-        from urllib.parse import urlparse
-        try:
-            result = urlparse(string)
-            return all([result.scheme, result.netloc])
-        except ValueError:
-            return False
-    @classmethod
-    def _from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
-        """
-        Convert each page of a PDF into an image and create Scenario instances.
-        :param pdf_path: Path to the PDF file.
-        :param image_format: Format of the output images (default is 'jpeg').
-        :return: ScenarioList instance containing the Scenario instances.
-        """
-        import tempfile
-        from pdf2image import convert_from_path
-        from edsl.scenarios import Scenario
-        with tempfile.TemporaryDirectory() as output_folder:
-            # Convert PDF to images
-            images = convert_from_path(pdf_path)
-            scenarios = []
-            # Save each page as an image and create Scenario instances
-            for i, image in enumerate(images):
-                image_path = os.path.join(output_folder, f"page_{i+1}.{image_format}")
-                image.save(image_path, image_format.upper())
-                scenario = Scenario._from_filepath_image(image_path)
-                scenarios.append(scenario)
-            # print(f"Saved {len(images)} pages as images in {output_folder}")
-            return cls(scenarios)
-    @staticmethod
-    def extract_text_from_pdf(pdf_path):
-        from edsl.scenarios.Scenario import Scenario
-        import fitz  # PyMuPDF
-        # TODO: Add test case
-        # Ensure the file exists
-        if not os.path.exists(pdf_path):
-            raise FileNotFoundError(f"The file {pdf_path} does not exist.")
-        # Open the PDF file
-        document = fitz.open(pdf_path)
-        # Get the filename from the path
-        filename = os.path.basename(pdf_path)
-        # Iterate through each page and extract text
-        for page_num in range(len(document)):
-            page = document.load_page(page_num)
-            text = page.get_text()
-            # Create a dictionary for the current page
-            page_info = {"filename": filename, "page": page_num + 1, "text": text}
-            yield Scenario(page_info)
-    def create_hello_world_pdf(pdf_path):
-        # LaTeX content
-        latex_content = r"""
-        \documentclass{article}
-        \title{Hello World}
-        \author{John}
-        \date{\today}
-        \begin{document}
-        \maketitle
-        \section{Hello, World!}
-        This is a simple hello world example created with LaTeX and Python.
-        \end{document}
-        """
-        # Create a .tex file
-        tex_filename = pdf_path + ".tex"
-        with open(tex_filename, "w") as tex_file:
-            tex_file.write(latex_content)
-        # Compile the .tex file to PDF
-        subprocess.run(["pdflatex", tex_filename], check=True)
-        # Optionally, clean up auxiliary files generated by pdflatex
-        aux_files = [pdf_path + ext for ext in [".aux", ".log"]]
-        for aux_file in aux_files:
-            try:
-                os.remove(aux_file)
-            except FileNotFoundError:
-                pass
-if __name__ == "__main__":
-    import doctest
-    doctest.testmod()
+import fitz  # PyMuPDF
+import os
+import copy
+import subprocess
+import requests
+import tempfile
+import os
+# import urllib.parse as urlparse
+from urllib.parse import urlparse
+# from edsl import Scenario
+import requests
+import re
+import tempfile
+import os
+import atexit
+from urllib.parse import urlparse, parse_qs
+class GoogleDriveDownloader:
+    _temp_dir = None
+    _temp_file_path = None
+    @classmethod
+    def fetch_from_drive(cls, url, filename=None):
+        # Extract file ID from the URL
+        file_id = cls._extract_file_id(url)
+        if not file_id:
+            raise ValueError("Invalid Google Drive URL")
+        # Construct the download URL
+        download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
+        # Send a GET request to the URL
+        session = requests.Session()
+        response = session.get(download_url, stream=True)
+        response.raise_for_status()
+        # Check for large file download prompt
+        for key, value in response.cookies.items():
+            if key.startswith("download_warning"):
+                params = {"id": file_id, "confirm": value}
+                response = session.get(download_url, params=params, stream=True)
+                break
+        # Create a temporary file to save the download
+        if not filename:
+            filename = "downloaded_file"
+        if cls._temp_dir is None:
+            cls._temp_dir = tempfile.TemporaryDirectory()
+            atexit.register(cls._cleanup)
+        cls._temp_file_path = os.path.join(cls._temp_dir.name, filename)
+        # Write the content to the temporary file
+        with open(cls._temp_file_path, "wb") as f:
+            for chunk in response.iter_content(32768):
+                if chunk:
+                    f.write(chunk)
+        print(f"File saved to: {cls._temp_file_path}")
+        return cls._temp_file_path
+    @staticmethod
+    def _extract_file_id(url):
+        # Try to extract file ID from '/file/d/' format
+        file_id_match = re.search(r"/d/([a-zA-Z0-9-_]+)", url)
+        if file_id_match:
+            return file_id_match.group(1)
+        # If not found, try to extract from 'open?id=' format
+        parsed_url = urlparse(url)
+        query_params = parse_qs(parsed_url.query)
+        if "id" in query_params:
+            return query_params["id"][0]
+        return None
+    @classmethod
+    def _cleanup(cls):
+        if cls._temp_dir:
+            cls._temp_dir.cleanup()
+    @classmethod
+    def get_temp_file_path(cls):
+        return cls._temp_file_path
+def fetch_and_save_pdf(url, filename):
+    # Send a GET request to the URL
+    response = requests.get(url)
+    # Check if the request was successful
+    response.raise_for_status()
+    # Create a temporary directory
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Construct the full path for the file
+        temp_file_path = os.path.join(temp_dir, filename)
+        # Write the content to the temporary file
+        with open(temp_file_path, "wb") as file:
+            file.write(response.content)
+        print(f"PDF saved to: {temp_file_path}")
+        # Here you can perform operations with the file
+        # The file will be automatically deleted when you exit this block
+    return temp_file_path
+# Example usage:
+# url = "https://example.com/sample.pdf"
+# fetch_and_save_pdf(url, "sample.pdf")
+class ScenarioListPdfMixin:
+    @classmethod
+    def from_pdf(cls, filename_or_url, collapse_pages=False):
+        # Check if the input is a URL
+        if cls.is_url(filename_or_url):
+            # Check if it's a Google Drive URL
+            if "drive.google.com" in filename_or_url:
+                temp_filename = GoogleDriveDownloader.fetch_from_drive(
+                    filename_or_url, "temp_pdf.pdf"
+                )
+            else:
+                # For other URLs, use the previous fetch_and_save_pdf function
+                temp_filename = fetch_and_save_pdf(filename_or_url, "temp_pdf.pdf")
+            scenarios = list(cls.extract_text_from_pdf(temp_filename))
+        else:
+            # If it's not a URL, assume it's a local file path
+            scenarios = list(cls.extract_text_from_pdf(filename_or_url))
+        if not collapse_pages:
+            return cls(scenarios)
+        else:
+            txt = ""
+            for scenario in scenarios:
+                txt += scenario["text"]
+            from edsl.scenarios import Scenario
+            base_scenario = copy.copy(scenarios[0])
+            base_scenario["text"] = txt
+        return base_scenario
+    @staticmethod
+    def is_url(string):
+        try:
+            result = urlparse(string)
+            return all([result.scheme, result.netloc])
+        except ValueError:
+            return False
+    @classmethod
+    def _from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
+        """
+        Convert each page of a PDF into an image and create Scenario instances.
+        :param pdf_path: Path to the PDF file.
+        :param image_format: Format of the output images (default is 'jpeg').
+        :return: ScenarioList instance containing the Scenario instances.
+        """
+        import tempfile
+        from pdf2image import convert_from_path
+        from edsl.scenarios import Scenario
+        with tempfile.TemporaryDirectory() as output_folder:
+            # Convert PDF to images
+            images = convert_from_path(pdf_path)
+            scenarios = []
+            # Save each page as an image and create Scenario instances
+            for i, image in enumerate(images):
+                image_path = os.path.join(output_folder, f"page_{i+1}.{image_format}")
+                image.save(image_path, image_format.upper())
+                scenario = Scenario._from_filepath_image(image_path)
+                scenarios.append(scenario)
+            # print(f"Saved {len(images)} pages as images in {output_folder}")
+            return cls(scenarios)
+    @staticmethod
+    def extract_text_from_pdf(pdf_path):
+        from edsl import Scenario
+        # TODO: Add test case
+        # Ensure the file exists
+        if not os.path.exists(pdf_path):
+            raise FileNotFoundError(f"The file {pdf_path} does not exist.")
+        # Open the PDF file
+        document = fitz.open(pdf_path)
+        # Get the filename from the path
+        filename = os.path.basename(pdf_path)
+        # Iterate through each page and extract text
+        for page_num in range(len(document)):
+            page = document.load_page(page_num)
+            text = page.get_text()
+            # Create a dictionary for the current page
+            page_info = {"filename": filename, "page": page_num + 1, "text": text}
+            yield Scenario(page_info)
+    def create_hello_world_pdf(pdf_path):
+        # LaTeX content
+        latex_content = r"""
+        \documentclass{article}
+        \title{Hello World}
+        \author{John}
+        \date{\today}
+        \begin{document}
+        \maketitle
+        \section{Hello, World!}
+        This is a simple hello world example created with LaTeX and Python.
+        \end{document}
+        """
+        # Create a .tex file
+        tex_filename = pdf_path + ".tex"
+        with open(tex_filename, "w") as tex_file:
+            tex_file.write(latex_content)
+        # Compile the .tex file to PDF
+        subprocess.run(["pdflatex", tex_filename], check=True)
+        # Optionally, clean up auxiliary files generated by pdflatex
+        aux_files = [pdf_path + ext for ext in [".aux", ".log"]]
+        for aux_file in aux_files:
+            try:
+                os.remove(aux_file)
+            except FileNotFoundError:
+                pass
+if __name__ == "__main__":
+    pass
+    # from edsl import ScenarioList
+    # class ScenarioListNew(ScenarioList, ScenaroListPdfMixin):
+    #     pass
+    # #ScenarioListNew.create_hello_world_pdf('hello_world')
+    # #scenarios = ScenarioListNew.from_pdf('hello_world.pdf')
+    # #print(scenarios)
+    # from edsl import ScenarioList, QuestionFreeText
+    # homo_silicus = ScenarioList.from_pdf('w31122.pdf')
+    # q = QuestionFreeText(question_text = "What is the key point of the text in {{ text }}?", question_name = "key_point")
+    # results = q.by(homo_silicus).run(progress_bar = True)
+    # results.select('scenario.page', 'answer.key_point').order_by('page').print()

edsl/scenarios/__init__.py CHANGED Viewed

@@ -1,3 +1,4 @@
-from edsl.scenarios.Scenario import Scenario
-from edsl.scenarios.ScenarioList import ScenarioList
-from edsl.scenarios.FileStore import FileStore
+from edsl.scenarios.Scenario import Scenario
+from edsl.scenarios.ScenarioList import ScenarioList
+# from edsl.scenarios.FileStore import FileStore

edsl/shared.py CHANGED Viewed

	@@ -1 +1 @@
1	- shared_globals = {}
1	+ shared_globals = {}

edsl 0.1.39.dev2__py3-none-any.whl → 0.1.39.dev3__py3-none-any.whl

edsl 0.1.39.dev2__py3-none-any.whl → 0.1.39.dev3__py3-none-any.whl