PyPI - edsl - Versions diffs - 0.1.38.dev2__py3-none-any.whl → 0.1.38.dev3__py3-none-any.whl - Mend

edsl 0.1.38.dev2__py3-none-any.whl → 0.1.38.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (248) hide show

edsl/Base.py +303 -303
edsl/BaseDiff.py +260 -260
edsl/TemplateLoader.py +24 -24
edsl/__init__.py +49 -49
edsl/__version__.py +1 -1
edsl/agents/Agent.py +858 -858
edsl/agents/AgentList.py +362 -362
edsl/agents/Invigilator.py +222 -222
edsl/agents/InvigilatorBase.py +284 -284
edsl/agents/PromptConstructor.py +353 -353
edsl/agents/__init__.py +3 -3
edsl/agents/descriptors.py +99 -99
edsl/agents/prompt_helpers.py +129 -129
edsl/auto/AutoStudy.py +117 -117
edsl/auto/StageBase.py +230 -230
edsl/auto/StageGenerateSurvey.py +178 -178
edsl/auto/StageLabelQuestions.py +125 -125
edsl/auto/StagePersona.py +61 -61
edsl/auto/StagePersonaDimensionValueRanges.py +88 -88
edsl/auto/StagePersonaDimensionValues.py +74 -74
edsl/auto/StagePersonaDimensions.py +69 -69
edsl/auto/StageQuestions.py +73 -73
edsl/auto/SurveyCreatorPipeline.py +21 -21
edsl/auto/utilities.py +224 -224
edsl/base/Base.py +279 -279
edsl/config.py +149 -149
edsl/conversation/Conversation.py +290 -290
edsl/conversation/car_buying.py +58 -58
edsl/conversation/chips.py +95 -95
edsl/conversation/mug_negotiation.py +81 -81
edsl/conversation/next_speaker_utilities.py +93 -93
edsl/coop/PriceFetcher.py +54 -54
edsl/coop/__init__.py +2 -2
edsl/coop/coop.py +961 -961
edsl/coop/utils.py +131 -131
edsl/data/Cache.py +530 -530
edsl/data/CacheEntry.py +228 -228
edsl/data/CacheHandler.py +149 -149
edsl/data/RemoteCacheSync.py +97 -97
edsl/data/SQLiteDict.py +292 -292
edsl/data/__init__.py +4 -4
edsl/data/orm.py +10 -10
edsl/data_transfer_models.py +73 -73
edsl/enums.py +173 -173
edsl/exceptions/BaseException.py +21 -21
edsl/exceptions/__init__.py +54 -54
edsl/exceptions/agents.py +42 -42
edsl/exceptions/cache.py +5 -5
edsl/exceptions/configuration.py +16 -16
edsl/exceptions/coop.py +10 -10
edsl/exceptions/data.py +14 -14
edsl/exceptions/general.py +34 -34
edsl/exceptions/jobs.py +33 -33
edsl/exceptions/language_models.py +63 -63
edsl/exceptions/prompts.py +15 -15
edsl/exceptions/questions.py +91 -91
edsl/exceptions/results.py +29 -29
edsl/exceptions/scenarios.py +22 -22
edsl/exceptions/surveys.py +37 -37
edsl/inference_services/AnthropicService.py +87 -87
edsl/inference_services/AwsBedrock.py +120 -120
edsl/inference_services/AzureAI.py +217 -217
edsl/inference_services/DeepInfraService.py +18 -18
edsl/inference_services/GoogleService.py +156 -156
edsl/inference_services/GroqService.py +20 -20
edsl/inference_services/InferenceServiceABC.py +147 -147
edsl/inference_services/InferenceServicesCollection.py +97 -97
edsl/inference_services/MistralAIService.py +123 -123
edsl/inference_services/OllamaService.py +18 -18
edsl/inference_services/OpenAIService.py +224 -224
edsl/inference_services/TestService.py +89 -89
edsl/inference_services/TogetherAIService.py +170 -170
edsl/inference_services/models_available_cache.py +118 -118
edsl/inference_services/rate_limits_cache.py +25 -25
edsl/inference_services/registry.py +39 -39
edsl/inference_services/write_available.py +10 -10
edsl/jobs/Answers.py +56 -56
edsl/jobs/Jobs.py +1358 -1358
edsl/jobs/__init__.py +1 -1
edsl/jobs/buckets/BucketCollection.py +63 -63
edsl/jobs/buckets/ModelBuckets.py +65 -65
edsl/jobs/buckets/TokenBucket.py +251 -251
edsl/jobs/interviews/Interview.py +661 -661
edsl/jobs/interviews/InterviewExceptionCollection.py +99 -99
edsl/jobs/interviews/InterviewExceptionEntry.py +186 -186
edsl/jobs/interviews/InterviewStatistic.py +63 -63
edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -25
edsl/jobs/interviews/InterviewStatusDictionary.py +78 -78
edsl/jobs/interviews/InterviewStatusLog.py +92 -92
edsl/jobs/interviews/ReportErrors.py +66 -66
edsl/jobs/interviews/interview_status_enum.py +9 -9
edsl/jobs/runners/JobsRunnerAsyncio.py +361 -361
edsl/jobs/runners/JobsRunnerStatus.py +332 -332
edsl/jobs/tasks/QuestionTaskCreator.py +242 -242
edsl/jobs/tasks/TaskCreators.py +64 -64
edsl/jobs/tasks/TaskHistory.py +451 -451
edsl/jobs/tasks/TaskStatusLog.py +23 -23
edsl/jobs/tasks/task_status_enum.py +163 -163
edsl/jobs/tokens/InterviewTokenUsage.py +27 -27
edsl/jobs/tokens/TokenUsage.py +34 -34
edsl/language_models/KeyLookup.py +30 -30
edsl/language_models/LanguageModel.py +708 -708
edsl/language_models/ModelList.py +109 -109
edsl/language_models/RegisterLanguageModelsMeta.py +184 -184
edsl/language_models/__init__.py +3 -3
edsl/language_models/fake_openai_call.py +15 -15
edsl/language_models/fake_openai_service.py +61 -61
edsl/language_models/registry.py +137 -137
edsl/language_models/repair.py +156 -156
edsl/language_models/unused/ReplicateBase.py +83 -83
edsl/language_models/utilities.py +64 -64
edsl/notebooks/Notebook.py +258 -258
edsl/notebooks/__init__.py +1 -1
edsl/prompts/Prompt.py +357 -357
edsl/prompts/__init__.py +2 -2
edsl/questions/AnswerValidatorMixin.py +289 -289
edsl/questions/QuestionBase.py +660 -660
edsl/questions/QuestionBaseGenMixin.py +161 -161
edsl/questions/QuestionBasePromptsMixin.py +217 -217
edsl/questions/QuestionBudget.py +227 -227
edsl/questions/QuestionCheckBox.py +359 -359
edsl/questions/QuestionExtract.py +183 -183
edsl/questions/QuestionFreeText.py +114 -114
edsl/questions/QuestionFunctional.py +166 -166
edsl/questions/QuestionList.py +231 -231
edsl/questions/QuestionMultipleChoice.py +286 -286
edsl/questions/QuestionNumerical.py +153 -153
edsl/questions/QuestionRank.py +324 -324
edsl/questions/Quick.py +41 -41
edsl/questions/RegisterQuestionsMeta.py +71 -71
edsl/questions/ResponseValidatorABC.py +174 -174
edsl/questions/SimpleAskMixin.py +73 -73
edsl/questions/__init__.py +26 -26
edsl/questions/compose_questions.py +98 -98
edsl/questions/decorators.py +21 -21
edsl/questions/derived/QuestionLikertFive.py +76 -76
edsl/questions/derived/QuestionLinearScale.py +87 -87
edsl/questions/derived/QuestionTopK.py +93 -93
edsl/questions/derived/QuestionYesNo.py +82 -82
edsl/questions/descriptors.py +413 -413
edsl/questions/prompt_templates/question_budget.jinja +13 -13
edsl/questions/prompt_templates/question_checkbox.jinja +32 -32
edsl/questions/prompt_templates/question_extract.jinja +11 -11
edsl/questions/prompt_templates/question_free_text.jinja +3 -3
edsl/questions/prompt_templates/question_linear_scale.jinja +11 -11
edsl/questions/prompt_templates/question_list.jinja +17 -17
edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -33
edsl/questions/prompt_templates/question_numerical.jinja +36 -36
edsl/questions/question_registry.py +147 -147
edsl/questions/settings.py +12 -12
edsl/questions/templates/budget/answering_instructions.jinja +7 -7
edsl/questions/templates/budget/question_presentation.jinja +7 -7
edsl/questions/templates/checkbox/answering_instructions.jinja +10 -10
edsl/questions/templates/checkbox/question_presentation.jinja +22 -22
edsl/questions/templates/extract/answering_instructions.jinja +7 -7
edsl/questions/templates/likert_five/answering_instructions.jinja +10 -10
edsl/questions/templates/likert_five/question_presentation.jinja +11 -11
edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -5
edsl/questions/templates/linear_scale/question_presentation.jinja +5 -5
edsl/questions/templates/list/answering_instructions.jinja +3 -3
edsl/questions/templates/list/question_presentation.jinja +5 -5
edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -9
edsl/questions/templates/multiple_choice/question_presentation.jinja +11 -11
edsl/questions/templates/numerical/answering_instructions.jinja +6 -6
edsl/questions/templates/numerical/question_presentation.jinja +6 -6
edsl/questions/templates/rank/answering_instructions.jinja +11 -11
edsl/questions/templates/rank/question_presentation.jinja +15 -15
edsl/questions/templates/top_k/answering_instructions.jinja +8 -8
edsl/questions/templates/top_k/question_presentation.jinja +22 -22
edsl/questions/templates/yes_no/answering_instructions.jinja +6 -6
edsl/questions/templates/yes_no/question_presentation.jinja +11 -11
edsl/results/Dataset.py +293 -293
edsl/results/DatasetExportMixin.py +717 -717
edsl/results/DatasetTree.py +145 -145
edsl/results/Result.py +456 -456
edsl/results/Results.py +1071 -1071
edsl/results/ResultsDBMixin.py +238 -238
edsl/results/ResultsExportMixin.py +43 -43
edsl/results/ResultsFetchMixin.py +33 -33
edsl/results/ResultsGGMixin.py +121 -121
edsl/results/ResultsToolsMixin.py +98 -98
edsl/results/Selector.py +135 -135
edsl/results/__init__.py +2 -2
edsl/results/tree_explore.py +115 -115
edsl/scenarios/FileStore.py +458 -458
edsl/scenarios/Scenario.py +544 -544
edsl/scenarios/ScenarioHtmlMixin.py +64 -64
edsl/scenarios/ScenarioList.py +1112 -1112
edsl/scenarios/ScenarioListExportMixin.py +52 -52
edsl/scenarios/ScenarioListPdfMixin.py +261 -261
edsl/scenarios/__init__.py +4 -4
edsl/shared.py +1 -1
edsl/study/ObjectEntry.py +173 -173
edsl/study/ProofOfWork.py +113 -113
edsl/study/SnapShot.py +80 -80
edsl/study/Study.py +528 -528
edsl/study/__init__.py +4 -4
edsl/surveys/DAG.py +148 -148
edsl/surveys/Memory.py +31 -31
edsl/surveys/MemoryPlan.py +244 -244
edsl/surveys/Rule.py +326 -326
edsl/surveys/RuleCollection.py +387 -387
edsl/surveys/Survey.py +1787 -1787
edsl/surveys/SurveyCSS.py +261 -261
edsl/surveys/SurveyExportMixin.py +259 -259
edsl/surveys/SurveyFlowVisualizationMixin.py +121 -121
edsl/surveys/SurveyQualtricsImport.py +284 -284
edsl/surveys/__init__.py +3 -3
edsl/surveys/base.py +53 -53
edsl/surveys/descriptors.py +56 -56
edsl/surveys/instructions/ChangeInstruction.py +49 -49
edsl/surveys/instructions/Instruction.py +53 -53
edsl/surveys/instructions/InstructionCollection.py +77 -77
edsl/templates/error_reporting/base.html +23 -23
edsl/templates/error_reporting/exceptions_by_model.html +34 -34
edsl/templates/error_reporting/exceptions_by_question_name.html +16 -16
edsl/templates/error_reporting/exceptions_by_type.html +16 -16
edsl/templates/error_reporting/interview_details.html +115 -115
edsl/templates/error_reporting/interviews.html +9 -9
edsl/templates/error_reporting/overview.html +4 -4
edsl/templates/error_reporting/performance_plot.html +1 -1
edsl/templates/error_reporting/report.css +73 -73
edsl/templates/error_reporting/report.html +117 -117
edsl/templates/error_reporting/report.js +25 -25
edsl/tools/__init__.py +1 -1
edsl/tools/clusters.py +192 -192
edsl/tools/embeddings.py +27 -27
edsl/tools/embeddings_plotting.py +118 -118
edsl/tools/plotting.py +112 -112
edsl/tools/summarize.py +18 -18
edsl/utilities/SystemInfo.py +28 -28
edsl/utilities/__init__.py +22 -22
edsl/utilities/ast_utilities.py +25 -25
edsl/utilities/data/Registry.py +6 -6
edsl/utilities/data/__init__.py +1 -1
edsl/utilities/data/scooter_results.json +1 -1
edsl/utilities/decorators.py +77 -77
edsl/utilities/gcp_bucket/cloud_storage.py +96 -96
edsl/utilities/interface.py +627 -627
edsl/utilities/naming_utilities.py +263 -263
edsl/utilities/repair_functions.py +28 -28
edsl/utilities/restricted_python.py +70 -70
edsl/utilities/utilities.py +409 -409
{edsl-0.1.38.dev2.dist-info → edsl-0.1.38.dev3.dist-info}/LICENSE +21 -21
{edsl-0.1.38.dev2.dist-info → edsl-0.1.38.dev3.dist-info}/METADATA +1 -1
edsl-0.1.38.dev3.dist-info/RECORD +269 -0
edsl-0.1.38.dev2.dist-info/RECORD +0 -269
{edsl-0.1.38.dev2.dist-info → edsl-0.1.38.dev3.dist-info}/WHEEL +0 -0

edsl/scenarios/ScenarioListExportMixin.py CHANGED Viewed

@@ -1,52 +1,52 @@
-"""Mixin class for exporting results."""
-from functools import wraps
-from edsl.results.DatasetExportMixin import DatasetExportMixin
-def to_dataset(func):
-    """Convert the object to a Dataset object before calling the function."""
-    @wraps(func)
-    def wrapper(self, *args, **kwargs):
-        """Return the function with the Results object converted to a Dataset object."""
-        if self.__class__.__name__ == "ScenarioList":
-            return func(self.to_dataset(), *args, **kwargs)
-        else:
-            raise Exception(
-                f"Class {self.__class__.__name__} not recognized as a Results or Dataset object."
-            )
-    return wrapper
-def decorate_methods_from_mixin(cls, mixin_cls):
-    for attr_name, attr_value in mixin_cls.__dict__.items():
-        if callable(attr_value) and not attr_name.startswith("__"):
-            setattr(cls, attr_name, to_dataset(attr_value))
-    return cls
-# def decorate_all_methods(cls):
-#     for attr_name, attr_value in cls.__dict__.items():
-#         if callable(attr_value):
-#             setattr(cls, attr_name, to_dataset(attr_value))
-#     return cls
-# @decorate_all_methods
-class ScenarioListExportMixin(DatasetExportMixin):
-    """Mixin class for exporting Results objects."""
-    def __init_subclass__(cls, **kwargs):
-        super().__init_subclass__(**kwargs)
-        decorate_methods_from_mixin(cls, DatasetExportMixin)
-    def to_docx(self, filename: str):
-        """Export the ScenarioList to a .docx file."""
-        dataset = self.to_dataset()
-        from edsl.results.DatasetTree import Tree
-        tree = Tree(dataset)
-        tree.construct_tree()
-        tree.to_docx(filename)
+"""Mixin class for exporting results."""
+from functools import wraps
+from edsl.results.DatasetExportMixin import DatasetExportMixin
+def to_dataset(func):
+    """Convert the object to a Dataset object before calling the function."""
+    @wraps(func)
+    def wrapper(self, *args, **kwargs):
+        """Return the function with the Results object converted to a Dataset object."""
+        if self.__class__.__name__ == "ScenarioList":
+            return func(self.to_dataset(), *args, **kwargs)
+        else:
+            raise Exception(
+                f"Class {self.__class__.__name__} not recognized as a Results or Dataset object."
+            )
+    return wrapper
+def decorate_methods_from_mixin(cls, mixin_cls):
+    for attr_name, attr_value in mixin_cls.__dict__.items():
+        if callable(attr_value) and not attr_name.startswith("__"):
+            setattr(cls, attr_name, to_dataset(attr_value))
+    return cls
+# def decorate_all_methods(cls):
+#     for attr_name, attr_value in cls.__dict__.items():
+#         if callable(attr_value):
+#             setattr(cls, attr_name, to_dataset(attr_value))
+#     return cls
+# @decorate_all_methods
+class ScenarioListExportMixin(DatasetExportMixin):
+    """Mixin class for exporting Results objects."""
+    def __init_subclass__(cls, **kwargs):
+        super().__init_subclass__(**kwargs)
+        decorate_methods_from_mixin(cls, DatasetExportMixin)
+    def to_docx(self, filename: str):
+        """Export the ScenarioList to a .docx file."""
+        dataset = self.to_dataset()
+        from edsl.results.DatasetTree import Tree
+        tree = Tree(dataset)
+        tree.construct_tree()
+        tree.to_docx(filename)

edsl/scenarios/ScenarioListPdfMixin.py CHANGED Viewed

@@ -1,261 +1,261 @@
-import fitz  # PyMuPDF
-import os
-import copy
-import subprocess
-import requests
-import tempfile
-import os
-# import urllib.parse as urlparse
-from urllib.parse import urlparse
-# from edsl import Scenario
-import requests
-import re
-import tempfile
-import os
-import atexit
-from urllib.parse import urlparse, parse_qs
-class GoogleDriveDownloader:
-    _temp_dir = None
-    _temp_file_path = None
-    @classmethod
-    def fetch_from_drive(cls, url, filename=None):
-        # Extract file ID from the URL
-        file_id = cls._extract_file_id(url)
-        if not file_id:
-            raise ValueError("Invalid Google Drive URL")
-        # Construct the download URL
-        download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
-        # Send a GET request to the URL
-        session = requests.Session()
-        response = session.get(download_url, stream=True)
-        response.raise_for_status()
-        # Check for large file download prompt
-        for key, value in response.cookies.items():
-            if key.startswith("download_warning"):
-                params = {"id": file_id, "confirm": value}
-                response = session.get(download_url, params=params, stream=True)
-                break
-        # Create a temporary file to save the download
-        if not filename:
-            filename = "downloaded_file"
-        if cls._temp_dir is None:
-            cls._temp_dir = tempfile.TemporaryDirectory()
-            atexit.register(cls._cleanup)
-        cls._temp_file_path = os.path.join(cls._temp_dir.name, filename)
-        # Write the content to the temporary file
-        with open(cls._temp_file_path, "wb") as f:
-            for chunk in response.iter_content(32768):
-                if chunk:
-                    f.write(chunk)
-        print(f"File saved to: {cls._temp_file_path}")
-        return cls._temp_file_path
-    @staticmethod
-    def _extract_file_id(url):
-        # Try to extract file ID from '/file/d/' format
-        file_id_match = re.search(r"/d/([a-zA-Z0-9-_]+)", url)
-        if file_id_match:
-            return file_id_match.group(1)
-        # If not found, try to extract from 'open?id=' format
-        parsed_url = urlparse(url)
-        query_params = parse_qs(parsed_url.query)
-        if "id" in query_params:
-            return query_params["id"][0]
-        return None
-    @classmethod
-    def _cleanup(cls):
-        if cls._temp_dir:
-            cls._temp_dir.cleanup()
-    @classmethod
-    def get_temp_file_path(cls):
-        return cls._temp_file_path
-def fetch_and_save_pdf(url, filename):
-    # Send a GET request to the URL
-    response = requests.get(url)
-    # Check if the request was successful
-    response.raise_for_status()
-    # Create a temporary directory
-    with tempfile.TemporaryDirectory() as temp_dir:
-        # Construct the full path for the file
-        temp_file_path = os.path.join(temp_dir, filename)
-        # Write the content to the temporary file
-        with open(temp_file_path, "wb") as file:
-            file.write(response.content)
-        print(f"PDF saved to: {temp_file_path}")
-        # Here you can perform operations with the file
-        # The file will be automatically deleted when you exit this block
-    return temp_file_path
-# Example usage:
-# url = "https://example.com/sample.pdf"
-# fetch_and_save_pdf(url, "sample.pdf")
-class ScenarioListPdfMixin:
-    @classmethod
-    def from_pdf(cls, filename_or_url, collapse_pages=False):
-        # Check if the input is a URL
-        if cls.is_url(filename_or_url):
-            # Check if it's a Google Drive URL
-            if "drive.google.com" in filename_or_url:
-                temp_filename = GoogleDriveDownloader.fetch_from_drive(
-                    filename_or_url, "temp_pdf.pdf"
-                )
-            else:
-                # For other URLs, use the previous fetch_and_save_pdf function
-                temp_filename = fetch_and_save_pdf(filename_or_url, "temp_pdf.pdf")
-            scenarios = list(cls.extract_text_from_pdf(temp_filename))
-        else:
-            # If it's not a URL, assume it's a local file path
-            scenarios = list(cls.extract_text_from_pdf(filename_or_url))
-        if not collapse_pages:
-            return cls(scenarios)
-        else:
-            txt = ""
-            for scenario in scenarios:
-                txt += scenario["text"]
-            from edsl.scenarios import Scenario
-            base_scenario = copy.copy(scenarios[0])
-            base_scenario["text"] = txt
-        return base_scenario
-    @staticmethod
-    def is_url(string):
-        try:
-            result = urlparse(string)
-            return all([result.scheme, result.netloc])
-        except ValueError:
-            return False
-    @classmethod
-    def _from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
-        """
-        Convert each page of a PDF into an image and create Scenario instances.
-        :param pdf_path: Path to the PDF file.
-        :param image_format: Format of the output images (default is 'jpeg').
-        :return: ScenarioList instance containing the Scenario instances.
-        """
-        import tempfile
-        from pdf2image import convert_from_path
-        from edsl.scenarios import Scenario
-        with tempfile.TemporaryDirectory() as output_folder:
-            # Convert PDF to images
-            images = convert_from_path(pdf_path)
-            scenarios = []
-            # Save each page as an image and create Scenario instances
-            for i, image in enumerate(images):
-                image_path = os.path.join(output_folder, f"page_{i+1}.{image_format}")
-                image.save(image_path, image_format.upper())
-                scenario = Scenario._from_filepath_image(image_path)
-                scenarios.append(scenario)
-            # print(f"Saved {len(images)} pages as images in {output_folder}")
-            return cls(scenarios)
-    @staticmethod
-    def extract_text_from_pdf(pdf_path):
-        from edsl import Scenario
-        # TODO: Add test case
-        # Ensure the file exists
-        if not os.path.exists(pdf_path):
-            raise FileNotFoundError(f"The file {pdf_path} does not exist.")
-        # Open the PDF file
-        document = fitz.open(pdf_path)
-        # Get the filename from the path
-        filename = os.path.basename(pdf_path)
-        # Iterate through each page and extract text
-        for page_num in range(len(document)):
-            page = document.load_page(page_num)
-            text = page.get_text()
-            # Create a dictionary for the current page
-            page_info = {"filename": filename, "page": page_num + 1, "text": text}
-            yield Scenario(page_info)
-    def create_hello_world_pdf(pdf_path):
-        # LaTeX content
-        latex_content = r"""
-        \documentclass{article}
-        \title{Hello World}
-        \author{John}
-        \date{\today}
-        \begin{document}
-        \maketitle
-        \section{Hello, World!}
-        This is a simple hello world example created with LaTeX and Python.
-        \end{document}
-        """
-        # Create a .tex file
-        tex_filename = pdf_path + ".tex"
-        with open(tex_filename, "w") as tex_file:
-            tex_file.write(latex_content)
-        # Compile the .tex file to PDF
-        subprocess.run(["pdflatex", tex_filename], check=True)
-        # Optionally, clean up auxiliary files generated by pdflatex
-        aux_files = [pdf_path + ext for ext in [".aux", ".log"]]
-        for aux_file in aux_files:
-            try:
-                os.remove(aux_file)
-            except FileNotFoundError:
-                pass
-if __name__ == "__main__":
-    pass
-    # from edsl import ScenarioList
-    # class ScenarioListNew(ScenarioList, ScenaroListPdfMixin):
-    #     pass
-    # #ScenarioListNew.create_hello_world_pdf('hello_world')
-    # #scenarios = ScenarioListNew.from_pdf('hello_world.pdf')
-    # #print(scenarios)
-    # from edsl import ScenarioList, QuestionFreeText
-    # homo_silicus = ScenarioList.from_pdf('w31122.pdf')
-    # q = QuestionFreeText(question_text = "What is the key point of the text in {{ text }}?", question_name = "key_point")
-    # results = q.by(homo_silicus).run(progress_bar = True)
-    # results.select('scenario.page', 'answer.key_point').order_by('page').print()
+import fitz  # PyMuPDF
+import os
+import copy
+import subprocess
+import requests
+import tempfile
+import os
+# import urllib.parse as urlparse
+from urllib.parse import urlparse
+# from edsl import Scenario
+import requests
+import re
+import tempfile
+import os
+import atexit
+from urllib.parse import urlparse, parse_qs
+class GoogleDriveDownloader:
+    _temp_dir = None
+    _temp_file_path = None
+    @classmethod
+    def fetch_from_drive(cls, url, filename=None):
+        # Extract file ID from the URL
+        file_id = cls._extract_file_id(url)
+        if not file_id:
+            raise ValueError("Invalid Google Drive URL")
+        # Construct the download URL
+        download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
+        # Send a GET request to the URL
+        session = requests.Session()
+        response = session.get(download_url, stream=True)
+        response.raise_for_status()
+        # Check for large file download prompt
+        for key, value in response.cookies.items():
+            if key.startswith("download_warning"):
+                params = {"id": file_id, "confirm": value}
+                response = session.get(download_url, params=params, stream=True)
+                break
+        # Create a temporary file to save the download
+        if not filename:
+            filename = "downloaded_file"
+        if cls._temp_dir is None:
+            cls._temp_dir = tempfile.TemporaryDirectory()
+            atexit.register(cls._cleanup)
+        cls._temp_file_path = os.path.join(cls._temp_dir.name, filename)
+        # Write the content to the temporary file
+        with open(cls._temp_file_path, "wb") as f:
+            for chunk in response.iter_content(32768):
+                if chunk:
+                    f.write(chunk)
+        print(f"File saved to: {cls._temp_file_path}")
+        return cls._temp_file_path
+    @staticmethod
+    def _extract_file_id(url):
+        # Try to extract file ID from '/file/d/' format
+        file_id_match = re.search(r"/d/([a-zA-Z0-9-_]+)", url)
+        if file_id_match:
+            return file_id_match.group(1)
+        # If not found, try to extract from 'open?id=' format
+        parsed_url = urlparse(url)
+        query_params = parse_qs(parsed_url.query)
+        if "id" in query_params:
+            return query_params["id"][0]
+        return None
+    @classmethod
+    def _cleanup(cls):
+        if cls._temp_dir:
+            cls._temp_dir.cleanup()
+    @classmethod
+    def get_temp_file_path(cls):
+        return cls._temp_file_path
+def fetch_and_save_pdf(url, filename):
+    # Send a GET request to the URL
+    response = requests.get(url)
+    # Check if the request was successful
+    response.raise_for_status()
+    # Create a temporary directory
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Construct the full path for the file
+        temp_file_path = os.path.join(temp_dir, filename)
+        # Write the content to the temporary file
+        with open(temp_file_path, "wb") as file:
+            file.write(response.content)
+        print(f"PDF saved to: {temp_file_path}")
+        # Here you can perform operations with the file
+        # The file will be automatically deleted when you exit this block
+    return temp_file_path
+# Example usage:
+# url = "https://example.com/sample.pdf"
+# fetch_and_save_pdf(url, "sample.pdf")
+class ScenarioListPdfMixin:
+    @classmethod
+    def from_pdf(cls, filename_or_url, collapse_pages=False):
+        # Check if the input is a URL
+        if cls.is_url(filename_or_url):
+            # Check if it's a Google Drive URL
+            if "drive.google.com" in filename_or_url:
+                temp_filename = GoogleDriveDownloader.fetch_from_drive(
+                    filename_or_url, "temp_pdf.pdf"
+                )
+            else:
+                # For other URLs, use the previous fetch_and_save_pdf function
+                temp_filename = fetch_and_save_pdf(filename_or_url, "temp_pdf.pdf")
+            scenarios = list(cls.extract_text_from_pdf(temp_filename))
+        else:
+            # If it's not a URL, assume it's a local file path
+            scenarios = list(cls.extract_text_from_pdf(filename_or_url))
+        if not collapse_pages:
+            return cls(scenarios)
+        else:
+            txt = ""
+            for scenario in scenarios:
+                txt += scenario["text"]
+            from edsl.scenarios import Scenario
+            base_scenario = copy.copy(scenarios[0])
+            base_scenario["text"] = txt
+        return base_scenario
+    @staticmethod
+    def is_url(string):
+        try:
+            result = urlparse(string)
+            return all([result.scheme, result.netloc])
+        except ValueError:
+            return False
+    @classmethod
+    def _from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
+        """
+        Convert each page of a PDF into an image and create Scenario instances.
+        :param pdf_path: Path to the PDF file.
+        :param image_format: Format of the output images (default is 'jpeg').
+        :return: ScenarioList instance containing the Scenario instances.
+        """
+        import tempfile
+        from pdf2image import convert_from_path
+        from edsl.scenarios import Scenario
+        with tempfile.TemporaryDirectory() as output_folder:
+            # Convert PDF to images
+            images = convert_from_path(pdf_path)
+            scenarios = []
+            # Save each page as an image and create Scenario instances
+            for i, image in enumerate(images):
+                image_path = os.path.join(output_folder, f"page_{i+1}.{image_format}")
+                image.save(image_path, image_format.upper())
+                scenario = Scenario._from_filepath_image(image_path)
+                scenarios.append(scenario)
+            # print(f"Saved {len(images)} pages as images in {output_folder}")
+            return cls(scenarios)
+    @staticmethod
+    def extract_text_from_pdf(pdf_path):
+        from edsl import Scenario
+        # TODO: Add test case
+        # Ensure the file exists
+        if not os.path.exists(pdf_path):
+            raise FileNotFoundError(f"The file {pdf_path} does not exist.")
+        # Open the PDF file
+        document = fitz.open(pdf_path)
+        # Get the filename from the path
+        filename = os.path.basename(pdf_path)
+        # Iterate through each page and extract text
+        for page_num in range(len(document)):
+            page = document.load_page(page_num)
+            text = page.get_text()
+            # Create a dictionary for the current page
+            page_info = {"filename": filename, "page": page_num + 1, "text": text}
+            yield Scenario(page_info)
+    def create_hello_world_pdf(pdf_path):
+        # LaTeX content
+        latex_content = r"""
+        \documentclass{article}
+        \title{Hello World}
+        \author{John}
+        \date{\today}
+        \begin{document}
+        \maketitle
+        \section{Hello, World!}
+        This is a simple hello world example created with LaTeX and Python.
+        \end{document}
+        """
+        # Create a .tex file
+        tex_filename = pdf_path + ".tex"
+        with open(tex_filename, "w") as tex_file:
+            tex_file.write(latex_content)
+        # Compile the .tex file to PDF
+        subprocess.run(["pdflatex", tex_filename], check=True)
+        # Optionally, clean up auxiliary files generated by pdflatex
+        aux_files = [pdf_path + ext for ext in [".aux", ".log"]]
+        for aux_file in aux_files:
+            try:
+                os.remove(aux_file)
+            except FileNotFoundError:
+                pass
+if __name__ == "__main__":
+    pass
+    # from edsl import ScenarioList
+    # class ScenarioListNew(ScenarioList, ScenaroListPdfMixin):
+    #     pass
+    # #ScenarioListNew.create_hello_world_pdf('hello_world')
+    # #scenarios = ScenarioListNew.from_pdf('hello_world.pdf')
+    # #print(scenarios)
+    # from edsl import ScenarioList, QuestionFreeText
+    # homo_silicus = ScenarioList.from_pdf('w31122.pdf')
+    # q = QuestionFreeText(question_text = "What is the key point of the text in {{ text }}?", question_name = "key_point")
+    # results = q.by(homo_silicus).run(progress_bar = True)
+    # results.select('scenario.page', 'answer.key_point').order_by('page').print()

edsl/scenarios/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from edsl.scenarios.Scenario import Scenario
-from edsl.scenarios.ScenarioList import ScenarioList
-# from edsl.scenarios.FileStore import FileStore
+from edsl.scenarios.Scenario import Scenario
+from edsl.scenarios.ScenarioList import ScenarioList
+# from edsl.scenarios.FileStore import FileStore

edsl/shared.py CHANGED Viewed

@@ -1 +1 @@
-shared_globals = {}
+shared_globals = {}

edsl 0.1.38.dev2__py3-none-any.whl → 0.1.38.dev3__py3-none-any.whl

edsl 0.1.38.dev2__py3-none-any.whl → 0.1.38.dev3__py3-none-any.whl