PyPI - edsl - Versions diffs - 0.1.39.dev1__py3-none-any.whl → 0.1.39.dev2__py3-none-any.whl - Mend

edsl 0.1.39.dev1__py3-none-any.whl → 0.1.39.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (194)

edsl/Base.py +169 -116
edsl/__init__.py +14 -6
edsl/__version__.py +1 -1
edsl/agents/Agent.py +358 -146
edsl/agents/AgentList.py +211 -73
edsl/agents/Invigilator.py +88 -36
edsl/agents/InvigilatorBase.py +59 -70
edsl/agents/PromptConstructor.py +117 -219
edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
edsl/agents/QuestionOptionProcessor.py +172 -0
edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
edsl/agents/__init__.py +0 -1
edsl/agents/prompt_helpers.py +3 -3
edsl/config.py +22 -2
edsl/conversation/car_buying.py +2 -1
edsl/coop/CoopFunctionsMixin.py +15 -0
edsl/coop/ExpectedParrotKeyHandler.py +125 -0
edsl/coop/PriceFetcher.py +1 -1
edsl/coop/coop.py +104 -42
edsl/coop/utils.py +14 -14
edsl/data/Cache.py +21 -14
edsl/data/CacheEntry.py +12 -15
edsl/data/CacheHandler.py +33 -12
edsl/data/__init__.py +4 -3
edsl/data_transfer_models.py +2 -1
edsl/enums.py +20 -0
edsl/exceptions/__init__.py +50 -50
edsl/exceptions/agents.py +12 -0
edsl/exceptions/inference_services.py +5 -0
edsl/exceptions/questions.py +24 -6
edsl/exceptions/scenarios.py +7 -0
edsl/inference_services/AnthropicService.py +0 -3
edsl/inference_services/AvailableModelCacheHandler.py +184 -0
edsl/inference_services/AvailableModelFetcher.py +209 -0
edsl/inference_services/AwsBedrock.py +0 -2
edsl/inference_services/AzureAI.py +0 -2
edsl/inference_services/GoogleService.py +2 -11
edsl/inference_services/InferenceServiceABC.py +18 -85
edsl/inference_services/InferenceServicesCollection.py +105 -80
edsl/inference_services/MistralAIService.py +0 -3
edsl/inference_services/OpenAIService.py +1 -4
edsl/inference_services/PerplexityService.py +0 -3
edsl/inference_services/ServiceAvailability.py +135 -0
edsl/inference_services/TestService.py +11 -8
edsl/inference_services/data_structures.py +62 -0
edsl/jobs/AnswerQuestionFunctionConstructor.py +188 -0
edsl/jobs/Answers.py +1 -14
edsl/jobs/FetchInvigilator.py +40 -0
edsl/jobs/InterviewTaskManager.py +98 -0
edsl/jobs/InterviewsConstructor.py +48 -0
edsl/jobs/Jobs.py +102 -243
edsl/jobs/JobsChecks.py +35 -10
edsl/jobs/JobsComponentConstructor.py +189 -0
edsl/jobs/JobsPrompts.py +5 -3
edsl/jobs/JobsRemoteInferenceHandler.py +128 -80
edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
edsl/jobs/RequestTokenEstimator.py +30 -0
edsl/jobs/buckets/BucketCollection.py +44 -3
edsl/jobs/buckets/TokenBucket.py +53 -21
edsl/jobs/buckets/TokenBucketAPI.py +211 -0
edsl/jobs/buckets/TokenBucketClient.py +191 -0
edsl/jobs/decorators.py +35 -0
edsl/jobs/interviews/Interview.py +77 -380
edsl/jobs/jobs_status_enums.py +9 -0
edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
edsl/jobs/runners/JobsRunnerAsyncio.py +4 -49
edsl/jobs/tasks/QuestionTaskCreator.py +21 -19
edsl/jobs/tasks/TaskHistory.py +14 -15
edsl/jobs/tasks/task_status_enum.py +0 -2
edsl/language_models/ComputeCost.py +63 -0
edsl/language_models/LanguageModel.py +137 -234
edsl/language_models/ModelList.py +11 -13
edsl/language_models/PriceManager.py +127 -0
edsl/language_models/RawResponseHandler.py +106 -0
edsl/language_models/ServiceDataSources.py +0 -0
edsl/language_models/__init__.py +0 -1
edsl/language_models/key_management/KeyLookup.py +63 -0
edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
edsl/language_models/key_management/KeyLookupCollection.py +38 -0
edsl/language_models/key_management/__init__.py +0 -0
edsl/language_models/key_management/models.py +131 -0
edsl/language_models/registry.py +49 -59
edsl/language_models/repair.py +2 -2
edsl/language_models/utilities.py +5 -4
edsl/notebooks/Notebook.py +19 -14
edsl/notebooks/NotebookToLaTeX.py +142 -0
edsl/prompts/Prompt.py +29 -39
edsl/questions/AnswerValidatorMixin.py +47 -2
edsl/questions/ExceptionExplainer.py +77 -0
edsl/questions/HTMLQuestion.py +103 -0
edsl/questions/LoopProcessor.py +149 -0
edsl/questions/QuestionBase.py +37 -192
edsl/questions/QuestionBaseGenMixin.py +52 -48
edsl/questions/QuestionBasePromptsMixin.py +7 -3
edsl/questions/QuestionCheckBox.py +1 -1
edsl/questions/QuestionExtract.py +1 -1
edsl/questions/QuestionFreeText.py +1 -2
edsl/questions/QuestionList.py +3 -5
edsl/questions/QuestionMatrix.py +265 -0
edsl/questions/QuestionMultipleChoice.py +66 -22
edsl/questions/QuestionNumerical.py +1 -3
edsl/questions/QuestionRank.py +6 -16
edsl/questions/ResponseValidatorABC.py +37 -11
edsl/questions/ResponseValidatorFactory.py +28 -0
edsl/questions/SimpleAskMixin.py +4 -3
edsl/questions/__init__.py +1 -0
edsl/questions/derived/QuestionLinearScale.py +6 -3
edsl/questions/derived/QuestionTopK.py +1 -1
edsl/questions/descriptors.py +17 -3
edsl/questions/question_registry.py +1 -1
edsl/questions/templates/matrix/__init__.py +1 -0
edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
edsl/questions/templates/matrix/question_presentation.jinja +20 -0
edsl/results/CSSParameterizer.py +1 -1
edsl/results/Dataset.py +170 -7
edsl/results/DatasetExportMixin.py +224 -302
edsl/results/DatasetTree.py +28 -8
edsl/results/MarkdownToDocx.py +122 -0
edsl/results/MarkdownToPDF.py +111 -0
edsl/results/Result.py +192 -206
edsl/results/Results.py +120 -113
edsl/results/ResultsExportMixin.py +2 -0
edsl/results/Selector.py +23 -13
edsl/results/TableDisplay.py +98 -171
edsl/results/TextEditor.py +50 -0
edsl/results/__init__.py +1 -1
edsl/results/smart_objects.py +96 -0
edsl/results/table_data_class.py +12 -0
edsl/results/table_renderers.py +118 -0
edsl/scenarios/ConstructDownloadLink.py +109 -0
edsl/scenarios/DirectoryScanner.py +96 -0
edsl/scenarios/DocumentChunker.py +102 -0
edsl/scenarios/DocxScenario.py +16 -0
edsl/scenarios/FileStore.py +118 -239
edsl/scenarios/PdfExtractor.py +40 -0
edsl/scenarios/Scenario.py +90 -193
edsl/scenarios/ScenarioHtmlMixin.py +4 -3
edsl/scenarios/ScenarioJoin.py +10 -6
edsl/scenarios/ScenarioList.py +383 -240
edsl/scenarios/ScenarioListExportMixin.py +0 -7
edsl/scenarios/ScenarioListPdfMixin.py +15 -37
edsl/scenarios/ScenarioSelector.py +156 -0
edsl/scenarios/__init__.py +1 -2
edsl/scenarios/file_methods.py +85 -0
edsl/scenarios/handlers/__init__.py +13 -0
edsl/scenarios/handlers/csv.py +38 -0
edsl/scenarios/handlers/docx.py +76 -0
edsl/scenarios/handlers/html.py +37 -0
edsl/scenarios/handlers/json.py +111 -0
edsl/scenarios/handlers/latex.py +5 -0
edsl/scenarios/handlers/md.py +51 -0
edsl/scenarios/handlers/pdf.py +68 -0
edsl/scenarios/handlers/png.py +39 -0
edsl/scenarios/handlers/pptx.py +105 -0
edsl/scenarios/handlers/py.py +294 -0
edsl/scenarios/handlers/sql.py +313 -0
edsl/scenarios/handlers/sqlite.py +149 -0
edsl/scenarios/handlers/txt.py +33 -0
edsl/study/ObjectEntry.py +1 -1
edsl/study/SnapShot.py +1 -1
edsl/study/Study.py +5 -12
edsl/surveys/ConstructDAG.py +92 -0
edsl/surveys/EditSurvey.py +221 -0
edsl/surveys/InstructionHandler.py +100 -0
edsl/surveys/MemoryManagement.py +72 -0
edsl/surveys/Rule.py +5 -4
edsl/surveys/RuleCollection.py +25 -27
edsl/surveys/RuleManager.py +172 -0
edsl/surveys/Simulator.py +75 -0
edsl/surveys/Survey.py +199 -771
edsl/surveys/SurveyCSS.py +20 -8
edsl/surveys/{SurveyFlowVisualizationMixin.py → SurveyFlowVisualization.py} +11 -9
edsl/surveys/SurveyToApp.py +141 -0
edsl/surveys/__init__.py +4 -2
edsl/surveys/descriptors.py +6 -2
edsl/surveys/instructions/ChangeInstruction.py +1 -2
edsl/surveys/instructions/Instruction.py +4 -13
edsl/surveys/instructions/InstructionCollection.py +11 -6
edsl/templates/error_reporting/interview_details.html +1 -1
edsl/templates/error_reporting/report.html +1 -1
edsl/tools/plotting.py +1 -1
edsl/utilities/PrettyList.py +56 -0
edsl/utilities/is_notebook.py +18 -0
edsl/utilities/is_valid_variable_name.py +11 -0
edsl/utilities/remove_edsl_version.py +24 -0
edsl/utilities/utilities.py +35 -23
{edsl-0.1.39.dev1.dist-info → edsl-0.1.39.dev2.dist-info}/METADATA +12 -10
edsl-0.1.39.dev2.dist-info/RECORD +352 -0
edsl/language_models/KeyLookup.py +0 -30
edsl/language_models/unused/ReplicateBase.py +0 -83
edsl/results/ResultsDBMixin.py +0 -238
edsl-0.1.39.dev1.dist-info/RECORD +0 -277
{edsl-0.1.39.dev1.dist-info → edsl-0.1.39.dev2.dist-info}/LICENSE +0 -0
{edsl-0.1.39.dev1.dist-info → edsl-0.1.39.dev2.dist-info}/WHEEL +0 -0

edsl/scenarios/PdfExtractor.py ADDED Viewed

@@ -0,0 +1,40 @@
+import os
+class PdfExtractor:
+    def __init__(self, pdf_path: str, parent_object: object):
+        self.pdf_path = pdf_path
+        self.constructor = parent_object.__class__
+    def get_object(self) -> object:
+        return self.constructor(self._get_pdf_dict())
+    def _get_pdf_dict(self) -> dict:
+        # Ensure the file exists
+        import fitz
+        if not os.path.exists(self.pdf_path):
+            raise FileNotFoundError(f"The file {self.pdf_path} does not exist.")
+        # Open the PDF file
+        document = fitz.open(self.pdf_path)
+        # Get the filename from the path
+        filename = os.path.basename(self.pdf_path)
+        # Iterate through each page and extract text
+        text = ""
+        for page_num in range(len(document)):
+            page = document.load_page(page_num)
+            blocks = page.get_text("blocks")  # Extract text blocks
+            # Sort blocks by their vertical position (y0) to maintain reading order
+            blocks.sort(key=lambda b: (b[1], b[0]))  # Sort by y0 first, then x0
+            # Combine the text blocks in order
+            for block in blocks:
+                text += block[4] + "\n"
+        # Create a dictionary for the combined text
+        page_info = {"filename": filename, "text": text}
+        return page_info

edsl/scenarios/Scenario.py CHANGED Viewed

@@ -2,54 +2,65 @@
 from __future__ import annotations
 import copy
-import hashlib
 import os
 import json
 from collections import UserDict
-from typing import Union, List, Optional, Generator
+from typing import Union, List, Optional, TYPE_CHECKING, Collection
 from uuid import uuid4
 from edsl.Base import Base
 from edsl.scenarios.ScenarioHtmlMixin import ScenarioHtmlMixin
-from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
+from edsl.utilities.remove_edsl_version import remove_edsl_version
 from edsl.exceptions.scenarios import ScenarioError
+if TYPE_CHECKING:
+    from edsl.scenarios.ScenarioList import ScenarioList
+    from edsl.results.Dataset import Dataset
 class DisplayJSON:
-    def __init__(self, dict):
-        self.text = json.dumps(dict, indent=4)
+    """Display a dictionary as JSON."""
+    def __init__(self, input_dict: dict):
+        self.text = json.dumps(input_dict, indent=4)
     def __repr__(self):
         return self.text
 class DisplayYAML:
-    def __init__(self, dict):
+    """Display a dictionary as YAML."""
+    def __init__(self, input_dict: dict):
         import yaml
-        self.text = yaml.dump(dict)
+        self.text = yaml.dump(input_dict)
     def __repr__(self):
         return self.text
 class Scenario(Base, UserDict, ScenarioHtmlMixin):
-    """A Scenario is a dictionary of keys/values.
-    They can be used parameterize EDSL questions."""
+    """A Scenario is a dictionary of keys/values that can be used to parameterize questions."""
     __documentation__ = "https://docs.expectedparrot.com/en/latest/scenarios.html"
-    def __init__(self, data: Union[dict, None] = None, name: str = None):
+    def __init__(self, data: Optional[dict] = None, name: Optional[str] = None):
         """Initialize a new Scenario.
-        # :param data: A dictionary of keys/values for parameterizing questions.
-        #"""
+        :param data: A dictionary of keys/values for parameterizing questions.
+        :param name: The name of the scenario.
+        """
         if not isinstance(data, dict) and data is not None:
-            raise EDSLScenarioError(
-                "You must pass in a dictionary to initialize a Scenario."
-            )
+            try:
+                data = dict(data)
+            except Exception as e:
+                raise ScenarioError(
+                    f"You must pass in a dictionary to initialize a Scenario. You passed in {data}",
+                    "Exception message:" + str(e),
+                )
+        super().__init__()
         self.data = data if data is not None else {}
         self.name = name
@@ -59,7 +70,6 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
         :param n: The number of times to replicate the scenario.
         Example:
         >>> s = Scenario({"food": "wood chips"})
         >>> s.replicate(2)
         ScenarioList([Scenario({'food': 'wood chips'}), Scenario({'food': 'wood chips'})])
@@ -82,13 +92,13 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
                     return True
         return False
-    def convert_jinja_braces(
-        self, replacement_left="<<", replacement_right=">>"
+    def _convert_jinja_braces(
+        self, replacement_left: str = "<<", replacement_right: str = ">>"
     ) -> Scenario:
         """Convert Jinja braces to some other character.
         >>> s = Scenario({"food": "I love {{wood chips}}"})
-        >>> s.convert_jinja_braces()
+        >>> s._convert_jinja_braces()
         Scenario({'food': 'I love <<wood chips>>'})
         """
@@ -102,7 +112,7 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
                 new_scenario[key] = value
         return new_scenario
-    def __add__(self, other_scenario: "Scenario") -> "Scenario":
+    def __add__(self, other_scenario: Scenario) -> Scenario:
         """Combine two scenarios by taking the union of their keys
         If the other scenario is None, then just return self.
@@ -127,11 +137,14 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
             return s
     def rename(
-        self, old_name_or_replacement_dict: dict, new_name: Optional[str] = None
-    ) -> "Scenario":
+        self,
+        old_name_or_replacement_dict: Union[str, dict[str, str]],
+        new_name: Optional[str] = None,
+    ) -> Scenario:
         """Rename the keys of a scenario.
-        :param replacement_dict: A dictionary of old keys to new keys.
+        :param old_name_or_replacement_dict: A dictionary of old keys to new keys *OR* a string of the old key.
+        :param new_name: The new name of the key.
         Example:
@@ -156,13 +169,26 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
                 new_scenario[key] = value
         return new_scenario
-    def table(self, tablefmt: str = "grid") -> str:
-        from edsl.results.Dataset import Dataset
+    def new_column_names(self, new_names: List[str]) -> Scenario:
+        """Rename the keys of a scenario.
+        >>> s = Scenario({"food": "wood chips"})
+        >>> s.new_column_names(["food_preference"])
+        Scenario({'food_preference': 'wood chips'})
+        """
+        try:
+            assert len(new_names) == len(self.keys())
+        except AssertionError:
+            print("The number of new names must match the number of keys.")
+        new_scenario = Scenario()
+        for new_names, value in zip(new_names, self.values()):
+            new_scenario[new_names] = value
+        return new_scenario
-        keys = [key for key, value in self.items()]
-        values = [value for key, value in self.items()]
-        d = Dataset([{"key": keys}, {"value": values}])
-        return d.table(tablefmt=tablefmt)
+    def table(self, tablefmt: str = "grid") -> str:
+        """Display a scenario as a table."""
+        return self.to_dataset().table(tablefmt=tablefmt)
     def json(self):
         return DisplayJSON(self.to_dict(add_edsl_version=False))
@@ -172,7 +198,7 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
         return DisplayYAML(self.to_dict(add_edsl_version=False))
-    def to_dict(self, add_edsl_version=True) -> dict:
+    def to_dict(self, add_edsl_version: bool = True) -> dict:
         """Convert a scenario to a dictionary.
         Example:
@@ -200,8 +226,7 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
         return d
     def __hash__(self) -> int:
-        """
-        Return a hash of the scenario.
+        """Return a hash of the scenario.
         Example:
@@ -213,44 +238,23 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
         return dict_hash(self.to_dict(add_edsl_version=False))
-    def print(self):
-        from rich import print_json
-        import json
-        print_json(json.dumps(self.to_dict()))
     def __repr__(self):
         return "Scenario(" + repr(self.data) + ")"
     def to_dataset(self) -> "Dataset":
-        # d = Dataset([{'a.b':[1,2,3,4]}])
+        """Convert a scenario to a dataset.
+        >>> s = Scenario({"food": "wood chips"})
+        >>> s.to_dataset()
+        Dataset([{'key': ['food']}, {'value': ['wood chips']}])
+        """
         from edsl.results.Dataset import Dataset
-        keys = [key for key, value in self.items()]
-        values = [value for key, value in self.items()]
+        keys = list(self.keys())
+        values = list(self.values())
         return Dataset([{"key": keys}, {"value": values}])
-    def _repr_html_(self):
-        from tabulate import tabulate
-        import reprlib
-        d = self.to_dict(add_edsl_version=False)
-        # return self.to_dataset()
-        r = reprlib.Repr()
-        r.maxstring = 70
-        data = [[k, r.repr(v)] for k, v in d.items()]
-        from tabulate import tabulate
-        if hasattr(self, "__documentation__"):
-            footer = f"<a href='{self.__documentation__}'>(docs)</a></p>"
-        else:
-            footer = ""
-        table = str(tabulate(data, headers=["keys", "values"], tablefmt="html"))
-        return f"<pre>{table}</pre>" + footer
-    def select(self, list_of_keys: List[str]) -> "Scenario":
+    def select(self, list_of_keys: Collection[str]) -> "Scenario":
         """Select a subset of keys from a scenario.
         :param list_of_keys: The keys to select.
@@ -266,7 +270,7 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
             new_scenario[key] = self[key]
         return new_scenario
-    def drop(self, list_of_keys: List[str]) -> "Scenario":
+    def drop(self, list_of_keys: Collection[str]) -> "Scenario":
         """Drop a subset of keys from a scenario.
         :param list_of_keys: The keys to drop.
@@ -320,7 +324,7 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
         ...     _ = f.flush()
         ...     s = Scenario.from_file(f.name, "file")
         >>> s
-        Scenario({'file': FileStore(path='...')})
+        Scenario({'file': FileStore(path='...', ...)})
         """
         from edsl.scenarios.FileStore import FileStore
@@ -351,35 +355,10 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
         return cls.from_file(image_path, image_name)
     @classmethod
-    def from_pdf(cls, pdf_path):
-        # Ensure the file exists
-        import fitz
-        if not os.path.exists(pdf_path):
-            raise FileNotFoundError(f"The file {pdf_path} does not exist.")
-        # Open the PDF file
-        document = fitz.open(pdf_path)
-        # Get the filename from the path
-        filename = os.path.basename(pdf_path)
+    def from_pdf(cls, pdf_path: str):
+        from edsl.scenarios.PdfExtractor import PdfExtractor
-        # Iterate through each page and extract text
-        text = ""
-        for page_num in range(len(document)):
-            page = document.load_page(page_num)
-            blocks = page.get_text("blocks")  # Extract text blocks
-            # Sort blocks by their vertical position (y0) to maintain reading order
-            blocks.sort(key=lambda b: (b[1], b[0]))  # Sort by y0 first, then x0
-            # Combine the text blocks in order
-            for block in blocks:
-                text += block[4] + "\n"
-        # Create a dictionary for the combined text
-        page_info = {"filename": filename, "text": text}
-        return Scenario(page_info)
+        return PdfExtractor(pdf_path, cls).get_object()
     @classmethod
     def from_docx(cls, docx_path: str) -> "Scenario":
@@ -399,52 +378,9 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
         Scenario({'file_path': 'test.docx', 'text': 'EDSL Survey\\nThis is a test.'})
         >>> import os; os.remove("test.docx")
         """
-        from docx import Document
-        doc = Document(docx_path)
+        from edsl.scenarios.DocxScenario import DocxScenario
-        # Extract all text
-        full_text = []
-        for para in doc.paragraphs:
-            full_text.append(para.text)
-        # Join the text from all paragraphs
-        text = "\n".join(full_text)
-        return Scenario({"file_path": docx_path, "text": text})
-    @staticmethod
-    def _line_chunks(text, num_lines: int) -> Generator[str, None, None]:
-        """Split a text into chunks of a given size.
-        :param text: The text to split.
-        :param num_lines: The number of lines in each chunk.
-        Example:
-        >>> list(Scenario._line_chunks("This is a test.\\nThis is a test. This is a test.", 1))
-        ['This is a test.', 'This is a test. This is a test.']
-        """
-        lines = text.split("\n")
-        for i in range(0, len(lines), num_lines):
-            chunk = "\n".join(lines[i : i + num_lines])
-            yield chunk
-    @staticmethod
-    def _word_chunks(text, num_words: int) -> Generator[str, None, None]:
-        """Split a text into chunks of a given size.
-        :param text: The text to split.
-        :param num_words: The number of words in each chunk.
-        Example:
-        >>> list(Scenario._word_chunks("This is a test.", 2))
-        ['This is', 'a test.']
-        """
-        words = text.split()
-        for i in range(0, len(words), num_words):
-            chunk = " ".join(words[i : i + num_words])
-            yield chunk
+        return Scenario(DocxScenario(docx_path).get_scenario_dict())
     def chunk(
         self,
@@ -495,36 +431,11 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
         ...
         ValueError: You must specify either num_words or num_lines, but not both.
         """
-        from edsl.scenarios.ScenarioList import ScenarioList
+        from edsl.scenarios.DocumentChunker import DocumentChunker
-        if num_words is not None:
-            chunks = list(self._word_chunks(self[field], num_words))
-        if num_lines is not None:
-            chunks = list(self._line_chunks(self[field], num_lines))
-        if num_words is None and num_lines is None:
-            raise ValueError("You must specify either num_words or num_lines.")
-        if num_words is not None and num_lines is not None:
-            raise ValueError(
-                "You must specify either num_words or num_lines, but not both."
-            )
-        scenarios = []
-        for i, chunk in enumerate(chunks):
-            new_scenario = copy.deepcopy(self)
-            new_scenario[field] = chunk
-            new_scenario[field + "_chunk"] = i
-            if include_original:
-                if hash_original:
-                    new_scenario[field + "_original"] = hashlib.md5(
-                        self[field].encode()
-                    ).hexdigest()
-                else:
-                    new_scenario[field + "_original"] = self[field]
-            scenarios.append(new_scenario)
-        return ScenarioList(scenarios)
+        return DocumentChunker(self).chunk(
+            field, num_words, num_lines, include_original, hash_original
+        )
     @classmethod
     @remove_edsl_version
@@ -547,44 +458,30 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
         return cls(d)
     def _table(self) -> tuple[dict, list]:
-        """Prepare generic table data."""
+        """Prepare generic table data.
+        >>> s = Scenario({"food": "wood chips"})
+        >>> s._table()
+        ([{'Attribute': 'data', 'Value': "{'food': 'wood chips'}"}, {'Attribute': 'name', 'Value': 'None'}], ['Attribute', 'Value'])
+        """
         table_data = []
         for attr_name, attr_value in self.__dict__.items():
             table_data.append({"Attribute": attr_name, "Value": repr(attr_value)})
         column_names = ["Attribute", "Value"]
         return table_data, column_names
-    def rich_print(self) -> "Table":
-        """Display an object as a rich table."""
-        from rich.table import Table
-        table_data, column_names = self._table()
-        table = Table(title=f"{self.__class__.__name__} Attributes")
-        for column in column_names:
-            table.add_column(column, style="bold")
-        for row in table_data:
-            row_data = [row[column] for column in column_names]
-            table.add_row(*row_data)
-        return table
     @classmethod
-    def example(cls, randomize: bool = False, has_image=False) -> Scenario:
+    def example(cls, randomize: bool = False) -> Scenario:
         """
         Returns an example Scenario instance.
         :param randomize: If True, adds a random string to the value of the example key.
         """
-        if not has_image:
-            addition = "" if not randomize else str(uuid4())
-            return cls(
-                {
-                    "persona": f"A reseacher studying whether LLMs can be used to generate surveys.{addition}",
-                }
-            )
-        else:
-            return cls.from_image(cls.example_image())
+        addition = "" if not randomize else str(uuid4())
+        return cls(
+            {
+                "persona": f"A reseacher studying whether LLMs can be used to generate surveys.{addition}",
+            }
+        )
     def code(self) -> List[str]:
         """Return the code for the scenario."""

edsl/scenarios/ScenarioHtmlMixin.py CHANGED Viewed

@@ -1,7 +1,4 @@
-import requests
 from typing import Optional
-from requests.adapters import HTTPAdapter
-from requests.packages.urllib3.util.retry import Retry
 class ScenarioHtmlMixin:
@@ -22,6 +19,10 @@ class ScenarioHtmlMixin:
     def fetch_html(url):
         # Define the user-agent to mimic a browser
+        import requests
+        from requests.adapters import HTTPAdapter
+        from requests.packages.urllib3.util.retry import Retry
         headers = {
             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
         }

edsl/scenarios/ScenarioJoin.py CHANGED Viewed

@@ -1,9 +1,9 @@
 from __future__ import annotations
 from typing import Union, TYPE_CHECKING
-# if TYPE_CHECKING:
-from edsl.scenarios.ScenarioList import ScenarioList
-from edsl.scenarios.Scenario import Scenario
+if TYPE_CHECKING:
+    from edsl.scenarios.ScenarioList import ScenarioList
+    from edsl.scenarios.Scenario import Scenario
 class ScenarioJoin:
@@ -23,7 +23,7 @@ class ScenarioJoin:
         self.left = left
         self.right = right
-    def left_join(self, by: Union[str, list[str]]) -> ScenarioList:
+    def left_join(self, by: Union[str, list[str]]) -> "ScenarioList":
         """Perform a left join between the two ScenarioLists.
         Args:
@@ -35,6 +35,8 @@ class ScenarioJoin:
         Raises:
             ValueError: If by is empty or if any join keys don't exist in both ScenarioLists
         """
+        from edsl.scenarios.ScenarioList import ScenarioList
         self._validate_join_keys(by)
         by_keys = [by] if isinstance(by, str) else by
@@ -86,6 +88,8 @@ class ScenarioJoin:
         self, by_keys: list[str], other_dict: dict, all_keys: set
     ) -> list[Scenario]:
         """Create the joined scenarios."""
+        from edsl.scenarios.Scenario import Scenario
         new_scenarios = []
         for scenario in self.left:
@@ -105,8 +109,8 @@ class ScenarioJoin:
     def _handle_matching_scenario(
         self,
         new_scenario: dict,
-        left_scenario: Scenario,
-        right_scenario: Scenario,
+        left_scenario: "Scenario",
+        right_scenario: "Scenario",
         by_keys: list[str],
     ) -> None:
         """Handle merging of matching scenarios and conflict warnings."""

edsl 0.1.39.dev1__py3-none-any.whl → 0.1.39.dev2__py3-none-any.whl

edsl 0.1.39.dev1__py3-none-any.whl → 0.1.39.dev2__py3-none-any.whl