PyPI - edsl - Versions diffs - 0.1.33.dev2__py3-none-any.whl → 0.1.34__py3-none-any.whl - Mend

edsl 0.1.33.dev2__py3-none-any.whl → 0.1.34__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78) hide show

edsl/Base.py +24 -14
edsl/__init__.py +1 -0
edsl/__version__.py +1 -1
edsl/agents/Agent.py +6 -6
edsl/agents/Invigilator.py +28 -6
edsl/agents/InvigilatorBase.py +8 -27
edsl/agents/{PromptConstructionMixin.py → PromptConstructor.py} +150 -182
edsl/agents/prompt_helpers.py +129 -0
edsl/config.py +26 -34
edsl/coop/coop.py +14 -4
edsl/data_transfer_models.py +26 -73
edsl/enums.py +2 -0
edsl/inference_services/AnthropicService.py +5 -2
edsl/inference_services/AwsBedrock.py +5 -2
edsl/inference_services/AzureAI.py +5 -2
edsl/inference_services/GoogleService.py +108 -33
edsl/inference_services/InferenceServiceABC.py +44 -13
edsl/inference_services/MistralAIService.py +5 -2
edsl/inference_services/OpenAIService.py +10 -6
edsl/inference_services/TestService.py +34 -16
edsl/inference_services/TogetherAIService.py +170 -0
edsl/inference_services/registry.py +2 -0
edsl/jobs/Jobs.py +109 -18
edsl/jobs/buckets/BucketCollection.py +24 -15
edsl/jobs/buckets/TokenBucket.py +64 -10
edsl/jobs/interviews/Interview.py +130 -49
edsl/jobs/interviews/{interview_exception_tracking.py → InterviewExceptionCollection.py} +16 -0
edsl/jobs/interviews/InterviewExceptionEntry.py +2 -0
edsl/jobs/runners/JobsRunnerAsyncio.py +119 -173
edsl/jobs/runners/JobsRunnerStatus.py +332 -0
edsl/jobs/tasks/QuestionTaskCreator.py +1 -13
edsl/jobs/tasks/TaskHistory.py +17 -0
edsl/language_models/LanguageModel.py +36 -38
edsl/language_models/registry.py +13 -9
edsl/language_models/utilities.py +5 -2
edsl/questions/QuestionBase.py +74 -16
edsl/questions/QuestionBaseGenMixin.py +28 -0
edsl/questions/QuestionBudget.py +93 -41
edsl/questions/QuestionCheckBox.py +1 -1
edsl/questions/QuestionFreeText.py +6 -0
edsl/questions/QuestionMultipleChoice.py +13 -24
edsl/questions/QuestionNumerical.py +5 -4
edsl/questions/Quick.py +41 -0
edsl/questions/ResponseValidatorABC.py +11 -6
edsl/questions/derived/QuestionLinearScale.py +4 -1
edsl/questions/derived/QuestionTopK.py +4 -1
edsl/questions/derived/QuestionYesNo.py +8 -2
edsl/questions/descriptors.py +12 -11
edsl/questions/templates/budget/__init__.py +0 -0
edsl/questions/templates/budget/answering_instructions.jinja +7 -0
edsl/questions/templates/budget/question_presentation.jinja +7 -0
edsl/questions/templates/extract/__init__.py +0 -0
edsl/questions/templates/numerical/answering_instructions.jinja +0 -1
edsl/questions/templates/rank/__init__.py +0 -0
edsl/questions/templates/yes_no/answering_instructions.jinja +2 -2
edsl/results/DatasetExportMixin.py +5 -1
edsl/results/Result.py +1 -1
edsl/results/Results.py +4 -1
edsl/scenarios/FileStore.py +178 -34
edsl/scenarios/Scenario.py +76 -37
edsl/scenarios/ScenarioList.py +19 -2
edsl/scenarios/ScenarioListPdfMixin.py +150 -4
edsl/study/Study.py +32 -0
edsl/surveys/DAG.py +62 -0
edsl/surveys/MemoryPlan.py +26 -0
edsl/surveys/Rule.py +34 -1
edsl/surveys/RuleCollection.py +55 -5
edsl/surveys/Survey.py +189 -10
edsl/surveys/base.py +4 -0
edsl/templates/error_reporting/interview_details.html +6 -1
edsl/utilities/utilities.py +9 -1
{edsl-0.1.33.dev2.dist-info → edsl-0.1.34.dist-info}/METADATA +3 -1
{edsl-0.1.33.dev2.dist-info → edsl-0.1.34.dist-info}/RECORD +75 -69
edsl/jobs/interviews/retry_management.py +0 -39
edsl/jobs/runners/JobsRunnerStatusMixin.py +0 -333
edsl/scenarios/ScenarioImageMixin.py +0 -100
{edsl-0.1.33.dev2.dist-info → edsl-0.1.34.dist-info}/LICENSE +0 -0
{edsl-0.1.33.dev2.dist-info → edsl-0.1.34.dist-info}/WHEEL +0 -0

edsl/scenarios/ScenarioList.py CHANGED Viewed

@@ -39,6 +39,15 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
             super().__init__([])
         self.codebook = codebook or {}
+    @property
+    def has_jinja_braces(self) -> bool:
+        """Check if the ScenarioList has Jinja braces."""
+        return any([scenario.has_jinja_braces for scenario in self])
+    def convert_jinja_braces(self) -> ScenarioList:
+        """Convert Jinja braces to Python braces."""
+        return ScenarioList([scenario.convert_jinja_braces() for scenario in self])
     def give_valid_names(self) -> ScenarioList:
         """Give valid names to the scenario keys.
@@ -273,6 +282,10 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
         for s in data["scenarios"]:
             _ = s.pop("edsl_version")
             _ = s.pop("edsl_class_name")
+        for scenario in data["scenarios"]:
+            for key, value in scenario.items():
+                if hasattr(value, "to_dict"):
+                    data[key] = value.to_dict()
         return data_to_html(data)
     def tally(self, field) -> dict:
@@ -517,7 +530,9 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
         return ScenarioList([scenario.drop(fields) for scenario in self.data])
     @classmethod
-    def from_list(cls, name, values) -> ScenarioList:
+    def from_list(
+        cls, name: str, values: list, func: Optional[Callable] = None
+    ) -> ScenarioList:
         """Create a ScenarioList from a list of values.
         Example:
@@ -525,7 +540,9 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
         >>> ScenarioList.from_list('name', ['Alice', 'Bob'])
         ScenarioList([Scenario({'name': 'Alice'}), Scenario({'name': 'Bob'})])
         """
-        return cls([Scenario({name: value}) for value in values])
+        if not func:
+            func = lambda x: x
+        return cls([Scenario({name: func(value)}) for value in values])
     def to_dataset(self) -> "Dataset":
         """

edsl/scenarios/ScenarioListPdfMixin.py CHANGED Viewed

@@ -1,15 +1,161 @@
 import fitz  # PyMuPDF
 import os
+import copy
 import subprocess
+import requests
+import tempfile
+import os
+# import urllib.parse as urlparse
+from urllib.parse import urlparse
 # from edsl import Scenario
+import requests
+import re
+import tempfile
+import os
+import atexit
+from urllib.parse import urlparse, parse_qs
+class GoogleDriveDownloader:
+    _temp_dir = None
+    _temp_file_path = None
+    @classmethod
+    def fetch_from_drive(cls, url, filename=None):
+        # Extract file ID from the URL
+        file_id = cls._extract_file_id(url)
+        if not file_id:
+            raise ValueError("Invalid Google Drive URL")
+        # Construct the download URL
+        download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
+        # Send a GET request to the URL
+        session = requests.Session()
+        response = session.get(download_url, stream=True)
+        response.raise_for_status()
+        # Check for large file download prompt
+        for key, value in response.cookies.items():
+            if key.startswith("download_warning"):
+                params = {"id": file_id, "confirm": value}
+                response = session.get(download_url, params=params, stream=True)
+                break
+        # Create a temporary file to save the download
+        if not filename:
+            filename = "downloaded_file"
+        if cls._temp_dir is None:
+            cls._temp_dir = tempfile.TemporaryDirectory()
+            atexit.register(cls._cleanup)
+        cls._temp_file_path = os.path.join(cls._temp_dir.name, filename)
+        # Write the content to the temporary file
+        with open(cls._temp_file_path, "wb") as f:
+            for chunk in response.iter_content(32768):
+                if chunk:
+                    f.write(chunk)
+        print(f"File saved to: {cls._temp_file_path}")
+        return cls._temp_file_path
+    @staticmethod
+    def _extract_file_id(url):
+        # Try to extract file ID from '/file/d/' format
+        file_id_match = re.search(r"/d/([a-zA-Z0-9-_]+)", url)
+        if file_id_match:
+            return file_id_match.group(1)
+        # If not found, try to extract from 'open?id=' format
+        parsed_url = urlparse(url)
+        query_params = parse_qs(parsed_url.query)
+        if "id" in query_params:
+            return query_params["id"][0]
+        return None
+    @classmethod
+    def _cleanup(cls):
+        if cls._temp_dir:
+            cls._temp_dir.cleanup()
+    @classmethod
+    def get_temp_file_path(cls):
+        return cls._temp_file_path
+def fetch_and_save_pdf(url, filename):
+    # Send a GET request to the URL
+    response = requests.get(url)
+    # Check if the request was successful
+    response.raise_for_status()
+    # Create a temporary directory
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Construct the full path for the file
+        temp_file_path = os.path.join(temp_dir, filename)
+        # Write the content to the temporary file
+        with open(temp_file_path, "wb") as file:
+            file.write(response.content)
+        print(f"PDF saved to: {temp_file_path}")
+        # Here you can perform operations with the file
+        # The file will be automatically deleted when you exit this block
+    return temp_file_path
+# Example usage:
+# url = "https://example.com/sample.pdf"
+# fetch_and_save_pdf(url, "sample.pdf")
 class ScenarioListPdfMixin:
     @classmethod
-    def from_pdf(cls, filename):
-        scenarios = list(cls.extract_text_from_pdf(filename))
-        return cls(scenarios)
+    def from_pdf(cls, filename_or_url, collapse_pages=False):
+        # Check if the input is a URL
+        if cls.is_url(filename_or_url):
+            # Check if it's a Google Drive URL
+            if "drive.google.com" in filename_or_url:
+                temp_filename = GoogleDriveDownloader.fetch_from_drive(
+                    filename_or_url, "temp_pdf.pdf"
+                )
+            else:
+                # For other URLs, use the previous fetch_and_save_pdf function
+                temp_filename = fetch_and_save_pdf(filename_or_url, "temp_pdf.pdf")
+            scenarios = list(cls.extract_text_from_pdf(temp_filename))
+        else:
+            # If it's not a URL, assume it's a local file path
+            scenarios = list(cls.extract_text_from_pdf(filename_or_url))
+        if not collapse_pages:
+            return cls(scenarios)
+        else:
+            txt = ""
+            for scenario in scenarios:
+                txt += scenario["text"]
+            from edsl.scenarios import Scenario
+            base_scenario = copy.copy(scenarios[0])
+            base_scenario["text"] = txt
+        return base_scenario
+    @staticmethod
+    def is_url(string):
+        try:
+            result = urlparse(string)
+            return all([result.scheme, result.netloc])
+        except ValueError:
+            return False
     @classmethod
     def _from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
@@ -38,7 +184,7 @@ class ScenarioListPdfMixin:
                 scenario = Scenario._from_filepath_image(image_path)
                 scenarios.append(scenario)
-            print(f"Saved {len(images)} pages as images in {output_folder}")
+            # print(f"Saved {len(images)} pages as images in {output_folder}")
             return cls(scenarios)
     @staticmethod

edsl/study/Study.py CHANGED Viewed

@@ -469,6 +469,38 @@ class Study:
         coop = Coop()
         return coop.create(self, description=self.description)
+    def delete_object(self, identifier: Union[str, UUID]):
+        """
+        Delete an EDSL object from the study.
+        :param identifier: Either the variable name or the hash of the object to delete
+        :raises ValueError: If the object is not found in the study
+        """
+        if isinstance(identifier, str):
+            # If identifier is a variable name or a string representation of UUID
+            for hash, obj_entry in list(self.objects.items()):
+                if obj_entry.variable_name == identifier or hash == identifier:
+                    del self.objects[hash]
+                    self._create_mapping_dicts()  # Update internal mappings
+                    if self.verbose:
+                        print(f"Deleted object with identifier: {identifier}")
+                    return
+            raise ValueError(f"No object found with identifier: {identifier}")
+        elif isinstance(identifier, UUID):
+            # If identifier is a UUID object
+            hash_str = str(identifier)
+            if hash_str in self.objects:
+                del self.objects[hash_str]
+                self._create_mapping_dicts()  # Update internal mappings
+                if self.verbose:
+                    print(f"Deleted object with hash: {hash_str}")
+                return
+            raise ValueError(f"No object found with hash: {hash_str}")
+        else:
+            raise TypeError(
+                "Identifier must be either a string (variable name or hash) or a UUID object"
+            )
     @classmethod
     def pull(cls, uuid: Optional[Union[str, UUID]] = None, url: Optional[str] = None):
         """Pull the object from coop."""

edsl/surveys/DAG.py CHANGED Viewed

@@ -11,6 +11,7 @@ class DAG(UserDict):
         """Initialize the DAG class."""
         super().__init__(data)
         self.reverse_mapping = self._create_reverse_mapping()
+        self.validate_no_cycles()
     def _create_reverse_mapping(self):
         """
@@ -73,12 +74,73 @@ class DAG(UserDict):
         # else:
         #     return DAG(d)
+    def remove_node(self, node: int) -> None:
+        """Remove a node and all its connections from the DAG."""
+        self.pop(node, None)
+        for connections in self.values():
+            connections.discard(node)
+        # Adjust remaining nodes if necessary
+        self._adjust_nodes_after_removal(node)
+    def _adjust_nodes_after_removal(self, removed_node: int) -> None:
+        """Adjust node indices after a node is removed."""
+        new_dag = {}
+        for node, connections in self.items():
+            new_node = node if node < removed_node else node - 1
+            new_connections = {c if c < removed_node else c - 1 for c in connections}
+            new_dag[new_node] = new_connections
+        self.clear()
+        self.update(new_dag)
     @classmethod
     def example(cls):
         """Return an example of the `DAG`."""
         data = {"a": ["b", "c"], "b": ["d"], "c": [], "d": []}
         return cls(data)
+    def detect_cycles(self):
+        """
+        Detect cycles in the DAG using depth-first search.
+        :return: A list of cycles if any are found, otherwise an empty list.
+        """
+        visited = set()
+        path = []
+        cycles = []
+        def dfs(node):
+            if node in path:
+                cycle = path[path.index(node) :]
+                cycles.append(cycle + [node])
+                return
+            if node in visited:
+                return
+            visited.add(node)
+            path.append(node)
+            for child in self.get(node, []):
+                dfs(child)
+            path.pop()
+        for node in self:
+            if node not in visited:
+                dfs(node)
+        return cycles
+    def validate_no_cycles(self):
+        """
+        Validate that the DAG does not contain any cycles.
+        :raises ValueError: If cycles are detected in the DAG.
+        """
+        cycles = self.detect_cycles()
+        if cycles:
+            raise ValueError(f"Cycles detected in the DAG: {cycles}")
 if __name__ == "__main__":
     import doctest

edsl/surveys/MemoryPlan.py CHANGED Viewed

@@ -211,6 +211,32 @@ class MemoryPlan(UserDict):
         mp.add_single_memory("q1", "q0")
         return mp
+    def remove_question(self, question_name: str) -> None:
+        """Remove a question from the memory plan.
+        :param question_name: The name of the question to remove.
+        """
+        self._check_valid_question_name(question_name)
+        # Remove the question from survey_question_names and question_texts
+        index = self.survey_question_names.index(question_name)
+        self.survey_question_names.pop(index)
+        self.question_texts.pop(index)
+        # Remove the question from the memory plan if it's a focal question
+        self.pop(question_name, None)
+        # Remove the question from all memories where it appears as a prior question
+        for focal_question, memory in self.items():
+            memory.remove_prior_question(question_name)
+        # Update the DAG
+        self.dag.remove_node(index)
+    def remove_prior_question(self, question_name: str) -> None:
+        """Remove a prior question from the memory."""
+        self.prior_questions = [q for q in self.prior_questions if q != question_name]
 if __name__ == "__main__":
     import doctest

edsl/surveys/Rule.py CHANGED Viewed

@@ -18,6 +18,7 @@ with a low (-1) priority.
 """
 import ast
+import random
 from typing import Any, Union, List
 from jinja2 import Template
@@ -37,9 +38,29 @@ from edsl.utilities.ast_utilities import extract_variable_names
 from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
+class QuestionIndex:
+    def __set_name__(self, owner, name):
+        self.name = f"_{name}"
+    def __get__(self, obj, objtype=None):
+        return getattr(obj, self.name)
+    def __set__(self, obj, value):
+        if not isinstance(value, (int, EndOfSurvey.__class__)):
+            raise ValueError(f"{self.name} must be an integer or EndOfSurvey")
+        if self.name == "_next_q" and isinstance(value, int):
+            current_q = getattr(obj, "_current_q")
+            if value <= current_q:
+                raise ValueError("next_q must be greater than current_q")
+        setattr(obj, self.name, value)
 class Rule:
     """The Rule class defines a "rule" for determining the next question presented to an agent."""
+    current_q = QuestionIndex()
+    next_q = QuestionIndex()
     # Not implemented but nice to have:
     # We could potentially use the question pydantic models to check for rule conflicts, as
     # they define the potential trees through a survey.
@@ -74,6 +95,10 @@ class Rule:
         self.priority = priority
         self.before_rule = before_rule
+        if not self.next_q == EndOfSurvey:
+            if self.next_q <= self.current_q:
+                raise SurveyRuleSendsYouBackwardsError
         if not self.next_q == EndOfSurvey and self.current_q > self.next_q:
             raise SurveyRuleSendsYouBackwardsError
@@ -254,8 +279,16 @@ class Rule:
             msg = f"""Exception in evaluation: {e}. The expression is: {self.expression}. The current info env trying to substitute in is: {current_info_env}. After the substition, the expression was: {to_evaluate}."""
             raise SurveyRuleCannotEvaluateError(msg)
+        random_functions = {
+            "randint": random.randint,
+            "choice": random.choice,
+            "random": random.random,
+            "uniform": random.uniform,
+            # Add any other random functions you want to allow
+        }
         try:
-            return EvalWithCompoundTypes().eval(to_evaluate)
+            return EvalWithCompoundTypes(functions=random_functions).eval(to_evaluate)
         except Exception as e:
             msg = f"""Exception in evaluation: {e}. The expression is: {self.expression}. The current info env trying to substitute in is: {current_info_env}. After the substition, the expression was: {to_evaluate}."""
             raise SurveyRuleCannotEvaluateError(msg)

edsl/surveys/RuleCollection.py CHANGED Viewed

@@ -120,13 +120,13 @@ class RuleCollection(UserList):
         :param answers: The answers to the survey questions.
         >>> rule_collection = RuleCollection()
-        >>> r = Rule(current_q=1, expression="True", next_q=1, priority=1, question_name_to_index={}, before_rule = True)
+        >>> r = Rule(current_q=1, expression="True", next_q=2, priority=1, question_name_to_index={}, before_rule = True)
         >>> rule_collection.add_rule(r)
         >>> rule_collection.skip_question_before_running(1, {})
         True
         >>> rule_collection = RuleCollection()
-        >>> r = Rule(current_q=1, expression="False", next_q=1, priority=1, question_name_to_index={}, before_rule = True)
+        >>> r = Rule(current_q=1, expression="False", next_q=2, priority=1, question_name_to_index={}, before_rule = True)
         >>> rule_collection.add_rule(r)
         >>> rule_collection.skip_question_before_running(1, {})
         False
@@ -172,7 +172,8 @@ class RuleCollection(UserList):
     def next_question(self, q_now: int, answers: dict[str, Any]) -> NextQuestion:
         """Find the next question by index, given the rule collection.
-        This rule is applied after the question is asked.
+        This rule is applied after the question is answered.
         :param q_now: The current question index.
         :param answers: The answers to the survey questions so far, including the current question.
@@ -182,8 +183,17 @@ class RuleCollection(UserList):
         NextQuestion(next_q=3, num_rules_found=2, expressions_evaluating_to_true=1, priority=1)
         """
-        # What rules apply at the current node?
+        # # is this the first question? If it is, we need to check if it should be skipped.
+        # if q_now == 0:
+        #     if self.skip_question_before_running(q_now, answers):
+        #         return NextQuestion(
+        #             next_q=q_now + 1,
+        #             num_rules_found=0,
+        #             expressions_evaluating_to_true=0,
+        #             priority=-1,
+        #         )
+        # breakpoint()
         expressions_evaluating_to_true = 0
         next_q = None
         highest_priority = -2  # start with -2 to 'pick up' the default rule added
@@ -205,6 +215,12 @@ class RuleCollection(UserList):
                 f"No rules found for question {q_now}"
             )
+        # breakpoint()
+        ## Now we need to check if the *next question* has any 'before; rules that we should follow
+        for rule in self.applicable_rules(next_q, before_rule=True):
+            if rule.evaluate(answers):  # rule evaluates to True
+                return self.next_question(next_q, answers)
         return NextQuestion(
             next_q, num_rules_found, expressions_evaluating_to_true, highest_priority
         )
@@ -305,6 +321,40 @@ class RuleCollection(UserList):
         return DAG(dict(sorted(children_to_parents.items())))
+    def detect_cycles(self):
+        """
+        Detect cycles in the survey rules using depth-first search.
+        :return: A list of cycles if any are found, otherwise an empty list.
+        """
+        dag = self.dag
+        visited = set()
+        path = []
+        cycles = []
+        def dfs(node):
+            if node in path:
+                cycle = path[path.index(node) :]
+                cycles.append(cycle + [node])
+                return
+            if node in visited:
+                return
+            visited.add(node)
+            path.append(node)
+            for child in dag.get(node, []):
+                dfs(child)
+            path.pop()
+        for node in dag:
+            if node not in visited:
+                dfs(node)
+        return cycles
     @classmethod
     def example(cls):
         """Create an example RuleCollection object."""

edsl 0.1.33.dev2__py3-none-any.whl → 0.1.34__py3-none-any.whl

edsl 0.1.33.dev2__py3-none-any.whl → 0.1.34__py3-none-any.whl