edsl 0.1.45__py3-none-any.whl → 0.1.47__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- edsl/Base.py +87 -16
- edsl/__version__.py +1 -1
- edsl/agents/PromptConstructor.py +26 -79
- edsl/agents/QuestionInstructionPromptBuilder.py +70 -32
- edsl/agents/QuestionTemplateReplacementsBuilder.py +12 -2
- edsl/coop/coop.py +289 -147
- edsl/data/Cache.py +2 -0
- edsl/data/CacheEntry.py +10 -2
- edsl/data/RemoteCacheSync.py +10 -9
- edsl/inference_services/AvailableModelFetcher.py +1 -1
- edsl/inference_services/PerplexityService.py +9 -5
- edsl/jobs/AnswerQuestionFunctionConstructor.py +12 -1
- edsl/jobs/Jobs.py +35 -17
- edsl/jobs/JobsComponentConstructor.py +2 -1
- edsl/jobs/JobsPrompts.py +49 -26
- edsl/jobs/JobsRemoteInferenceHandler.py +4 -5
- edsl/jobs/data_structures.py +3 -0
- edsl/jobs/interviews/Interview.py +6 -3
- edsl/language_models/LanguageModel.py +7 -1
- edsl/questions/QuestionBase.py +5 -0
- edsl/questions/question_base_gen_mixin.py +2 -0
- edsl/questions/question_registry.py +6 -7
- edsl/results/DatasetExportMixin.py +124 -6
- edsl/results/Results.py +59 -0
- edsl/scenarios/FileStore.py +112 -7
- edsl/scenarios/ScenarioList.py +283 -21
- edsl/study/Study.py +2 -2
- edsl/surveys/Survey.py +15 -20
- {edsl-0.1.45.dist-info → edsl-0.1.47.dist-info}/METADATA +4 -3
- {edsl-0.1.45.dist-info → edsl-0.1.47.dist-info}/RECORD +32 -44
- edsl/auto/AutoStudy.py +0 -130
- edsl/auto/StageBase.py +0 -243
- edsl/auto/StageGenerateSurvey.py +0 -178
- edsl/auto/StageLabelQuestions.py +0 -125
- edsl/auto/StagePersona.py +0 -61
- edsl/auto/StagePersonaDimensionValueRanges.py +0 -88
- edsl/auto/StagePersonaDimensionValues.py +0 -74
- edsl/auto/StagePersonaDimensions.py +0 -69
- edsl/auto/StageQuestions.py +0 -74
- edsl/auto/SurveyCreatorPipeline.py +0 -21
- edsl/auto/utilities.py +0 -218
- edsl/base/Base.py +0 -279
- {edsl-0.1.45.dist-info → edsl-0.1.47.dist-info}/LICENSE +0 -0
- {edsl-0.1.45.dist-info → edsl-0.1.47.dist-info}/WHEEL +0 -0
edsl/questions/question_registry.py
CHANGED
@@ -60,26 +60,25 @@ class Question(metaclass=Meta):
         return q.example()
 
     @classmethod
-    def pull(cls,
+    def pull(cls, url_or_uuid: Union[str, UUID]):
         """Pull the object from coop."""
        from edsl.coop import Coop
 
         coop = Coop()
-        return coop.get(
+        return coop.get(url_or_uuid, "question")
 
     @classmethod
-    def delete(cls,
+    def delete(cls, url_or_uuid: Union[str, UUID]):
         """Delete the object from coop."""
         from edsl.coop import Coop
 
         coop = Coop()
-        return coop.delete(
+        return coop.delete(url_or_uuid)
 
     @classmethod
     def patch(
         cls,
-
-        url: Optional[str] = None,
+        url_or_uuid: Union[str, UUID],
         description: Optional[str] = None,
         value: Optional[Any] = None,
         visibility: Optional[str] = None,
@@ -88,7 +87,7 @@ class Question(metaclass=Meta):
         from edsl.coop import Coop
 
         coop = Coop()
-        return coop.patch(
+        return coop.patch(url_or_uuid, description, value, visibility)
 
     @classmethod
     def list_question_types(cls):
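The pull/delete/patch classmethods now take a single url_or_uuid argument in place of the old separate uuid/url parameters. A minimal usage sketch, assuming Question is importable from the package root as in prior releases; the UUID, URL, and description below are placeholders:

    from edsl import Question

    # Either a UUID string or a Coop URL is accepted by the new signature.
    q = Question.pull("123e4567-e89b-12d3-a456-426614174000")  # placeholder UUID
    q = Question.pull("https://www.expectedparrot.com/content/example-owner/example-question")  # placeholder URL

    # delete and patch take the same single identifier argument.
    Question.delete("123e4567-e89b-12d3-a456-426614174000")
    Question.patch("123e4567-e89b-12d3-a456-426614174000", description="Updated description")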
edsl/results/DatasetExportMixin.py
CHANGED
@@ -505,8 +505,9 @@ class DatasetExportMixin:
 
         from edsl.utilities.PrettyList import PrettyList
 
-        return PrettyList(list_to_return)
-
+        #return PrettyList(list_to_return)
+        return list_to_return
+
     def html(
         self,
         filename: Optional[str] = None,
@@ -735,11 +736,14 @@ class DatasetExportMixin:
         """
         Flatten a field containing a list of dictionaries into separate fields.
 
-
-        [{'
+        >>> from edsl.results.Dataset import Dataset
+        >>> Dataset([{'a': [{'a': 1, 'b': 2}]}, {'c': [5] }]).flatten('a')
+        Dataset([{'c': [5]}, {'a.a': [1]}, {'a.b': [2]}])
+
+
+        >>> Dataset([{'answer.example': [{'a': 1, 'b': 2}]}, {'c': [5] }]).flatten('answer.example')
+        Dataset([{'c': [5]}, {'answer.example.a': [1]}, {'answer.example.b': [2]}])
 
-        After d.flatten('data'), it should become:
-        [{'other': ['x', 'y'], 'data.a': [1, None], 'data.b': [None, 2]}]
 
         Args:
             field: The field to flatten
@@ -753,6 +757,24 @@ class DatasetExportMixin:
         # Ensure the dataset isn't empty
         if not self.data:
             return self.copy()
+
+        # Find all columns that contain the field
+        matching_entries = []
+        for entry in self.data:
+            col_name = next(iter(entry.keys()))
+            if field == col_name or (
+                '.' in col_name and
+                (col_name.endswith('.' + field) or col_name.startswith(field + '.'))
+            ):
+                matching_entries.append(entry)
+
+        # Check if the field is ambiguous
+        if len(matching_entries) > 1:
+            matching_cols = [next(iter(entry.keys())) for entry in matching_entries]
+            raise ValueError(
+                f"Ambiguous field name '{field}'. It matches multiple columns: {matching_cols}. "
+                f"Please specify the full column name to flatten."
+            )
 
         # Get the number of observations
         num_observations = self.num_observations()
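The new pre-check turns a short field name into an error when it matches more than one prefixed column. A small sketch of the behavior implied by the added code (column names here are illustrative):

    from edsl.results.Dataset import Dataset

    d = Dataset([{'answer.example': [{'a': 1}]}, {'other.example': [{'b': 2}]}])

    try:
        d.flatten('example')        # suffix matches both columns -> ambiguous
    except ValueError as err:
        print(err)                  # Ambiguous field name 'example'. It matches multiple columns: ...

    d.flatten('answer.example')     # fully qualified name flattens only that column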
@@ -882,6 +904,102 @@ class DatasetExportMixin:
         result.data.pop(field_index)
 
         return result
+
+    def drop(self, field_name):
+        """
+        Returns a new Dataset with the specified field removed.
+
+        Args:
+            field_name (str): The name of the field to remove.
+
+        Returns:
+            Dataset: A new Dataset instance without the specified field.
+
+        Raises:
+            KeyError: If the field_name doesn't exist in the dataset.
+
+        Examples:
+            >>> from edsl.results.Dataset import Dataset
+            >>> d = Dataset([{'a': [1, 2, 3]}, {'b': [4, 5, 6]}])
+            >>> d.drop('a')
+            Dataset([{'b': [4, 5, 6]}])
+
+            >>> d.drop('c')
+            Traceback (most recent call last):
+            ...
+            KeyError: "Field 'c' not found in dataset"
+        """
+        from edsl.results.Dataset import Dataset
+
+        # Check if field exists in the dataset
+        if field_name not in self.relevant_columns():
+            raise KeyError(f"Field '{field_name}' not found in dataset")
+
+        # Create a new dataset without the specified field
+        new_data = [entry for entry in self.data if field_name not in entry]
+        return Dataset(new_data)
+
+    def remove_prefix(self):
+        """Returns a new Dataset with the prefix removed from all column names.
+
+        The prefix is defined as everything before the first dot (.) in the column name.
+        If removing prefixes would result in duplicate column names, an exception is raised.
+
+        Returns:
+            Dataset: A new Dataset with prefixes removed from column names
+
+        Raises:
+            ValueError: If removing prefixes would result in duplicate column names
+
+        Examples:
+            >>> from edsl.results import Results
+            >>> r = Results.example()
+            >>> r.select('how_feeling', 'how_feeling_yesterday').relevant_columns()
+            ['answer.how_feeling', 'answer.how_feeling_yesterday']
+            >>> r.select('how_feeling', 'how_feeling_yesterday').remove_prefix().relevant_columns()
+            ['how_feeling', 'how_feeling_yesterday']
+
+            >>> from edsl.results.Dataset import Dataset
+            >>> d = Dataset([{'a.x': [1, 2, 3]}, {'b.x': [4, 5, 6]}])
+            >>> d.remove_prefix()
+            Traceback (most recent call last):
+            ...
+            ValueError: Removing prefixes would result in duplicate column names: ['x']
+        """
+        from edsl.results.Dataset import Dataset
+
+        # Get all column names
+        columns = self.relevant_columns()
+
+        # Extract the unprefixed names
+        unprefixed = {}
+        duplicates = set()
+
+        for col in columns:
+            if '.' in col:
+                unprefixed_name = col.split('.', 1)[1]
+                if unprefixed_name in unprefixed:
+                    duplicates.add(unprefixed_name)
+                unprefixed[unprefixed_name] = col
+            else:
+                # For columns without a prefix, keep them as is
+                unprefixed[col] = col
+
+        # Check for duplicates
+        if duplicates:
+            raise ValueError(f"Removing prefixes would result in duplicate column names: {sorted(list(duplicates))}")
+
+        # Create a new dataset with unprefixed column names
+        new_data = []
+        for entry in self.data:
+            key, values = list(entry.items())[0]
+            if '.' in key:
+                new_key = key.split('.', 1)[1]
+            else:
+                new_key = key
+            new_data.append({new_key: values})
+
+        return Dataset(new_data)
 
 
 if __name__ == "__main__":
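Taken together, the two new helpers let you remove a column and then strip the dotted prefixes, as in this short sketch mirroring the doctests above (column names are illustrative):

    from edsl.results.Dataset import Dataset

    d = Dataset([{'answer.how_feeling': ['OK', 'Great']}, {'answer.notes': ['a', 'b']}])
    d = d.drop('answer.notes')      # Dataset([{'answer.how_feeling': ['OK', 'Great']}])
    d = d.remove_prefix()           # Dataset([{'how_feeling': ['OK', 'Great']}])
    print(d.relevant_columns())     # ['how_feeling']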
edsl/results/Results.py
CHANGED
@@ -1379,6 +1379,65 @@ class Results(UserList, Mixins, Base):
             raise ResultsError(f"Failed to fetch remote results: {str(e)}")
 
 
+    def spot_issues(self, models: Optional[ModelList] = None) -> Results:
+        """Run a survey to spot issues and suggest improvements for prompts that had no model response, returning a new Results object.
+        Future version: Allow user to optionally pass a list of questions to review, regardless of whether they had a null model response.
+        """
+        from edsl.questions import QuestionFreeText, QuestionDict
+        from edsl.surveys import Survey
+        from edsl.scenarios import Scenario, ScenarioList
+        from edsl.language_models import Model, ModelList
+        import pandas as pd
+
+        df = self.select("agent.*", "scenario.*", "answer.*", "raw_model_response.*", "prompt.*").to_pandas()
+        scenario_list = []
+
+        for _, row in df.iterrows():
+            for col in df.columns:
+                if col.endswith("_raw_model_response") and pd.isna(row[col]):
+                    q = col.split("_raw_model_response")[0].replace("raw_model_response.", "")
+
+                    s = Scenario({
+                        "original_question": q,
+                        "original_agent_index": row["agent.agent_index"],
+                        "original_scenario_index": row["scenario.scenario_index"],
+                        "original_prompts": f"User prompt: {row[f'prompt.{q}_user_prompt']}\nSystem prompt: {row[f'prompt.{q}_system_prompt']}"
+                    })
+
+                    scenario_list.append(s)
+
+        sl = ScenarioList(set(scenario_list))
+
+        q1 = QuestionFreeText(
+            question_name = "issues",
+            question_text = """
+            The following prompts generated a bad or null response: '{{ original_prompts }}'
+            What do you think was the likely issue(s)?
+            """
+        )
+
+        q2 = QuestionDict(
+            question_name = "revised",
+            question_text = """
+            The following prompts generated a bad or null response: '{{ original_prompts }}'
+            You identified the issue(s) as '{{ issues.answer }}'.
+            Please revise the prompts to address the issue(s).
+            """,
+            answer_keys = ["revised_user_prompt", "revised_system_prompt"]
+        )
+
+        survey = Survey(questions = [q1, q2])
+
+        if models is not None:
+            if not isinstance(models, ModelList):
+                raise ResultsError("models must be a ModelList")
+            results = survey.by(sl).by(models).run()
+        else:
+            results = survey.by(sl).run() # use the default model
+
+        return results
+
+
 def main(): # pragma: no cover
     """Call the OpenAI API credits."""
     from edsl.results.Results import Results
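A hedged sketch of calling the new Results.spot_issues method; it assumes an existing Results object (called results below) that contains some null model responses, and API access for the review run. The model name is a placeholder:

    from edsl.language_models import Model, ModelList

    # `results` is an existing Results object with one or more failed prompts.
    review = results.spot_issues(models=ModelList([Model("gpt-4o")]))

    # Each reviewed prompt gets a free-text 'issues' diagnosis and a 'revised'
    # dict containing 'revised_user_prompt' and 'revised_system_prompt'.
    review.select("issues", "revised").to_pandas()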
edsl/scenarios/FileStore.py
CHANGED
@@ -11,6 +11,10 @@ from edsl.utilities.remove_edsl_version import remove_edsl_version
 from edsl.scenarios.file_methods import FileMethods
 from typing import Union
 from uuid import UUID
+import time
+from typing import Dict, Any, IO, Optional, List, Union, Literal
+
+
 
 class FileStore(Scenario):
     __documentation__ = "https://docs.expectedparrot.com/en/latest/filestore.html"
@@ -30,7 +34,7 @@ class FileStore(Scenario):
             path = kwargs["filename"]
 
         # Check if path is a URL and handle download
-        if path and (path.startswith(
+        if path and (path.startswith("http://") or path.startswith("https://")):
            temp_filestore = self.from_url(path, mime_type=mime_type)
            path = temp_filestore._path
            mime_type = temp_filestore.mime_type
@@ -91,6 +95,102 @@ class FileStore(Scenario):
         else:
             print(f"Example for {example_type} is not supported.")
 
+    @classmethod
+    async def _async_screenshot(
+        cls,
+        url: str,
+        full_page: bool = True,
+        wait_until: Literal[
+            "load", "domcontentloaded", "networkidle", "commit"
+        ] = "networkidle",
+        download_path: Optional[str] = None,
+    ) -> "FileStore":
+        """Async version of screenshot functionality"""
+        try:
+            from playwright.async_api import async_playwright
+        except ImportError:
+            raise ImportError(
+                "Screenshot functionality requires additional dependencies.\n"
+                "Install them with: pip install 'edsl[screenshot]'"
+            )
+
+        if download_path is None:
+            download_path = os.path.join(
+                os.getcwd(), f"screenshot_{int(time.time())}.png"
+            )
+
+        async with async_playwright() as p:
+            browser = await p.chromium.launch()
+            page = await browser.new_page()
+            await page.goto(url, wait_until=wait_until)
+            await page.screenshot(path=download_path, full_page=full_page)
+            await browser.close()
+
+        return cls(download_path, mime_type="image/png")
+
+    @classmethod
+    def from_url_screenshot(cls, url: str, **kwargs) -> "FileStore":
+        """Synchronous wrapper for screenshot functionality"""
+        import asyncio
+
+        try:
+            # Try using get_event_loop first (works in regular Python)
+            loop = asyncio.get_event_loop()
+        except RuntimeError:
+            # If we're in IPython/Jupyter, create a new loop
+            loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(loop)
+
+        try:
+            return loop.run_until_complete(cls._async_screenshot(url, **kwargs))
+        finally:
+            if not loop.is_running():
+                loop.close()
+
+    @classmethod
+    def batch_screenshots(cls, urls: List[str], **kwargs) -> "ScenarioList":
+        """
+        Take screenshots of multiple URLs concurrently.
+        Args:
+            urls: List of URLs to screenshot
+            **kwargs: Additional arguments passed to screenshot function (full_page, wait_until, etc.)
+        Returns:
+            ScenarioList containing FileStore objects with their corresponding URLs
+        """
+        from edsl import ScenarioList
+
+        try:
+            # Try using get_event_loop first (works in regular Python)
+            loop = asyncio.get_event_loop()
+        except RuntimeError:
+            # If we're in IPython/Jupyter, create a new loop
+            loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(loop)
+
+        # Create tasks for all screenshots
+        tasks = [cls._async_screenshot(url, **kwargs) for url in urls]
+
+        try:
+            # Run all screenshots concurrently
+            results = loop.run_until_complete(
+                asyncio.gather(*tasks, return_exceptions=True)
+            )
+
+            # Filter out any errors and log them
+            successful_results = []
+            for url, result in zip(urls, results):
+                if isinstance(result, Exception):
+                    print(f"Failed to screenshot {url}: {result}")
+                else:
+                    successful_results.append(
+                        Scenario({"url": url, "screenshot": result})
+                    )
+
+            return ScenarioList(successful_results)
+        finally:
+            if not loop.is_running():
+                loop.close()
+
     @property
     def size(self) -> int:
         if self.base64_string != None:
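A sketch of how the new screenshot helpers might be used; it assumes the optional playwright dependency referenced in the ImportError message is installed, and the URLs are placeholders:

    from edsl.scenarios.FileStore import FileStore

    # Capture one page as a PNG FileStore saved to the working directory.
    fs = FileStore.from_url_screenshot("https://example.com", full_page=True)

    # Capture several pages concurrently; returns a ScenarioList of
    # Scenario({"url": ..., "screenshot": FileStore}) entries.
    pages = FileStore.batch_screenshots(
        ["https://example.com", "https://example.org"],
        wait_until="networkidle",
    )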
@@ -273,12 +373,11 @@ class FileStore(Scenario):
         # raise TypeError("No text method found for this file type.")
 
     def push(
-        self,
-        description: Optional[str] = None,
+        self,
+        description: Optional[str] = None,
         alias: Optional[str] = None,
         visibility: Optional[str] = "unlisted",
         expected_parrot_url: Optional[str] = None,
-
     ) -> dict:
         """
         Push the object to Coop.
@@ -286,20 +385,26 @@
         :param visibility: The visibility of the object to push.
         """
         scenario_version = Scenario.from_dict(self.to_dict())
+
         if description is None:
             description = "File: " + self.path
-        info = scenario_version.push(
+        info = scenario_version.push(
+            description=description,
+            visibility=visibility,
+            expected_parrot_url=expected_parrot_url,
+            alias=alias,
+        )
         return info
 
     @classmethod
     def pull(cls, url_or_uuid: Union[str, UUID]) -> "FileStore":
         """
         Pull a FileStore object from Coop.
-
+
         Args:
             url_or_uuid: Either a UUID string or a URL pointing to the object
             expected_parrot_url: Optional URL for the Parrot server
-
+
         Returns:
             FileStore: The pulled FileStore object
         """