PyPI - edsl - Versions diffs - 0.1.39.dev2__py3-none-any.whl → 0.1.39.dev4__py3-none-any.whl - Mend

edsl 0.1.39.dev2__py3-none-any.whl → 0.1.39.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (86) hide show

edsl/Base.py +28 -0
edsl/__init__.py +1 -1
edsl/__version__.py +1 -1
edsl/agents/Agent.py +8 -16
edsl/agents/Invigilator.py +13 -14
edsl/agents/InvigilatorBase.py +4 -1
edsl/agents/PromptConstructor.py +42 -22
edsl/agents/QuestionInstructionPromptBuilder.py +1 -1
edsl/auto/AutoStudy.py +18 -5
edsl/auto/StageBase.py +53 -40
edsl/auto/StageQuestions.py +2 -1
edsl/auto/utilities.py +0 -6
edsl/coop/coop.py +21 -5
edsl/data/Cache.py +29 -18
edsl/data/CacheHandler.py +0 -2
edsl/data/RemoteCacheSync.py +154 -46
edsl/data/hack.py +10 -0
edsl/enums.py +7 -0
edsl/inference_services/AnthropicService.py +38 -16
edsl/inference_services/AvailableModelFetcher.py +7 -1
edsl/inference_services/GoogleService.py +5 -1
edsl/inference_services/InferenceServicesCollection.py +18 -2
edsl/inference_services/OpenAIService.py +46 -31
edsl/inference_services/TestService.py +1 -3
edsl/inference_services/TogetherAIService.py +5 -3
edsl/inference_services/data_structures.py +74 -2
edsl/jobs/AnswerQuestionFunctionConstructor.py +148 -113
edsl/jobs/FetchInvigilator.py +10 -3
edsl/jobs/InterviewsConstructor.py +6 -4
edsl/jobs/Jobs.py +299 -233
edsl/jobs/JobsChecks.py +2 -2
edsl/jobs/JobsPrompts.py +1 -1
edsl/jobs/JobsRemoteInferenceHandler.py +160 -136
edsl/jobs/async_interview_runner.py +138 -0
edsl/jobs/check_survey_scenario_compatibility.py +85 -0
edsl/jobs/data_structures.py +120 -0
edsl/jobs/interviews/Interview.py +80 -42
edsl/jobs/results_exceptions_handler.py +98 -0
edsl/jobs/runners/JobsRunnerAsyncio.py +87 -357
edsl/jobs/runners/JobsRunnerStatus.py +131 -164
edsl/jobs/tasks/TaskHistory.py +24 -3
edsl/language_models/LanguageModel.py +59 -4
edsl/language_models/ModelList.py +19 -8
edsl/language_models/__init__.py +1 -1
edsl/language_models/model.py +256 -0
edsl/language_models/repair.py +1 -1
edsl/questions/QuestionBase.py +35 -26
edsl/questions/QuestionBasePromptsMixin.py +1 -1
edsl/questions/QuestionBudget.py +1 -1
edsl/questions/QuestionCheckBox.py +2 -2
edsl/questions/QuestionExtract.py +5 -7
edsl/questions/QuestionFreeText.py +1 -1
edsl/questions/QuestionList.py +9 -15
edsl/questions/QuestionMatrix.py +1 -1
edsl/questions/QuestionMultipleChoice.py +1 -1
edsl/questions/QuestionNumerical.py +1 -1
edsl/questions/QuestionRank.py +1 -1
edsl/questions/SimpleAskMixin.py +1 -1
edsl/questions/__init__.py +1 -1
edsl/questions/data_structures.py +20 -0
edsl/questions/{QuestionBaseGenMixin.py → question_base_gen_mixin.py} +52 -49
edsl/questions/{ResponseValidatorABC.py → response_validator_abc.py} +6 -18
edsl/questions/{ResponseValidatorFactory.py → response_validator_factory.py} +7 -1
edsl/results/DatasetExportMixin.py +60 -119
edsl/results/Result.py +109 -3
edsl/results/Results.py +50 -39
edsl/results/file_exports.py +252 -0
edsl/scenarios/ScenarioList.py +35 -7
edsl/surveys/Survey.py +71 -20
edsl/test_h +1 -0
edsl/utilities/gcp_bucket/example.py +50 -0
{edsl-0.1.39.dev2.dist-info → edsl-0.1.39.dev4.dist-info}/METADATA +2 -2
{edsl-0.1.39.dev2.dist-info → edsl-0.1.39.dev4.dist-info}/RECORD +85 -76
edsl/language_models/registry.py +0 -180
/edsl/agents/{QuestionOptionProcessor.py → question_option_processor.py} +0 -0
/edsl/questions/{AnswerValidatorMixin.py → answer_validator_mixin.py} +0 -0
/edsl/questions/{LoopProcessor.py → loop_processor.py} +0 -0
/edsl/questions/{RegisterQuestionsMeta.py → register_questions_meta.py} +0 -0
/edsl/results/{ResultsFetchMixin.py → results_fetch_mixin.py} +0 -0
/edsl/results/{Selector.py → results_selector.py} +0 -0
/edsl/results/{ResultsToolsMixin.py → results_tools_mixin.py} +0 -0
/edsl/scenarios/{DirectoryScanner.py → directory_scanner.py} +0 -0
/edsl/scenarios/{ScenarioJoin.py → scenario_join.py} +0 -0
/edsl/scenarios/{ScenarioSelector.py → scenario_selector.py} +0 -0
{edsl-0.1.39.dev2.dist-info → edsl-0.1.39.dev4.dist-info}/LICENSE +0 -0
{edsl-0.1.39.dev2.dist-info → edsl-0.1.39.dev4.dist-info}/WHEEL +0 -0

edsl/results/Results.py CHANGED Viewed

@@ -9,6 +9,8 @@ import random
 from collections import UserList, defaultdict
 from typing import Optional, Callable, Any, Type, Union, List, TYPE_CHECKING
+from bisect import bisect_left
 from edsl.Base import Base
 from edsl.exceptions.results import (
     ResultsError,
@@ -24,7 +26,7 @@ if TYPE_CHECKING:
     from edsl.surveys.Survey import Survey
     from edsl.data.Cache import Cache
     from edsl.agents.AgentList import AgentList
-    from edsl.language_models.registry import Model
+    from edsl.language_models.model import Model
     from edsl.scenarios.ScenarioList import ScenarioList
     from edsl.results.Result import Result
     from edsl.jobs.tasks.TaskHistory import TaskHistory
@@ -33,7 +35,7 @@ if TYPE_CHECKING:
 from edsl.results.ResultsExportMixin import ResultsExportMixin
 from edsl.results.ResultsGGMixin import ResultsGGMixin
-from edsl.results.ResultsFetchMixin import ResultsFetchMixin
+from edsl.results.results_fetch_mixin import ResultsFetchMixin
 from edsl.utilities.remove_edsl_version import remove_edsl_version
@@ -136,7 +138,33 @@ class Results(UserList, Mixins, Base):
         }
         return d
-    def compute_job_cost(self, include_cached_responses_in_cost=False) -> float:
+    def insert(self, item):
+        item_order = getattr(item, "order", None)
+        if item_order is not None:
+            # Get list of orders, putting None at the end
+            orders = [getattr(x, "order", None) for x in self]
+            # Filter to just the non-None orders for bisect
+            sorted_orders = [x for x in orders if x is not None]
+            if sorted_orders:
+                index = bisect_left(sorted_orders, item_order)
+                # Account for any None values before this position
+                index += orders[:index].count(None)
+            else:
+                # If no sorted items yet, insert before any unordered items
+                index = 0
+            self.data.insert(index, item)
+        else:
+            # No order - append to end
+            self.data.append(item)
+    def append(self, item):
+        self.insert(item)
+    def extend(self, other):
+        for item in other:
+            self.insert(item)
+    def compute_job_cost(self, include_cached_responses_in_cost: bool = False) -> float:
         """
         Computes the cost of a completed job in USD.
         """
@@ -250,24 +278,6 @@ class Results(UserList, Mixins, Base):
         raise TypeError("Invalid argument type")
-    # def _update_results(self) -> None:
-    #     from edsl import Agent, Scenario
-    #     from edsl.language_models import LanguageModel
-    #     from edsl.results import Result
-    #     if self._job_uuid and len(self.data) < self._total_results:
-    #         results = [
-    #             Result(
-    #                 agent=Agent.from_dict(json.loads(r.agent)),
-    #                 scenario=Scenario.from_dict(json.loads(r.scenario)),
-    #                 model=LanguageModel.from_dict(json.loads(r.model)),
-    #                 iteration=1,
-    #                 answer=json.loads(r.answer),
-    #             )
-    #             for r in CRUD.read_results(self._job_uuid)
-    #         ]
-    #         self.data = results
     def __add__(self, other: Results) -> Results:
         """Add two Results objects together.
         They must have the same survey and created columns.
@@ -295,13 +305,10 @@ class Results(UserList, Mixins, Base):
         )
     def __repr__(self) -> str:
-        # import reprlib
         return f"Results(data = {self.data}, survey = {repr(self.survey)}, created_columns = {self.created_columns})"
     def table(
         self,
-        # selector_string: Optional[str] = "*.*",
         *fields,
         tablefmt: Optional[str] = None,
         pretty_labels: Optional[dict] = None,
@@ -340,11 +347,11 @@ class Results(UserList, Mixins, Base):
     def to_dict(
         self,
-        sort=False,
-        add_edsl_version=False,
-        include_cache=False,
-        include_task_history=False,
-        include_cache_info=True,
+        sort: bool = False,
+        add_edsl_version: bool = False,
+        include_cache: bool = False,
+        include_task_history: bool = False,
+        include_cache_info: bool = True,
     ) -> dict[str, Any]:
         from edsl.data.Cache import Cache
@@ -386,7 +393,7 @@ class Results(UserList, Mixins, Base):
         return d
-    def compare(self, other_results):
+    def compare(self, other_results: Results) -> dict:
         """
         Compare two Results objects and return the differences.
         """
@@ -404,7 +411,7 @@ class Results(UserList, Mixins, Base):
         }
     @property
-    def has_unfixed_exceptions(self):
+    def has_unfixed_exceptions(self) -> bool:
         return self.task_history.has_unfixed_exceptions
     def __hash__(self) -> int:
@@ -487,10 +494,6 @@ class Results(UserList, Mixins, Base):
             raise ResultsDeserializationError(f"Error in Results.from_dict: {e}")
         return results
-    ######################
-    ## Convenience methods
-    ## & Report methods
-    ######################
     @property
     def _key_to_data_type(self) -> dict[str, str]:
         """
@@ -689,13 +692,19 @@ class Results(UserList, Mixins, Base):
         """
         return self.data[0]
-    def answer_truncate(self, column: str, top_n=5, new_var_name=None) -> Results:
+    def answer_truncate(
+        self, column: str, top_n: int = 5, new_var_name: str = None
+    ) -> Results:
         """Create a new variable that truncates the answers to the top_n.
         :param column: The column to truncate.
         :param top_n: The number of top answers to keep.
         :param new_var_name: The name of the new variable. If None, it is the original name + '_truncated'.
+        Example:
+        >>> r = Results.example()
+        >>> r.answer_truncate('how_feeling', top_n = 2).select('how_feeling', 'how_feeling_truncated')
+        Dataset([{'answer.how_feeling': ['OK', 'Great', 'Terrible', 'OK']}, {'answer.how_feeling_truncated': ['Other', 'Other', 'Other', 'Other']}])
         """
@@ -916,7 +925,7 @@ class Results(UserList, Mixins, Base):
         n: Optional[int] = None,
         frac: Optional[float] = None,
         with_replacement: bool = True,
-        seed: Optional[str] = "edsl",
+        seed: Optional[str] = None,
     ) -> Results:
         """Sample the results.
@@ -931,7 +940,7 @@ class Results(UserList, Mixins, Base):
         >>> len(r.sample(2))
         2
         """
-        if seed != "edsl":
+        if seed:
             random.seed(seed)
         if n is None and frac is None:
@@ -969,7 +978,7 @@ class Results(UserList, Mixins, Base):
         Dataset([{'answer.how_feeling_yesterday': ['Great', 'Good', 'OK', 'Terrible']}])
         """
-        from edsl.results.Selector import Selector
+        from edsl.results.results_selector import Selector
         if len(self) == 0:
             raise Exception("No data to select from---the Results object is empty.")
@@ -984,6 +993,7 @@ class Results(UserList, Mixins, Base):
         return selector.select(*columns)
     def sort_by(self, *columns: str, reverse: bool = False) -> Results:
+        """Sort the results by one or more columns."""
         import warnings
         warnings.warn(
@@ -992,6 +1002,7 @@ class Results(UserList, Mixins, Base):
         return self.order_by(*columns, reverse=reverse)
     def _parse_column(self, column: str) -> tuple[str, str]:
+        """Parse a column name into a data type and key."""
         if "." in column:
             return column.split(".")
         return self._key_to_data_type[column], column

edsl/results/file_exports.py ADDED Viewed

@@ -0,0 +1,252 @@
+from abc import ABC, abstractmethod
+import io
+import csv
+import base64
+from typing import Optional, Union, Tuple, List, Any, Dict
+from openpyxl import Workbook
+from edsl.scenarios.FileStore import FileStore
+class FileExport(ABC):
+    def __init__(
+        self,
+        data: Any,
+        filename: Optional[str] = None,
+        remove_prefix: bool = False,
+        pretty_labels: Optional[Dict[str, str]] = None,
+    ):
+        self.data = data
+        self.filename = filename  # or self._get_default_filename()
+        self.remove_prefix = remove_prefix
+        self.pretty_labels = pretty_labels
+    @property
+    def mime_type(self) -> str:
+        """Return the MIME type for this export format."""
+        return self.__class__.mime_type
+    @property
+    def suffix(self) -> str:
+        """Return the file suffix for this format."""
+        return self.__class__.suffix
+    @property
+    def is_binary(self) -> bool:
+        """Whether the format is binary or text-based."""
+        return self.__class__.is_binary
+    def _get_default_filename(self) -> str:
+        """Generate default filename for this format."""
+        return f"results.{self.suffix}"
+    def _create_filestore(self, data: Union[str, bytes]) -> "FileStore":
+        """Create a FileStore instance with encoded data."""
+        if isinstance(data, str):
+            base64_string = base64.b64encode(data.encode()).decode()
+        else:
+            base64_string = base64.b64encode(data).decode()
+        from edsl.scenarios.FileStore import FileStore
+        path = self.filename or self._get_default_filename()
+        fs = FileStore(
+            path=path,
+            mime_type=self.mime_type,
+            binary=self.is_binary,
+            suffix=self.suffix,
+            base64_string=base64_string,
+        )
+        if self.filename is not None:
+            fs.write(self.filename)
+            return None
+        return fs
+    @abstractmethod
+    def format_data(self) -> Union[str, bytes]:
+        """Convert the input data to the target format."""
+        pass
+    def export(self) -> Optional["FileStore"]:
+        """Export the data to a FileStore instance."""
+        formatted_data = self.format_data()
+        return self._create_filestore(formatted_data)
+class JSONLExport(FileExport):
+    mime_type = "application/jsonl"
+    suffix = "jsonl"
+    is_binary = False
+    def format_data(self) -> str:
+        output = io.StringIO()
+        for entry in self.data:
+            key, values = list(entry.items())[0]
+            output.write(f'{{"{key}": {values}}}\n')
+        return output.getvalue()
+class TabularExport(FileExport, ABC):
+    """Base class for exports that use tabular data."""
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.header, self.rows = self.data._get_tabular_data(
+            remove_prefix=self.remove_prefix, pretty_labels=self.pretty_labels
+        )
+class CSVExport(TabularExport):
+    mime_type = "text/csv"
+    suffix = "csv"
+    is_binary = False
+    def format_data(self) -> str:
+        output = io.StringIO()
+        writer = csv.writer(output)
+        writer.writerow(self.header)
+        writer.writerows(self.rows)
+        return output.getvalue()
+class ExcelExport(TabularExport):
+    mime_type = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+    suffix = "xlsx"
+    is_binary = True
+    def __init__(self, *args, sheet_name: Optional[str] = None, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.sheet_name = sheet_name or "Results"
+    def format_data(self) -> bytes:
+        wb = Workbook()
+        ws = wb.active
+        ws.title = self.sheet_name
+        # Write header
+        for col, value in enumerate(self.header, 1):
+            ws.cell(row=1, column=col, value=value)
+        # Write data rows
+        for row_idx, row_data in enumerate(self.rows, 2):
+            for col, value in enumerate(row_data, 1):
+                ws.cell(row=row_idx, column=col, value=value)
+        # Save to bytes buffer
+        buffer = io.BytesIO()
+        wb.save(buffer)
+        buffer.seek(0)
+        return buffer.getvalue()
+import sqlite3
+from typing import Any
+class SQLiteExport(TabularExport):
+    mime_type = "application/x-sqlite3"
+    suffix = "db"
+    is_binary = True
+    def __init__(
+        self, *args, table_name: str = "results", if_exists: str = "replace", **kwargs
+    ):
+        """
+        Initialize SQLite export.
+        Args:
+            table_name: Name of the table to create
+            if_exists: How to handle existing table ('fail', 'replace', or 'append')
+        """
+        super().__init__(*args, **kwargs)
+        self.table_name = table_name
+        self.if_exists = if_exists
+    def _get_column_types(self) -> list[tuple[str, str]]:
+        """Infer SQL column types from the data."""
+        column_types = []
+        # Check first row of data for types
+        if self.rows:
+            first_row = self.rows[0]
+            for header, value in zip(self.header, first_row):
+                if isinstance(value, bool):
+                    sql_type = "BOOLEAN"
+                elif isinstance(value, int):
+                    sql_type = "INTEGER"
+                elif isinstance(value, float):
+                    sql_type = "REAL"
+                else:
+                    sql_type = "TEXT"
+                column_types.append((header, sql_type))
+        else:
+            # If no data, default to TEXT
+            column_types = [(header, "TEXT") for header in self.header]
+        return column_types
+    def _create_table(self, cursor: sqlite3.Cursor) -> None:
+        """Create the table with appropriate schema."""
+        column_types = self._get_column_types()
+        # Drop existing table if replace mode
+        if self.if_exists == "replace":
+            cursor.execute(f"DROP TABLE IF EXISTS {self.table_name}")
+        elif self.if_exists == "fail":
+            cursor.execute(
+                f"SELECT name FROM sqlite_master WHERE type='table' AND name=?",
+                (self.table_name,),
+            )
+            if cursor.fetchone():
+                raise ValueError(f"Table {self.table_name} already exists")
+        # Create table
+        columns = ", ".join(f'"{col}" {dtype}' for col, dtype in column_types)
+        create_table_sql = f"""
+        CREATE TABLE IF NOT EXISTS {self.table_name} (
+            {columns}
+        )
+        """
+        cursor.execute(create_table_sql)
+    def format_data(self) -> bytes:
+        """Convert the data to a SQLite database file."""
+        buffer = io.BytesIO()
+        # Create in-memory database
+        conn = sqlite3.connect(":memory:")
+        cursor = conn.cursor()
+        # Create table and insert data
+        self._create_table(cursor)
+        # Prepare placeholders for INSERT
+        placeholders = ",".join(["?" for _ in self.header])
+        insert_sql = f"INSERT INTO {self.table_name} ({','.join(self.header)}) VALUES ({placeholders})"
+        # Insert data
+        cursor.executemany(insert_sql, self.rows)
+        conn.commit()
+        # Save to file buffer
+        conn.backup(sqlite3.connect(buffer))
+        conn.close()
+        buffer.seek(0)
+        return buffer.getvalue()
+    def _validate_params(self) -> None:
+        """Validate initialization parameters."""
+        valid_if_exists = {"fail", "replace", "append"}
+        if self.if_exists not in valid_if_exists:
+            raise ValueError(
+                f"if_exists must be one of {valid_if_exists}, got {self.if_exists}"
+            )
+        # Validate table name (basic SQLite identifier validation)
+        if not self.table_name.isalnum() and not all(c in "_" for c in self.table_name):
+            raise ValueError(
+                f"Invalid table name: {self.table_name}. Must contain only alphanumeric characters and underscores."
+            )

edsl/scenarios/ScenarioList.py CHANGED Viewed

@@ -45,7 +45,7 @@ from edsl.utilities.naming_utilities import sanitize_string
 from edsl.utilities.is_valid_variable_name import is_valid_variable_name
 from edsl.exceptions.scenarios import ScenarioError
-from edsl.scenarios.DirectoryScanner import DirectoryScanner
+from edsl.scenarios.directory_scanner import DirectoryScanner
 class ScenarioListMixin(ScenarioListPdfMixin, ScenarioListExportMixin):
@@ -661,7 +661,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
         >>> s.select('a')
         ScenarioList([Scenario({'a': 1}), Scenario({'a': 1})])
         """
-        from edsl.scenarios.ScenarioSelector import ScenarioSelector
+        from edsl.scenarios.scenario_selector import ScenarioSelector
         return ScenarioSelector(self).select(*fields)
@@ -840,10 +840,25 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
         ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 25})])
         """
         sl = self.duplicate()
+        if len(values) != len(sl):
+            raise ScenarioError(
+                f"Length of values ({len(values)}) does not match length of ScenarioList ({len(sl)})"
+            )
         for i, value in enumerate(values):
             sl[i][name] = value
         return sl
+    @classmethod
+    def create_empty_scenario_list(cls, n: int) -> ScenarioList:
+        """Create an empty ScenarioList with n scenarios.
+        Example:
+        >>> ScenarioList.create_empty_scenario_list(3)
+        ScenarioList([Scenario({}), Scenario({}), Scenario({})])
+        """
+        return ScenarioList([Scenario({}) for _ in range(n)])
     def add_value(self, name: str, value: Any) -> ScenarioList:
         """Add a value to all scenarios in a ScenarioList.
@@ -1222,7 +1237,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
         >>> s3 == ScenarioList([Scenario({'age': 30, 'location': 'New York', 'name': 'Alice'}), Scenario({'age': 25, 'location': None, 'name': 'Bob'})])
         True
         """
-        from edsl.scenarios.ScenarioJoin import ScenarioJoin
+        from edsl.scenarios.scenario_join import ScenarioJoin
         sj = ScenarioJoin(self, other)
         return sj.left_join(by)
@@ -1244,6 +1259,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
         else:
             data = self
         d = {"scenarios": [s.to_dict(add_edsl_version=add_edsl_version) for s in data]}
         if add_edsl_version:
             from edsl import __version__
@@ -1296,10 +1312,22 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
     @classmethod
     def from_nested_dict(cls, data: dict) -> ScenarioList:
-        """Create a `ScenarioList` from a nested dictionary."""
-        s = ScenarioList()
-        for key, value in data.items():
-            s.add_list(key, value)
+        """Create a `ScenarioList` from a nested dictionary.
+        >>> data = {"headline": ["Armistice Signed, War Over: Celebrations Erupt Across City"], "date": ["1918-11-11"], "author": ["Jane Smith"]}
+        >>> ScenarioList.from_nested_dict(data)
+        ScenarioList([Scenario({'headline': 'Armistice Signed, War Over: Celebrations Erupt Across City', 'date': '1918-11-11', 'author': 'Jane Smith'})])
+        """
+        length_of_first_list = len(next(iter(data.values())))
+        s = ScenarioList.create_empty_scenario_list(n=length_of_first_list)
+        if any(len(v) != length_of_first_list for v in data.values()):
+            raise ValueError(
+                "All lists in the dictionary must be of the same length.",
+            )
+        for key, list_of_values in data.items():
+            s = s.add_list(key, list_of_values)
         return s
     def code(self) -> str:

edsl 0.1.39.dev2__py3-none-any.whl → 0.1.39.dev4__py3-none-any.whl

edsl 0.1.39.dev2__py3-none-any.whl → 0.1.39.dev4__py3-none-any.whl