PyPI - edsl - Versions diffs - 0.1.42__py3-none-any.whl → 0.1.44__py3-none-any.whl - Mend

edsl 0.1.42__py3-none-any.whl → 0.1.44__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

edsl/Base.py +15 -6
edsl/__version__.py +1 -1
edsl/agents/Invigilator.py +1 -1
edsl/agents/PromptConstructor.py +92 -21
edsl/agents/QuestionInstructionPromptBuilder.py +68 -9
edsl/agents/prompt_helpers.py +2 -2
edsl/coop/coop.py +100 -22
edsl/enums.py +3 -1
edsl/exceptions/coop.py +4 -0
edsl/inference_services/AnthropicService.py +2 -0
edsl/inference_services/AvailableModelFetcher.py +4 -1
edsl/inference_services/GoogleService.py +2 -0
edsl/inference_services/GrokService.py +11 -0
edsl/inference_services/InferenceServiceABC.py +1 -0
edsl/inference_services/OpenAIService.py +1 -0
edsl/inference_services/TestService.py +1 -0
edsl/inference_services/registry.py +2 -0
edsl/jobs/Jobs.py +54 -35
edsl/jobs/JobsChecks.py +7 -7
edsl/jobs/JobsPrompts.py +57 -6
edsl/jobs/JobsRemoteInferenceHandler.py +41 -25
edsl/jobs/buckets/BucketCollection.py +30 -0
edsl/jobs/data_structures.py +1 -0
edsl/language_models/LanguageModel.py +5 -2
edsl/language_models/key_management/KeyLookupBuilder.py +47 -20
edsl/language_models/key_management/models.py +10 -4
edsl/language_models/model.py +43 -11
edsl/prompts/Prompt.py +124 -61
edsl/questions/descriptors.py +32 -18
edsl/questions/question_base_gen_mixin.py +1 -0
edsl/results/DatasetExportMixin.py +35 -6
edsl/results/Results.py +180 -1
edsl/results/ResultsGGMixin.py +117 -60
edsl/scenarios/FileStore.py +19 -8
edsl/scenarios/Scenario.py +33 -0
edsl/scenarios/ScenarioList.py +22 -3
edsl/scenarios/ScenarioListPdfMixin.py +9 -3
edsl/surveys/Survey.py +27 -6
{edsl-0.1.42.dist-info → edsl-0.1.44.dist-info}/METADATA +3 -4
{edsl-0.1.42.dist-info → edsl-0.1.44.dist-info}/RECORD +42 -41
{edsl-0.1.42.dist-info → edsl-0.1.44.dist-info}/LICENSE +0 -0
{edsl-0.1.42.dist-info → edsl-0.1.44.dist-info}/WHEEL +0 -0

edsl/language_models/model.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import textwrap
 from random import random
-from typing import Optional, TYPE_CHECKING, List
+from typing import Optional, TYPE_CHECKING, List, Callable
 from edsl.utilities.PrettyList import PrettyList
 from edsl.config import CONFIG
@@ -11,17 +11,21 @@ from edsl.inference_services.InferenceServicesCollection import (
 from edsl.inference_services.data_structures import AvailableModels
 from edsl.inference_services.InferenceServiceABC import InferenceServiceABC
 from edsl.enums import InferenceServiceLiteral
+from edsl.exceptions.inference_services import InferenceServiceError
 if TYPE_CHECKING:
     from edsl.results.Dataset import Dataset
-def get_model_class(model_name, registry: Optional[InferenceServicesCollection] = None):
+def get_model_class(model_name, registry: Optional[InferenceServicesCollection] = None, service_name: Optional[InferenceServiceLiteral] = None):
     from edsl.inference_services.registry import default
     registry = registry or default
-    factory = registry.create_model_factory(model_name)
-    return factory
+    try:
+        factory = registry.create_model_factory(model_name, service_name=service_name)
+        return factory
+    except (InferenceServiceError, Exception) as e:
+        return Model._handle_model_error(model_name, e)
 class Meta(type):
@@ -58,6 +62,33 @@ class Model(metaclass=Meta):
         """Set a new registry"""
         cls._registry = registry
+    @classmethod
+    def _handle_model_error(cls, model_name: str, error: Exception):
+        """Handle errors from model creation and execution with notebook-aware behavior."""
+        if isinstance(error, InferenceServiceError):
+            services = [s._inference_service_ for s in cls.get_registry().services]
+            message = (
+                f"Model '{model_name}' not found in any services.\n"
+                "It is likely that our registry is just out of date.\n"
+                "Simply adding the service name to your model call should fix this.\n"
+                f"Available services are: {services}\n"
+                f"To specify a model with a service, use:\n"
+                f'Model("{model_name}", service_name="<service_name>")'
+            )
+        else:
+            message = f"An error occurred: {str(error)}"
+        # Check if we're in a notebook environment
+        try:
+            get_ipython()
+            print(message)
+            return None
+        except NameError:
+            # Not in a notebook, raise the exception
+            if isinstance(error, InferenceServiceError):
+                raise InferenceServiceError(message)
+            raise error
     def __new__(
         cls,
         model_name: Optional[str] = None,
@@ -69,9 +100,7 @@ class Model(metaclass=Meta):
         "Instantiate a new language model."
         # Map index to the respective subclass
         if model_name is None:
-            model_name = (
-                cls.default_model
-            )  # when model_name is None, use the default model, set in the config file
+            model_name = cls.default_model
         if registry is not None:
             cls.set_registry(registry)
@@ -79,10 +108,13 @@ class Model(metaclass=Meta):
         if isinstance(model_name, int):  # can refer to a model by index
             model_name = cls.available(name_only=True)[model_name]
-        factory = cls.get_registry().create_model_factory(
-            model_name, service_name=service_name
-        )
-        return factory(*args, **kwargs)
+        try:
+            factory = cls.get_registry().create_model_factory(
+                model_name, service_name=service_name
+            )
+            return factory(*args, **kwargs)
+        except (InferenceServiceError, Exception) as e:
+            return cls._handle_model_error(model_name, e)
     @classmethod
     def add_model(cls, service_name, model_name) -> None:

edsl/prompts/Prompt.py CHANGED Viewed

@@ -10,6 +10,48 @@ from edsl.Base import PersistenceMixin, RepresentationMixin
 MAX_NESTING = 100
+from jinja2 import Environment, meta, TemplateSyntaxError, Undefined
+from functools import lru_cache
+class PreserveUndefined(Undefined):
+    def __str__(self):
+        return "{{ " + str(self._undefined_name) + " }}"
+# Create environment once at module level
+_env = Environment(undefined=PreserveUndefined)
+@lru_cache(maxsize=1024)
+def _compile_template(text: str):
+    return _env.from_string(text)
+@lru_cache(maxsize=1024)
+def _find_template_variables(template: str) -> list[str]:
+    """Find and return the template variables."""
+    ast = _env.parse(template)
+    return list(meta.find_undeclared_variables(ast))
+def _make_hashable(value):
+    """Convert unhashable types to hashable ones."""
+    if isinstance(value, list):
+        return tuple(_make_hashable(item) for item in value)
+    if isinstance(value, dict):
+        return frozenset((k, _make_hashable(v)) for k, v in value.items())
+    return value
+@lru_cache(maxsize=1024)
+def _cached_render(text: str, frozen_replacements: frozenset) -> str:
+    """Cached version of template rendering with frozen replacements."""
+    # Print cache info on every call
+    cache_info = _cached_render.cache_info()
+    print(f"\t\t\t\t\t Cache status - hits: {cache_info.hits}, misses: {cache_info.misses}, current size: {cache_info.currsize}")
+    # Convert back to dict with original types for rendering
+    replacements = {k: v for k, v in frozen_replacements}
+    template = _compile_template(text)
+    result = template.render(replacements)
+    return result
 class Prompt(PersistenceMixin, RepresentationMixin):
     """Class for creating a prompt to be used in a survey."""
@@ -145,33 +187,8 @@ class Prompt(PersistenceMixin, RepresentationMixin):
         return f'Prompt(text="""{self.text}""")'
     def template_variables(self) -> list[str]:
-        """Return the the variables in the template.
-        Example:
-        >>> p = Prompt("Hello, {{person}}")
-        >>> p.template_variables()
-        ['person']
-        """
-        return self._template_variables(self.text)
-    @staticmethod
-    def _template_variables(template: str) -> list[str]:
-        """Find and return the template variables.
-        :param template: The template to find the variables in.
-        """
-        from jinja2 import Environment, meta, Undefined
-        class PreserveUndefined(Undefined):
-            def __str__(self):
-                return "{{ " + str(self._undefined_name) + " }}"
-        env = Environment(undefined=PreserveUndefined)
-        ast = env.parse(template)
-        return list(meta.find_undeclared_variables(ast))
+        """Return the variables in the template."""
+        return _find_template_variables(self.text)
     def undefined_template_variables(self, replacement_dict: dict):
         """Return the variables in the template that are not in the replacement_dict.
@@ -239,45 +256,39 @@ class Prompt(PersistenceMixin, RepresentationMixin):
             return self
     @staticmethod
-    def _render(
-        text: str, primary_replacement, **additional_replacements
-    ) -> "PromptBase":
-        """Render the template text with variables replaced from the provided named dictionaries.
-        :param text: The text to render.
-        :param primary_replacement: The primary replacement dictionary.
-        :param additional_replacements: Additional replacement dictionaries.
-        Allows for nested variable resolution up to a specified maximum nesting depth.
-        Example:
-        >>> codebook = {"age": "Age"}
-        >>> p = Prompt("You are an agent named {{ name }}. {{ codebook['age']}}: {{ age }}")
-        >>> p.render({"name": "John", "age": 44}, codebook=codebook)
-        Prompt(text=\"""You are an agent named John. Age: 44\""")
-        """
-        from jinja2 import Environment, meta, TemplateSyntaxError, Undefined
-        class PreserveUndefined(Undefined):
-            def __str__(self):
-                return "{{ " + str(self._undefined_name) + " }}"
-        env = Environment(undefined=PreserveUndefined)
+    def _render(text: str, primary_replacement, **additional_replacements) -> "PromptBase":
+        """Render the template text with variables replaced."""
+        import time
+        # if there are no replacements, return the text
+        if not primary_replacement and not additional_replacements:
+            return text
         try:
+            variables = _find_template_variables(text)
+            if not variables: # if there are no variables, return the text
+                return text
+            # Combine all replacements
+            all_replacements = {**primary_replacement, **additional_replacements}
             previous_text = None
+            current_text = text
+            iteration = 0
             for _ in range(MAX_NESTING):
-                # breakpoint()
-                rendered_text = env.from_string(text).render(
-                    primary_replacement, **additional_replacements
-                )
-                if rendered_text == previous_text:
-                    # No more changes, so return the rendered text
+                iteration += 1
+                template = _compile_template(current_text)
+                rendered_text = template.render(all_replacements)
+                if rendered_text == current_text:
                     return rendered_text
-                previous_text = text
-                text = rendered_text
+                previous_text = current_text
+                current_text = rendered_text
-            # If the loop exits without returning, it indicates too much nesting
             raise TemplateRenderError(
                 "Too much nesting - you created an infinite loop here, pal"
             )
@@ -331,6 +342,58 @@ class Prompt(PersistenceMixin, RepresentationMixin):
         """Return an example of the prompt."""
         return cls(cls.default_instructions)
+    def get_prompts(self) -> Dict[str, Any]:
+        """Get the prompts for the question."""
+        start = time.time()
+        # Build all the components
+        instr_start = time.time()
+        agent_instructions = self.agent_instructions_prompt
+        instr_end = time.time()
+        logger.debug(f"Time taken for agent instructions: {instr_end - instr_start:.4f}s")
+        persona_start = time.time()
+        agent_persona = self.agent_persona_prompt
+        persona_end = time.time()
+        logger.debug(f"Time taken for agent persona: {persona_end - persona_start:.4f}s")
+        q_instr_start = time.time()
+        question_instructions = self.question_instructions_prompt
+        q_instr_end = time.time()
+        logger.debug(f"Time taken for question instructions: {q_instr_end - q_instr_start:.4f}s")
+        memory_start = time.time()
+        prior_question_memory = self.prior_question_memory_prompt
+        memory_end = time.time()
+        logger.debug(f"Time taken for prior question memory: {memory_end - memory_start:.4f}s")
+        # Get components dict
+        components = {
+            "agent_instructions": agent_instructions.text,
+            "agent_persona": agent_persona.text,
+            "question_instructions": question_instructions.text,
+            "prior_question_memory": prior_question_memory.text,
+        }
+        # Use PromptPlan's get_prompts method
+        plan_start = time.time()
+        prompts = self.prompt_plan.get_prompts(**components)
+        plan_end = time.time()
+        logger.debug(f"Time taken for prompt processing: {plan_end - plan_start:.4f}s")
+        # Handle file keys if present
+        if hasattr(self, 'question_file_keys') and self.question_file_keys:
+            files_start = time.time()
+            files_list = []
+            for key in self.question_file_keys:
+                files_list.append(self.scenario[key])
+            prompts["files_list"] = files_list
+            files_end = time.time()
+            logger.debug(f"Time taken for file key processing: {files_end - files_start:.4f}s")
+        end = time.time()
+        logger.debug(f"Total time in get_prompts: {end - start:.4f}s")
+        return prompts
 if __name__ == "__main__":
     print("Running doctests...")

edsl/questions/descriptors.py CHANGED Viewed

@@ -249,7 +249,28 @@ class QuestionNameDescriptor(BaseDescriptor):
 class QuestionOptionsDescriptor(BaseDescriptor):
-    """Validate that `question_options` is a list, does not exceed the min/max lengths, and has unique items."""
+    """Validate that `question_options` is a list, does not exceed the min/max lengths, and has unique items.
+    >>> import warnings
+    >>> q_class = QuestionOptionsDescriptor.example()
+    >>> with warnings.catch_warnings(record=True) as w:
+    ...     _ = q_class(["a ", "b", "c"])  # Has trailing space
+    ...     assert len(w) == 1
+    ...     assert "trailing whitespace" in str(w[0].message)
+    >>> _ = q_class(["a", "b", "c", "d", "d"])
+    Traceback (most recent call last):
+    ...
+    edsl.exceptions.questions.QuestionCreationValidationError: Question options must be unique (got ['a', 'b', 'c', 'd', 'd']).
+    We allow dynamic question options, which are strings of the form '{{ question_options }}'.
+    >>> _ = q_class("{{dynamic_options}}")
+    >>> _ = q_class("dynamic_options")
+    Traceback (most recent call last):
+    ...
+    edsl.exceptions.questions.QuestionCreationValidationError: ...
+    """
     @classmethod
     def example(cls):
@@ -273,23 +294,7 @@ class QuestionOptionsDescriptor(BaseDescriptor):
         self.q_budget = q_budget
     def validate(self, value: Any, instance) -> None:
-        """Validate the question options.
-        >>> q_class = QuestionOptionsDescriptor.example()
-        >>> _ = q_class(["a", "b", "c"])
-        >>> _ = q_class(["a", "b", "c", "d", "d"])
-        Traceback (most recent call last):
-        ...
-        edsl.exceptions.questions.QuestionCreationValidationError: Question options must be unique (got ['a', 'b', 'c', 'd', 'd']).
-        We allow dynamic question options, which are strings of the form '{{ question_options }}'.
-        >>> _ = q_class("{{dynamic_options}}")
-        >>> _ = q_class("dynamic_options")
-        Traceback (most recent call last):
-        ...
-        edsl.exceptions.questions.QuestionCreationValidationError: ...
-        """
+        """Validate the question options."""
         if isinstance(value, str):
             # Check if the string is a dynamic question option
             if "{{" in value and "}}" in value:
@@ -343,6 +348,15 @@ class QuestionOptionsDescriptor(BaseDescriptor):
                     f"All question options must be at least 1 character long but less than {Settings.MAX_OPTION_LENGTH} characters long (got {value})."
                 )
+            # Check for trailing whitespace in string options
+            if any(isinstance(x, str) and (x != x.strip()) for x in value):
+                import warnings
+                warnings.warn(
+                    "Some question options contain trailing whitespace. This may cause unexpected behavior.",
+                    UserWarning,
+                )
         if hasattr(instance, "min_selections") and instance.min_selections != None:
             if instance.min_selections > len(value):
                 raise QuestionCreationValidationError(

edsl/questions/question_base_gen_mixin.py CHANGED Viewed

@@ -114,6 +114,7 @@ class QuestionBaseGenMixin:
                         .render(strings_only_replacement_dict)
                     )
                 except Exception as e:
+                    #breakpoint()
                     import warnings
                     warnings.warn("Failed to render string: " + value)

edsl/results/DatasetExportMixin.py CHANGED Viewed

@@ -7,7 +7,6 @@ from typing import Optional, Tuple, Union, List
 from edsl.results.file_exports import CSVExport, ExcelExport, JSONLExport, SQLiteExport
 class DatasetExportMixin:
     """Mixin class for exporting Dataset objects."""
@@ -220,23 +219,45 @@ class DatasetExportMixin:
         )
         return exporter.export()
-    def _db(self, remove_prefix: bool = True):
+    def _db(self, remove_prefix: bool = True, shape: str = "wide") -> "sqlalchemy.engine.Engine":
         """Create a SQLite database in memory and return the connection.
         Args:
-            shape: The shape of the data in the database (wide or long)
             remove_prefix: Whether to remove the prefix from the column names
+            shape: The shape of the data in the database ("wide" or "long")
         Returns:
             A database connection
+        >>> from sqlalchemy import text
+        >>> from edsl import Results
+        >>> engine = Results.example()._db()
+        >>> len(engine.execute(text("SELECT * FROM self")).fetchall())
+        4
+        >>> engine = Results.example()._db(shape = "long")
+        >>> len(engine.execute(text("SELECT * FROM self")).fetchall())
+        172
         """
-        from sqlalchemy import create_engine
+        from sqlalchemy import create_engine, text
         engine = create_engine("sqlite:///:memory:")
-        if remove_prefix:
+        if remove_prefix and shape == "wide":
             df = self.remove_prefix().to_pandas(lists_as_strings=True)
         else:
             df = self.to_pandas(lists_as_strings=True)
+        if shape == "long":
+            # Melt the dataframe to convert it to long format
+            df = df.melt(
+                var_name='key',
+                value_name='value'
+            )
+            # Add a row number column for reference
+            df.insert(0, 'row_number', range(1, len(df) + 1))
+            # Split the key into data_type and key
+            df['data_type'] = df['key'].apply(lambda x: x.split('.')[0] if '.' in x else None)
+            df['key'] = df['key'].apply(lambda x: '.'.join(x.split('.')[1:]) if '.' in x else x)
         df.to_sql(
             "self",
             engine,
@@ -251,6 +272,7 @@ class DatasetExportMixin:
         transpose: bool = None,
         transpose_by: str = None,
         remove_prefix: bool = True,
+        shape: str = "wide",
     ) -> Union["pd.DataFrame", str]:
         """Execute a SQL query and return the results as a DataFrame.
@@ -268,10 +290,17 @@ class DatasetExportMixin:
         Returns:
             DataFrame, CSV string, list, or LaTeX string depending on parameters
+       Examples:
+           >>> from edsl import Results
+           >>> r = Results.example();
+           >>> len(r.sql("SELECT * FROM self", shape = "wide"))
+           4
+           >>> len(r.sql("SELECT * FROM self", shape = "long"))
+           172
         """
         import pandas as pd
-        conn = self._db(remove_prefix=remove_prefix)
+        conn = self._db(remove_prefix=remove_prefix, shape=shape)
         df = pd.read_sql_query(query, conn)
         # Transpose the DataFrame if transpose is True

edsl 0.1.42__py3-none-any.whl → 0.1.44__py3-none-any.whl

edsl 0.1.42__py3-none-any.whl → 0.1.44__py3-none-any.whl