edsl 0.1.30.dev4__py3-none-any.whl → 0.1.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. edsl/__version__.py +1 -1
  2. edsl/agents/Invigilator.py +7 -2
  3. edsl/agents/PromptConstructionMixin.py +18 -1
  4. edsl/config.py +4 -0
  5. edsl/conjure/Conjure.py +6 -0
  6. edsl/coop/coop.py +4 -0
  7. edsl/coop/utils.py +9 -1
  8. edsl/data/CacheHandler.py +3 -4
  9. edsl/enums.py +2 -0
  10. edsl/inference_services/DeepInfraService.py +6 -91
  11. edsl/inference_services/GroqService.py +18 -0
  12. edsl/inference_services/InferenceServicesCollection.py +13 -5
  13. edsl/inference_services/OpenAIService.py +64 -21
  14. edsl/inference_services/registry.py +2 -1
  15. edsl/jobs/Jobs.py +80 -33
  16. edsl/jobs/buckets/TokenBucket.py +24 -5
  17. edsl/jobs/interviews/Interview.py +122 -75
  18. edsl/jobs/interviews/InterviewExceptionEntry.py +101 -0
  19. edsl/jobs/interviews/InterviewTaskBuildingMixin.py +58 -52
  20. edsl/jobs/interviews/interview_exception_tracking.py +68 -10
  21. edsl/jobs/runners/JobsRunnerAsyncio.py +112 -81
  22. edsl/jobs/runners/JobsRunnerStatusData.py +0 -237
  23. edsl/jobs/runners/JobsRunnerStatusMixin.py +291 -35
  24. edsl/jobs/tasks/QuestionTaskCreator.py +1 -5
  25. edsl/jobs/tasks/TaskCreators.py +8 -2
  26. edsl/jobs/tasks/TaskHistory.py +145 -1
  27. edsl/language_models/LanguageModel.py +135 -75
  28. edsl/language_models/ModelList.py +8 -2
  29. edsl/language_models/registry.py +16 -0
  30. edsl/questions/QuestionFunctional.py +34 -2
  31. edsl/questions/QuestionMultipleChoice.py +58 -8
  32. edsl/questions/QuestionNumerical.py +0 -1
  33. edsl/questions/descriptors.py +42 -2
  34. edsl/results/DatasetExportMixin.py +258 -75
  35. edsl/results/Result.py +53 -5
  36. edsl/results/Results.py +66 -27
  37. edsl/results/ResultsToolsMixin.py +1 -1
  38. edsl/scenarios/Scenario.py +14 -0
  39. edsl/scenarios/ScenarioList.py +59 -21
  40. edsl/scenarios/ScenarioListExportMixin.py +16 -5
  41. edsl/scenarios/ScenarioListPdfMixin.py +3 -0
  42. edsl/study/Study.py +2 -2
  43. edsl/surveys/Survey.py +35 -1
  44. {edsl-0.1.30.dev4.dist-info → edsl-0.1.31.dist-info}/METADATA +4 -2
  45. {edsl-0.1.30.dev4.dist-info → edsl-0.1.31.dist-info}/RECORD +47 -45
  46. {edsl-0.1.30.dev4.dist-info → edsl-0.1.31.dist-info}/WHEEL +1 -1
  47. {edsl-0.1.30.dev4.dist-info → edsl-0.1.31.dist-info}/LICENSE +0 -0
edsl/jobs/tasks/TaskHistory.py

@@ -11,6 +11,8 @@ class TaskHistory:
 
         [Interview.exceptions, Interview.exceptions, Interview.exceptions, ...]
 
+        >>> _ = TaskHistory.example()
+        ...
         """
 
         self.total_interviews = interviews

@@ -18,8 +20,26 @@ class TaskHistory:
 
         self._interviews = {index: i for index, i in enumerate(self.total_interviews)}
 
+    @classmethod
+    def example(cls):
+        from edsl.jobs.interviews.Interview import Interview
+
+        from edsl.jobs.Jobs import Jobs
+
+        j = Jobs.example(throw_exception_probability=1, test_model=True)
+
+        from edsl.config import CONFIG
+
+        results = j.run(print_exceptions=False, skip_retry=True, cache = False)
+
+        return cls(results.task_history.total_interviews)
+
     @property
     def exceptions(self):
+        """
+        >>> len(TaskHistory.example().exceptions)
+        4
+        """
         return [i.exceptions for k, i in self._interviews.items() if i.exceptions != {}]
 
     @property

@@ -42,7 +62,12 @@ class TaskHistory:
 
     @property
     def has_exceptions(self) -> bool:
-        """Return True if there are any exceptions."""
+        """Return True if there are any exceptions.
+
+        >>> TaskHistory.example().has_exceptions
+        True
+
+        """
         return len(self.exceptions) > 0
 
     def _repr_html_(self):

@@ -216,6 +241,47 @@ class TaskHistory:
         }
         """
 
+    @property
+    def exceptions_by_type(self) -> dict:
+        """Return a dictionary of exceptions by type."""
+        exceptions_by_type = {}
+        for interview in self.total_interviews:
+            for question_name, exceptions in interview.exceptions.items():
+                for exception in exceptions:
+                    exception_type = exception["exception"]
+                    if exception_type in exceptions_by_type:
+                        exceptions_by_type[exception_type] += 1
+                    else:
+                        exceptions_by_type[exception_type] = 1
+        return exceptions_by_type
+
+    @property
+    def exceptions_by_question_name(self) -> dict:
+        """Return a dictionary of exceptions tallied by question name."""
+        exceptions_by_question_name = {}
+        for interview in self.total_interviews:
+            for question_name, exceptions in interview.exceptions.items():
+                if question_name not in exceptions_by_question_name:
+                    exceptions_by_question_name[question_name] = 0
+                exceptions_by_question_name[question_name] += len(exceptions)
+
+        for question in self.total_interviews[0].survey.questions:
+            if question.question_name not in exceptions_by_question_name:
+                exceptions_by_question_name[question.question_name] = 0
+        return exceptions_by_question_name
+
+    @property
+    def exceptions_by_model(self) -> dict:
+        """Return a dictionary of exceptions tallied by model and question name."""
+        exceptions_by_model = {}
+        for interview in self.total_interviews:
+            model = interview.model
+            if model not in exceptions_by_model:
+                exceptions_by_model[model.model] = 0
+            if interview.exceptions != {}:
+                exceptions_by_model[model.model] += len(interview.exceptions)
+        return exceptions_by_model
+
     def html(
         self,
         filename: Optional[str] = None,

@@ -236,6 +302,8 @@ class TaskHistory:
         if css is None:
             css = self.css()
 
+        models_used = set([i.model for index, i in self._interviews.items()])
+
         template = Template(
             """
             <!DOCTYPE html>

@@ -249,6 +317,69 @@ class TaskHistory:
             </style>
             </head>
             <body>
+            <h1>Overview</h1>
+            <p>There were {{ interviews|length }} total interviews. The number of interviews with exceptions was {{ num_exceptions }}.</p>
+            <p>The models used were: {{ models_used }}.</p>
+            <p>For documentation on dealing with exceptions on Expected Parrot,
+            see <a href="https://docs.expectedparrot.com/en/latest/exceptions.html">here</a>.</p>
+
+            <h2>Exceptions by Type</h2>
+            <table>
+                <thead>
+                    <tr>
+                        <th>Exception Type</th>
+                        <th>Number</th>
+                    </tr>
+                </thead>
+                <tbody>
+                    {% for exception_type, exceptions in exceptions_by_type.items() %}
+                    <tr>
+                        <td>{{ exception_type }}</td>
+                        <td>{{ exceptions }}</td>
+                    </tr>
+                    {% endfor %}
+                </tbody>
+            </table>
+
+
+            <h2>Exceptions by Model</h2>
+            <table>
+                <thead>
+                    <tr>
+                        <th>Model</th>
+                        <th>Number</th>
+                    </tr>
+                </thead>
+                <tbody>
+                    {% for model, exceptions in exceptions_by_model.items() %}
+                    <tr>
+                        <td>{{ model }}</td>
+                        <td>{{ exceptions }}</td>
+                    </tr>
+                    {% endfor %}
+                </tbody>
+            </table>
+
+
+            <h2>Exceptions by Question Name</h2>
+            <table>
+                <thead>
+                    <tr>
+                        <th>Question Name</th>
+                        <th>Number of Exceptions</th>
+                    </tr>
+                </thead>
+                <tbody>
+                    {% for question_name, exception_count in exceptions_by_question_name.items() %}
+                    <tr>
+                        <td>{{ question_name }}</td>
+                        <td>{{ exception_count }}</td>
+                    </tr>
+                    {% endfor %}
+                </tbody>
+            </table>
+
+
             {% for index, interview in interviews.items() %}
             {% if interview.exceptions != {} %}
             <div class="interview">Interview: {{ index }} </div>

@@ -296,11 +427,18 @@ class TaskHistory:
             """
         )
 
+        # breakpoint()
+
         # Render the template with data
         output = template.render(
             interviews=self._interviews,
             css=css,
+            num_exceptions=len(self.exceptions),
             performance_plot_html=performance_plot_html,
+            exceptions_by_type=self.exceptions_by_type,
+            exceptions_by_question_name=self.exceptions_by_question_name,
+            exceptions_by_model=self.exceptions_by_model,
+            models_used=models_used,
         )
 
         # Save the rendered output to a file

@@ -344,3 +482,9 @@ class TaskHistory:
 
         if return_link:
             return filename
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod(optionflags=doctest.ELLIPSIS)
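
For reference, a minimal usage sketch of the new TaskHistory helpers added above (the import path is inferred from the file list; printed values are illustrative, not guaranteed outputs):

    from edsl.jobs.tasks.TaskHistory import TaskHistory

    # Builds a TaskHistory by running Jobs.example(throw_exception_probability=1, test_model=True)
    th = TaskHistory.example()
    print(th.has_exceptions)                # True, per the new doctest
    print(th.exceptions_by_type)            # tallies keyed by each exception entry's "exception" field
    print(th.exceptions_by_question_name)   # per-question counts, zero-filled for clean questions
    print(th.exceptions_by_model)           # per-model counts
    th.html(filename="exceptions_report.html")  # the report now opens with the Overview tables
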
edsl/language_models/LanguageModel.py

@@ -7,9 +7,37 @@ import asyncio
 import json
 import time
 import os
+import hashlib
 from typing import Coroutine, Any, Callable, Type, List, get_type_hints
 from abc import ABC, abstractmethod
 
+
+class IntendedModelCallOutcome:
+    "This is a tuple-like class that holds the response, cache_used, and cache_key."
+
+    def __init__(self, response: dict, cache_used: bool, cache_key: str):
+        self.response = response
+        self.cache_used = cache_used
+        self.cache_key = cache_key
+
+    def __iter__(self):
+        """Iterate over the class attributes.
+
+        >>> a, b, c = IntendedModelCallOutcome({'answer': "yes"}, True, 'x1289')
+        >>> a
+        {'answer': 'yes'}
+        """
+        yield self.response
+        yield self.cache_used
+        yield self.cache_key
+
+    def __len__(self):
+        return 3
+
+    def __repr__(self):
+        return f"IntendedModelCallOutcome(response = {self.response}, cache_used = {self.cache_used}, cache_key = '{self.cache_key}')"
+
+
 from edsl.config import CONFIG
 
 from edsl.utilities.decorators import sync_wrapper, jupyter_nb_handler

@@ -96,6 +124,11 @@ class LanguageModel(
         # Skip the API key check. Sometimes this is useful for testing.
         self._api_token = None
 
+    def ask_question(self, question):
+        user_prompt = question.get_instructions().render(question.data).text
+        system_prompt = "You are a helpful agent pretending to be a human."
+        return self.execute_model_call(user_prompt, system_prompt)
+
     @property
     def api_token(self) -> str:
         if not hasattr(self, "_api_token"):

@@ -149,7 +182,7 @@ class LanguageModel(
         key_value = os.getenv(key_name)
         return key_value is not None
 
-    def __hash__(self):
+    def __hash__(self) -> str:
         """Allow the model to be used as a key in a dictionary."""
         from edsl.utilities.utilities import dict_hash
 

@@ -216,19 +249,25 @@ class LanguageModel(
        >>> LanguageModel._overide_default_parameters(passed_parameter_dict={"temperature": 0.5}, default_parameter_dict={"temperature":0.9, "max_tokens": 1000})
        {'temperature': 0.5, 'max_tokens': 1000}
        """
-        parameters = dict({})
-        for parameter, default_value in default_parameter_dict.items():
-            if parameter in passed_parameter_dict:
-                parameters[parameter] = passed_parameter_dict[parameter]
-            else:
-                parameters[parameter] = default_value
-        return parameters
+        # parameters = dict({})
+
+        return {
+            parameter_name: passed_parameter_dict.get(parameter_name, default_value)
+            for parameter_name, default_value in default_parameter_dict.items()
+        }
+
+    def __call__(self, user_prompt: str, system_prompt: str):
+        return self.execute_model_call(user_prompt, system_prompt)
 
     @abstractmethod
     async def async_execute_model_call(user_prompt: str, system_prompt: str):
-        """Execute the model call and returns the result as a coroutine.
+        """Execute the model call and returns a coroutine.
 
        >>> m = LanguageModel.example(test_model = True)
+        >>> async def test(): return await m.async_execute_model_call("Hello, model!", "You are a helpful agent.")
+        >>> asyncio.run(test())
+        {'message': '{"answer": "Hello world"}'}
+
        >>> m.execute_model_call("Hello, model!", "You are a helpful agent.")
        {'message': '{"answer": "Hello world"}'}
 

@@ -274,11 +313,38 @@ class LanguageModel(
 
        What is returned by the API is model-specific and often includes meta-data that we do not need.
        For example, here is the results from a call to GPT-4:
-        To actually tract the response, we need to grab
+        To actually track the response, we need to grab
        data["choices[0]"]["message"]["content"].
        """
        raise NotImplementedError
 
+    async def _async_prepare_response(
+        self, model_call_outcome: IntendedModelCallOutcome, cache: "Cache"
+    ) -> dict:
+        """Prepare the response for return."""
+
+        model_response = {
+            "cache_used": model_call_outcome.cache_used,
+            "cache_key": model_call_outcome.cache_key,
+            "usage": model_call_outcome.response.get("usage", {}),
+            "raw_model_response": model_call_outcome.response,
+        }
+
+        answer_portion = self.parse_response(model_call_outcome.response)
+        try:
+            answer_dict = json.loads(answer_portion)
+        except json.JSONDecodeError as e:
+            # TODO: Turn into logs to generate issues
+            answer_dict, success = await repair(
+                bad_json=answer_portion, error_message=str(e), cache=cache
+            )
+            if not success:
+                raise Exception(
+                    f"""Even the repair failed. The error was: {e}. The response was: {answer_portion}."""
+                )
+
+        return {**model_response, **answer_dict}
+
     async def async_get_raw_response(
         self,
         user_prompt: str,

@@ -286,7 +352,28 @@ class LanguageModel(
         cache: "Cache",
         iteration: int = 0,
         encoded_image=None,
-    ) -> tuple[dict, bool, str]:
+    ) -> IntendedModelCallOutcome:
+        import warnings
+
+        warnings.warn(
+            "This method is deprecated. Use async_get_intended_model_call_outcome."
+        )
+        return await self._async_get_intended_model_call_outcome(
+            user_prompt=user_prompt,
+            system_prompt=system_prompt,
+            cache=cache,
+            iteration=iteration,
+            encoded_image=encoded_image,
+        )
+
+    async def _async_get_intended_model_call_outcome(
+        self,
+        user_prompt: str,
+        system_prompt: str,
+        cache: "Cache",
+        iteration: int = 0,
+        encoded_image=None,
+    ) -> IntendedModelCallOutcome:
        """Handle caching of responses.
 
        :param user_prompt: The user's prompt.

@@ -304,52 +391,49 @@ class LanguageModel(
 
        >>> from edsl import Cache
        >>> m = LanguageModel.example(test_model = True)
-        >>> m.get_raw_response(user_prompt = "Hello", system_prompt = "hello", cache = Cache())
-        ({'message': '{"answer": "Hello world"}'}, False, '24ff6ac2bc2f1729f817f261e0792577')
+        >>> m._get_intended_model_call_outcome(user_prompt = "Hello", system_prompt = "hello", cache = Cache())
+        IntendedModelCallOutcome(response = {'message': '{"answer": "Hello world"}'}, cache_used = False, cache_key = '24ff6ac2bc2f1729f817f261e0792577')
        """
-        start_time = time.time()
+
+        if encoded_image:
+            # the image has is appended to the user_prompt for hash-lookup purposes
+            image_hash = hashlib.md5(encoded_image.encode()).hexdigest()
 
         cache_call_params = {
             "model": str(self.model),
             "parameters": self.parameters,
             "system_prompt": system_prompt,
-            "user_prompt": user_prompt,
+            "user_prompt": user_prompt + "" if not encoded_image else f" {image_hash}",
             "iteration": iteration,
         }
-
-        if encoded_image:
-            import hashlib
-
-            image_hash = hashlib.md5(encoded_image.encode()).hexdigest()
-            cache_call_params["user_prompt"] = f"{user_prompt} {image_hash}"
-
         cached_response, cache_key = cache.fetch(**cache_call_params)
-        if cached_response:
+
+        if cache_used := cached_response is not None:
             response = json.loads(cached_response)
-            cache_used = True
         else:
-            remote_call = hasattr(self, "remote") and self.remote
             f = (
                 self.remote_async_execute_model_call
-                if remote_call
+                if hasattr(self, "remote") and self.remote
                 else self.async_execute_model_call
             )
-            params = {"user_prompt": user_prompt, "system_prompt": system_prompt}
-            if encoded_image:
-                params["encoded_image"] = encoded_image
+            params = {
+                "user_prompt": user_prompt,
+                "system_prompt": system_prompt,
+                **({"encoded_image": encoded_image} if encoded_image else {}),
+            }
             response = await f(**params)
             new_cache_key = cache.store(
-                user_prompt=user_prompt,
-                model=str(self.model),
-                parameters=self.parameters,
-                system_prompt=system_prompt,
-                response=response,
-                iteration=iteration,
-            )
-            assert new_cache_key == cache_key
-            cache_used = False
+                **cache_call_params, response=response
+            )  # store the response in the cache
+            assert new_cache_key == cache_key  # should be the same
+
+        return IntendedModelCallOutcome(
+            response=response, cache_used=cache_used, cache_key=cache_key
+        )
 
-        return response, cache_used, cache_key
+    _get_intended_model_call_outcome = sync_wrapper(
+        _async_get_intended_model_call_outcome
+    )
 
     get_raw_response = sync_wrapper(async_get_raw_response)
 

@@ -370,7 +454,7 @@ class LanguageModel(
         self,
         user_prompt: str,
         system_prompt: str,
-        cache: Cache,
+        cache: "Cache",
         iteration: int = 1,
         encoded_image=None,
     ) -> dict:

@@ -388,36 +472,10 @@ class LanguageModel(
             "system_prompt": system_prompt,
             "iteration": iteration,
             "cache": cache,
+            **({"encoded_image": encoded_image} if encoded_image else {}),
         }
-        if encoded_image:
-            params["encoded_image"] = encoded_image
-
-        raw_response, cache_used, cache_key = await self.async_get_raw_response(
-            **params
-        )
-        response = self.parse_response(raw_response)
-
-        try:
-            dict_response = json.loads(response)
-        except json.JSONDecodeError as e:
-            # TODO: Turn into logs to generate issues
-            dict_response, success = await repair(
-                bad_json=response, error_message=str(e), cache=cache
-            )
-            if not success:
-                raise Exception(
-                    f"""Even the repair failed. The error was: {e}. The response was: {response}."""
-                )
-
-        dict_response.update(
-            {
-                "cache_used": cache_used,
-                "cache_key": cache_key,
-                "usage": raw_response.get("usage", {}),
-                "raw_model_response": raw_response,
-            }
-        )
-        return dict_response
+        model_call_outcome = await self._async_get_intended_model_call_outcome(**params)
+        return await self._async_prepare_response(model_call_outcome, cache=cache)
 
     get_response = sync_wrapper(async_get_response)
 

@@ -494,7 +552,12 @@ class LanguageModel(
         return table
 
     @classmethod
-    def example(cls, test_model: bool = False, canned_response: str = "Hello world"):
+    def example(
+        cls,
+        test_model: bool = False,
+        canned_response: str = "Hello world",
+        throw_exception: bool = False,
+    ):
        """Return a default instance of the class.
 
        >>> from edsl.language_models import LanguageModel

@@ -519,6 +582,8 @@ class LanguageModel(
            ) -> dict[str, Any]:
                await asyncio.sleep(0.1)
                # return {"message": """{"answer": "Hello, world"}"""}
+                if throw_exception:
+                    raise Exception("This is a test error")
                return {"message": f'{{"answer": "{canned_response}"}}'}
 
    def parse_response(self, raw_response: dict[str, Any]) -> str:

@@ -536,8 +601,3 @@ if __name__ == "__main__":
    import doctest
 
    doctest.testmod(optionflags=doctest.ELLIPSIS)
-
-    # from edsl.language_models import LanguageModel
-
-    # from edsl.language_models import LanguageModel
-    # print(LanguageModel.example())
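
The caching refactor above replaces the old (response, cache_used, cache_key) tuple with IntendedModelCallOutcome while keeping tuple-style unpacking. A minimal sketch based on the doctests added in this file (the test model avoids any real API call; exact cache keys will differ):

    from edsl import Cache
    from edsl.language_models import LanguageModel

    m = LanguageModel.example(test_model=True, canned_response="Hello world")

    outcome = m._get_intended_model_call_outcome(
        user_prompt="Hello", system_prompt="hello", cache=Cache()
    )
    response, cache_used, cache_key = outcome  # __iter__ keeps the old 3-tuple unpacking working

    # get_response still returns a flat dict; cache metadata is merged in by _async_prepare_response.
    full = m.get_response(user_prompt="Hello", system_prompt="hello", cache=Cache())
    print(full["cache_used"], full["cache_key"], full["answer"])
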
edsl/language_models/ModelList.py

@@ -86,8 +86,14 @@ class ModelList(Base, UserList):
         pass
 
     @classmethod
-    def example(cl):
-        return ModelList([LanguageModel.example() for _ in range(3)])
+    def example(cls, randomize: bool = False) -> "ModelList":
+        """
+        Returns an example ModelList instance.
+
+        :param randomize: If True, uses Model's randomize method.
+        """
+
+        return cls([Model.example(randomize) for _ in range(3)])
 
 
 if __name__ == "__main__":
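
A quick sketch of the updated ModelList.example (module path taken from the file list above):

    from edsl.language_models.ModelList import ModelList

    fixed = ModelList.example()                 # three Model.example() instances at temperature 0.5
    varied = ModelList.example(randomize=True)  # temperatures drawn via round(random(), 2)
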
edsl/language_models/registry.py

@@ -1,4 +1,5 @@
 import textwrap
+from random import random
 
 
 def get_model_class(model_name, registry=None):

@@ -35,6 +36,10 @@ class Model(metaclass=Meta):
         from edsl.inference_services.registry import default
 
         registry = registry or default
+
+        if isinstance(model_name, int):
+            model_name = cls.available(name_only=True)[model_name]
+
         factory = registry.create_model_factory(model_name)
         return factory(*args, **kwargs)
 

@@ -92,6 +97,17 @@ class Model(metaclass=Meta):
         print("OK!")
         print("\n")
 
+    @classmethod
+    def example(cls, randomize: bool = False) -> "Model":
+        """
+        Returns an example Model instance.
+
+        :param randomize: If True, the temperature is set to a random decimal between 0 and 1.
+        """
+        temperature = 0.5 if not randomize else round(random(), 2)
+        model_name = cls.default_model
+        return cls(model_name, temperature=temperature)
+
 
 if __name__ == "__main__":
     import doctest
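
Two additions here are easy to miss: Model(...) now accepts an integer index into the available-models list, and Model.example() gained a randomize flag. A hedged sketch (assuming Model is exported at the edsl package top level, which is not shown in this diff):

    from edsl import Model

    m_default = Model.example()               # cls.default_model at temperature 0.5
    m_random = Model.example(randomize=True)  # temperature = round(random(), 2)

    # An int now selects a model by position in Model.available(name_only=True).
    first = Model(0)  # equivalent to Model(Model.available(name_only=True)[0])
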
edsl/questions/QuestionFunctional.py

@@ -4,10 +4,34 @@ import inspect
 from edsl.questions.QuestionBase import QuestionBase
 
 from edsl.utilities.restricted_python import create_restricted_function
+from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
 
 
 class QuestionFunctional(QuestionBase):
-    """A special type of question that is *not* answered by an LLM."""
+    """A special type of question that is *not* answered by an LLM.
+
+    >>> from edsl import Scenario, Agent
+
+    # Create an instance of QuestionFunctional with the new function
+    >>> question = QuestionFunctional.example()
+
+    # Activate and test the function
+    >>> question.activate()
+    >>> scenario = Scenario({"numbers": [1, 2, 3, 4, 5]})
+    >>> agent = Agent(traits={"multiplier": 10})
+    >>> results = question.by(scenario).by(agent).run()
+    >>> results.select("answer.*").to_list()[0] == 150
+    True
+
+    # Serialize the question to a dictionary
+
+    >>> from edsl.questions.QuestionBase import QuestionBase
+    >>> new_question = QuestionBase.from_dict(question.to_dict())
+    >>> results = new_question.by(scenario).by(agent).run()
+    >>> results.select("answer.*").to_list()[0] == 150
+    True
+
+    """
 
     question_type = "functional"
     default_instructions = ""

@@ -73,6 +97,7 @@ class QuestionFunctional(QuestionBase):
         """Required by Question, but not used by QuestionFunctional."""
         raise NotImplementedError
 
+    @add_edsl_version
     def to_dict(self):
         return {
             "question_name": self.question_name,

@@ -113,4 +138,11 @@ def main():
     scenario = Scenario({"numbers": [1, 2, 3, 4, 5]})
     agent = Agent(traits={"multiplier": 10})
     results = question.by(scenario).by(agent).run()
-    print(results)
+    assert results.select("answer.*").to_list()[0] == 150
+
+
+if __name__ == "__main__":
+    # main()
+    import doctest
+
+    doctest.testmod(optionflags=doctest.ELLIPSIS)