PyPI - edsl - Versions diffs - 0.1.27.dev2__py3-none-any.whl → 0.1.29__py3-none-any.whl - Mend

edsl 0.1.27.dev2__py3-none-any.whl → 0.1.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (119) hide show

edsl/Base.py +107 -30
edsl/BaseDiff.py +260 -0
edsl/__init__.py +25 -21
edsl/__version__.py +1 -1
edsl/agents/Agent.py +103 -46
edsl/agents/AgentList.py +97 -13
edsl/agents/Invigilator.py +23 -10
edsl/agents/InvigilatorBase.py +19 -14
edsl/agents/PromptConstructionMixin.py +342 -100
edsl/agents/descriptors.py +5 -2
edsl/base/Base.py +289 -0
edsl/config.py +2 -1
edsl/conjure/AgentConstructionMixin.py +152 -0
edsl/conjure/Conjure.py +56 -0
edsl/conjure/InputData.py +659 -0
edsl/conjure/InputDataCSV.py +48 -0
edsl/conjure/InputDataMixinQuestionStats.py +182 -0
edsl/conjure/InputDataPyRead.py +91 -0
edsl/conjure/InputDataSPSS.py +8 -0
edsl/conjure/InputDataStata.py +8 -0
edsl/conjure/QuestionOptionMixin.py +76 -0
edsl/conjure/QuestionTypeMixin.py +23 -0
edsl/conjure/RawQuestion.py +65 -0
edsl/conjure/SurveyResponses.py +7 -0
edsl/conjure/__init__.py +9 -4
edsl/conjure/examples/placeholder.txt +0 -0
edsl/conjure/naming_utilities.py +263 -0
edsl/conjure/utilities.py +165 -28
edsl/conversation/Conversation.py +238 -0
edsl/conversation/car_buying.py +58 -0
edsl/conversation/mug_negotiation.py +81 -0
edsl/conversation/next_speaker_utilities.py +93 -0
edsl/coop/coop.py +337 -121
edsl/coop/utils.py +56 -70
edsl/data/Cache.py +74 -22
edsl/data/CacheHandler.py +10 -9
edsl/data/SQLiteDict.py +11 -3
edsl/inference_services/AnthropicService.py +1 -0
edsl/inference_services/DeepInfraService.py +20 -13
edsl/inference_services/GoogleService.py +7 -1
edsl/inference_services/InferenceServicesCollection.py +33 -7
edsl/inference_services/OpenAIService.py +17 -10
edsl/inference_services/models_available_cache.py +69 -0
edsl/inference_services/rate_limits_cache.py +25 -0
edsl/inference_services/write_available.py +10 -0
edsl/jobs/Answers.py +15 -1
edsl/jobs/Jobs.py +322 -73
edsl/jobs/buckets/BucketCollection.py +9 -3
edsl/jobs/buckets/ModelBuckets.py +4 -2
edsl/jobs/buckets/TokenBucket.py +1 -2
edsl/jobs/interviews/Interview.py +7 -10
edsl/jobs/interviews/InterviewStatusMixin.py +3 -3
edsl/jobs/interviews/InterviewTaskBuildingMixin.py +39 -20
edsl/jobs/interviews/retry_management.py +4 -4
edsl/jobs/runners/JobsRunnerAsyncio.py +103 -65
edsl/jobs/runners/JobsRunnerStatusData.py +3 -3
edsl/jobs/tasks/QuestionTaskCreator.py +4 -2
edsl/jobs/tasks/TaskHistory.py +4 -3
edsl/language_models/LanguageModel.py +42 -55
edsl/language_models/ModelList.py +96 -0
edsl/language_models/registry.py +14 -0
edsl/language_models/repair.py +97 -25
edsl/notebooks/Notebook.py +157 -32
edsl/prompts/Prompt.py +31 -19
edsl/questions/QuestionBase.py +145 -23
edsl/questions/QuestionBudget.py +5 -6
edsl/questions/QuestionCheckBox.py +7 -3
edsl/questions/QuestionExtract.py +5 -3
edsl/questions/QuestionFreeText.py +3 -3
edsl/questions/QuestionFunctional.py +0 -3
edsl/questions/QuestionList.py +3 -4
edsl/questions/QuestionMultipleChoice.py +16 -8
edsl/questions/QuestionNumerical.py +4 -3
edsl/questions/QuestionRank.py +5 -3
edsl/questions/__init__.py +4 -3
edsl/questions/descriptors.py +9 -4
edsl/questions/question_registry.py +27 -31
edsl/questions/settings.py +1 -1
edsl/results/Dataset.py +31 -0
edsl/results/DatasetExportMixin.py +493 -0
edsl/results/Result.py +42 -82
edsl/results/Results.py +178 -66
edsl/results/ResultsDBMixin.py +10 -9
edsl/results/ResultsExportMixin.py +23 -507
edsl/results/ResultsGGMixin.py +3 -3
edsl/results/ResultsToolsMixin.py +9 -9
edsl/scenarios/FileStore.py +140 -0
edsl/scenarios/Scenario.py +59 -6
edsl/scenarios/ScenarioList.py +138 -52
edsl/scenarios/ScenarioListExportMixin.py +32 -0
edsl/scenarios/ScenarioListPdfMixin.py +2 -1
edsl/scenarios/__init__.py +1 -0
edsl/study/ObjectEntry.py +173 -0
edsl/study/ProofOfWork.py +113 -0
edsl/study/SnapShot.py +73 -0
edsl/study/Study.py +498 -0
edsl/study/__init__.py +4 -0
edsl/surveys/MemoryPlan.py +11 -4
edsl/surveys/Survey.py +124 -37
edsl/surveys/SurveyExportMixin.py +25 -5
edsl/surveys/SurveyFlowVisualizationMixin.py +6 -4
edsl/tools/plotting.py +4 -2
edsl/utilities/__init__.py +21 -20
edsl/utilities/gcp_bucket/__init__.py +0 -0
edsl/utilities/gcp_bucket/cloud_storage.py +96 -0
edsl/utilities/gcp_bucket/simple_example.py +9 -0
edsl/utilities/interface.py +90 -73
edsl/utilities/repair_functions.py +28 -0
edsl/utilities/utilities.py +59 -6
{edsl-0.1.27.dev2.dist-info → edsl-0.1.29.dist-info}/METADATA +42 -15
edsl-0.1.29.dist-info/RECORD +203 -0
edsl/conjure/RawResponseColumn.py +0 -327
edsl/conjure/SurveyBuilder.py +0 -308
edsl/conjure/SurveyBuilderCSV.py +0 -78
edsl/conjure/SurveyBuilderSPSS.py +0 -118
edsl/data/RemoteDict.py +0 -103
edsl-0.1.27.dev2.dist-info/RECORD +0 -172
{edsl-0.1.27.dev2.dist-info → edsl-0.1.29.dist-info}/LICENSE +0 -0
{edsl-0.1.27.dev2.dist-info → edsl-0.1.29.dist-info}/WHEEL +0 -0

edsl/language_models/LanguageModel.py CHANGED Viewed

@@ -7,26 +7,18 @@ import asyncio
 import json
 import time
 import os
 from typing import Coroutine, Any, Callable, Type, List, get_type_hints
-from abc import ABC, abstractmethod, ABCMeta
-from rich.table import Table
+from abc import ABC, abstractmethod
 from edsl.config import CONFIG
-from edsl.utilities.utilities import clean_json
 from edsl.utilities.decorators import sync_wrapper, jupyter_nb_handler
 from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
 from edsl.language_models.repair import repair
-from edsl.exceptions.language_models import LanguageModelAttributeTypeError
 from edsl.enums import InferenceServiceType
 from edsl.Base import RichPrintingMixin, PersistenceMixin
-from edsl.data.Cache import Cache
 from edsl.enums import service_to_api_keyname
 from edsl.exceptions import MissingAPIKeyError
 from edsl.language_models.RegisterLanguageModelsMeta import RegisterLanguageModelsMeta
@@ -142,7 +134,7 @@ class LanguageModel(
     def has_valid_api_key(self) -> bool:
         """Check if the model has a valid API key.
-        >>> LanguageModel.example().has_valid_api_key()
+        >>> LanguageModel.example().has_valid_api_key() : # doctest: +SKIP
         True
         This method is used to check if the model has a valid API key.
@@ -159,7 +151,9 @@ class LanguageModel(
     def __hash__(self):
         """Allow the model to be used as a key in a dictionary."""
-        return hash(self.model + str(self.parameters))
+        from edsl.utilities.utilities import dict_hash
+        return dict_hash(self.to_dict())
     def __eq__(self, other):
         """Check is two models are the same.
@@ -207,8 +201,8 @@ class LanguageModel(
         """Model's tokens-per-minute limit.
         >>> m = LanguageModel.example()
-        >>> m.TPM
-        1600000.0
+        >>> m.TPM > 0
+        True
         """
         self._set_rate_limits()
         return self._safety_factor * self.__rate_limits["tpm"]
@@ -285,36 +279,14 @@ class LanguageModel(
         """
         raise NotImplementedError
-    def _update_response_with_tracking(
-        self, response: dict, start_time: int, cached_response=False, cache_key=None
-    ):
-        """Update the response with tracking information.
-        >>> m = LanguageModel.example()
-        >>> m._update_response_with_tracking(response={"response": "Hello"}, start_time=0, cached_response=False, cache_key=None)
-        {'response': 'Hello', 'elapsed_time': ..., 'timestamp': ..., 'cached_response': False, 'cache_key': None}
-        """
-        end_time = time.time()
-        response.update(
-            {
-                "elapsed_time": end_time - start_time,
-                "timestamp": end_time,
-                "cached_response": cached_response,
-                "cache_key": cache_key,
-            }
-        )
-        return response
     async def async_get_raw_response(
         self,
         user_prompt: str,
         system_prompt: str,
-        cache,
+        cache: "Cache",
         iteration: int = 0,
         encoded_image=None,
-    ) -> dict[str, Any]:
+    ) -> tuple[dict, bool, str]:
         """Handle caching of responses.
         :param user_prompt: The user's prompt.
@@ -322,8 +294,7 @@ class LanguageModel(
         :param iteration: The iteration number.
         :param cache: The cache to use.
-        If the cache isn't being used, it just returns a 'fresh' call to the LLM,
-        but appends some tracking information to the response (using the _update_response_with_tracking method).
+        If the cache isn't being used, it just returns a 'fresh' call to the LLM.
         But if cache is being used, it first checks the database to see if the response is already there.
         If it is, it returns the cached response, but again appends some tracking information.
         If it isn't, it calls the LLM, saves the response to the database, and returns the response with tracking information.
@@ -334,7 +305,7 @@ class LanguageModel(
         >>> from edsl import Cache
         >>> m = LanguageModel.example(test_model = True)
         >>> m.get_raw_response(user_prompt = "Hello", system_prompt = "hello", cache = Cache())
-        {'message': '{"answer": "Hello world"}', 'elapsed_time': ..., 'timestamp': ..., 'cached_response': False, 'cache_key': '24ff6ac2bc2f1729f817f261e0792577'}
+        ({'message': '{"answer": "Hello world"}'}, False, '24ff6ac2bc2f1729f817f261e0792577')
         """
         start_time = time.time()
@@ -379,12 +350,7 @@ class LanguageModel(
             )
             cache_used = False
-        return self._update_response_with_tracking(
-            response=response,
-            start_time=start_time,
-            cached_response=cache_used,
-            cache_key=cache_key,
-        )
+        return response, cache_used, cache_key
     get_raw_response = sync_wrapper(async_get_raw_response)
@@ -427,14 +393,18 @@ class LanguageModel(
         if encoded_image:
             params["encoded_image"] = encoded_image
-        raw_response = await self.async_get_raw_response(**params)
+        raw_response, cache_used, cache_key = await self.async_get_raw_response(
+            **params
+        )
         response = self.parse_response(raw_response)
         try:
             dict_response = json.loads(response)
         except json.JSONDecodeError as e:
             # TODO: Turn into logs to generate issues
-            dict_response, success = await repair(response, str(e))
+            dict_response, success = await repair(
+                bad_json=response, error_message=str(e), cache=cache
+            )
             if not success:
                 raise Exception(
                     f"""Even the repair failed. The error was: {e}. The response was: {response}."""
@@ -442,7 +412,8 @@ class LanguageModel(
         dict_response.update(
             {
-                "cached_response": raw_response["cached_response"],
+                "cached_used": cache_used,
+                "cache_key": cache_key,
                 "usage": raw_response.get("usage", {}),
                 "raw_model_response": raw_response,
             }
@@ -458,15 +429,18 @@ class LanguageModel(
     #######################
     # SERIALIZATION METHODS
     #######################
+    def _to_dict(self) -> dict[str, Any]:
+        return {"model": self.model, "parameters": self.parameters}
     @add_edsl_version
     def to_dict(self) -> dict[str, Any]:
         """Convert instance to a dictionary.
         >>> m = LanguageModel.example()
         >>> m.to_dict()
-        {'model': 'gpt-4-1106-preview', 'parameters': {'temperature': 0.5, 'max_tokens': 1000, 'top_p': 1, 'frequency_penalty': 0, 'presence_penalty': 0, 'logprobs': False, 'top_logprobs': 3}}
+        {'model': 'gpt-4-1106-preview', 'parameters': {'temperature': 0.5, 'max_tokens': 1000, 'top_p': 1, 'frequency_penalty': 0, 'presence_penalty': 0, 'logprobs': False, 'top_logprobs': 3}, 'edsl_version': '...', 'edsl_class_name': 'LanguageModel'}
         """
-        return {"model": self.model, "parameters": self.parameters}
+        return self._to_dict()
     @classmethod
     @remove_edsl_version
@@ -508,6 +482,8 @@ class LanguageModel(
     def rich_print(self):
         """Display an object as a table."""
+        from rich.table import Table
         table = Table(title="Language Model")
         table.add_column("Attribute", style="bold")
         table.add_column("Value")
@@ -519,8 +495,18 @@ class LanguageModel(
         return table
     @classmethod
-    def example(cls, test_model=False):
-        """Return a default instance of the class."""
+    def example(cls, test_model: bool = False, canned_response: str = "Hello world"):
+        """Return a default instance of the class.
+        >>> from edsl.language_models import LanguageModel
+        >>> m = LanguageModel.example(test_model = True, canned_response = "WOWZA!")
+        >>> isinstance(m, LanguageModel)
+        True
+        >>> from edsl import QuestionFreeText
+        >>> q = QuestionFreeText(question_text = "What is your name?", question_name = 'example')
+        >>> q.by(m).run(cache = False).select('example').first()
+        'WOWZA!'
+        """
         from edsl import Model
         class TestLanguageModelGood(LanguageModel):
@@ -533,7 +519,8 @@ class LanguageModel(
                 self, user_prompt: str, system_prompt: str
             ) -> dict[str, Any]:
                 await asyncio.sleep(0.1)
-                return {"message": """{"answer": "Hello world"}"""}
+                # return {"message": """{"answer": "Hello, world"}"""}
+                return {"message": f'{{"answer": "{canned_response}"}}'}
             def parse_response(self, raw_response: dict[str, Any]) -> str:
                 return raw_response["message"]

edsl/language_models/ModelList.py ADDED Viewed

@@ -0,0 +1,96 @@
+from typing import Optional
+from collections import UserList
+from edsl import Model
+from edsl.language_models import LanguageModel
+from edsl.Base import Base
+from edsl.utilities.decorators import add_edsl_version, remove_edsl_version
+from edsl.utilities.utilities import is_valid_variable_name
+from edsl.utilities.utilities import dict_hash
+class ModelList(Base, UserList):
+    def __init__(self, data: Optional[list] = None):
+        """Initialize the ScenarioList class.
+        >>> from edsl import Model
+        >>> m = ModelList(Model.available())
+        """
+        if data is not None:
+            super().__init__(data)
+        else:
+            super().__init__([])
+    @property
+    def names(self):
+        """
+        >>> ModelList.example().names
+        {'...'}
+        """
+        return set([model.model for model in self])
+    def rich_print(self):
+        pass
+    def __repr__(self):
+        return f"ModelList({super().__repr__()})"
+    def __hash__(self):
+        """Return a hash of the ModelList. This is used for comparison of ModelLists.
+        >>> hash(ModelList.example())
+        1423518243781418961
+        """
+        from edsl.utilities.utilities import dict_hash
+        return dict_hash(self._to_dict(sort=True))
+    def _to_dict(self, sort=False):
+        if sort:
+            model_list = sorted([model for model in self], key=lambda x: hash(x))
+            return {"models": [model._to_dict() for model in model_list]}
+        else:
+            return {"models": [model._to_dict() for model in self]}
+    @classmethod
+    def from_names(self, *args, **kwargs):
+        """A a model list from a list of names"""
+        if len(args) == 1 and isinstance(args[0], list):
+            args = args[0]
+        return ModelList([Model(model_name, **kwargs) for model_name in args])
+    @add_edsl_version
+    def to_dict(self):
+        """
+        Convert the ModelList to a dictionary.
+        >>> ModelList.example().to_dict()
+        {'models': [...], 'edsl_version': '...', 'edsl_class_name': 'ModelList'}
+        """
+        return self._to_dict()
+    @classmethod
+    @remove_edsl_version
+    def from_dict(cls, data):
+        """
+        Create a ModelList from a dictionary.
+        >>> newm = ModelList.from_dict(ModelList.example().to_dict())
+        >>> assert ModelList.example() == newm
+        """
+        return cls(data=[LanguageModel.from_dict(model) for model in data["models"]])
+    def code(self):
+        pass
+    @classmethod
+    def example(cl):
+        return ModelList([LanguageModel.example() for _ in range(3)])
+if __name__ == "__main__":
+    import doctest
+    doctest.testmod(optionflags=doctest.ELLIPSIS)

edsl/language_models/registry.py CHANGED Viewed

@@ -38,6 +38,20 @@ class Model(metaclass=Meta):
         factory = registry.create_model_factory(model_name)
         return factory(*args, **kwargs)
+    @classmethod
+    def add_model(cls, service_name, model_name):
+        from edsl.inference_services.registry import default
+        registry = default
+        registry.add_model(service_name, model_name)
+    @classmethod
+    def services(cls, registry=None):
+        from edsl.inference_services.registry import default
+        registry = registry or default
+        return [r._inference_service_ for r in registry.services]
     @classmethod
     def available(cls, search_term=None, name_only=False, registry=None):
         from edsl.inference_services.registry import default

edsl/language_models/repair.py CHANGED Viewed

@@ -1,14 +1,14 @@
 import json
 import asyncio
+import warnings
-from edsl.utilities.utilities import clean_json
+async def async_repair(
+    bad_json, error_message="", user_prompt=None, system_prompt=None, cache=None
+):
+    from edsl.utilities.utilities import clean_json
-async def async_repair(bad_json, error_message=""):
     s = clean_json(bad_json)
-    from edsl import Model
-    m = Model()
     try:
         # this is the OpenAI version, but that's fine
@@ -17,56 +17,128 @@ async def async_repair(bad_json, error_message=""):
     except json.JSONDecodeError:
         valid_dict = {}
         success = False
-        # print("Replacing control characters didn't work. Trying with the model.")
+        # print("Replacing control characters didn't work. Trying extracting the sub-string.")
+    else:
+        return valid_dict, success
+    try:
+        from edsl.utilities.repair_functions import extract_json_from_string
+        valid_dict = extract_json_from_string(s)
+        success = True
+    except ValueError:
+        valid_dict = {}
+        success = False
     else:
         return valid_dict, success
-    prompt = f"""This is the output from a less capable language model.
-    It was supposed to respond with just a JSON object with an answer to a question and some commentary,
-    in a field called "comment" next to "answer".
-    Please repair this bad JSON: {bad_json}."""
+    from edsl import Model
-    if error_message:
-        prompt += f" Parsing error message: {error_message}"
+    m = Model()
-    try:
-        results = await m.async_execute_model_call(
-            prompt,
-            system_prompt="You are a helpful agent. Only return the repaired JSON, nothing else.",
+    from edsl import QuestionExtract
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore", UserWarning)
+        q = QuestionExtract(
+            question_text="""
+        A language model was supposed to respond to a question.
+        The response should have been JSON object with an answer to a question and some commentary.
+        It should have retured a string like this:
+        '{'answer': 'The answer to the question.', 'comment': 'Some commentary.'}'
+        or:
+        '{'answer': 'The answer to the question.'}'
+        The answer field is very like an integer number. The comment field is always string.
+        You job is to return just the repaired JSON object that the model should have returned, properly formatted.
+            - It might have included some preliminary comments.
+            - It might have included some control characters.
+            - It might have included some extraneous text.
+        DO NOT include any extraneous text in your response. Just return the repaired JSON object.
+        Do not preface the JSON object with any text. Just return the JSON object.
+        Bad answer: """
+            + str(bad_json)
+            + "The model received a user prompt of: '"
+            + str(user_prompt)
+            + """'
+        The model received a system prompt of: ' """
+            + str(system_prompt)
+            + """
+        '
+        Please return the repaired JSON object, following the instructions the original model should have followed, though
+        using 'new_answer' a nd 'new_comment' as the keys.""",
+            answer_template={
+                "new_answer": "<number, string, list, etc.>",
+                "new_comment": "Model's comments",
+            },
+            question_name="model_repair",
         )
-    except Exception as e:
-        return {}, False
+    results = await q.run_async(cache=cache)
     try:
         # this is the OpenAI version, but that's fine
-        valid_dict = json.loads(results["choices"][0]["message"]["content"])
+        valid_dict = json.loads(json.dumps(results))
         success = True
+        # this is to deal with the fact that the model returns the answer and comment as new_answer and new_comment
+        valid_dict["answer"] = valid_dict.pop("new_answer")
+        valid_dict["comment"] = valid_dict.pop("new_comment")
     except json.JSONDecodeError:
         valid_dict = {}
         success = False
+        from rich import print
+        from rich.console import Console
+        from rich.syntax import Syntax
+        console = Console()
+        error_message = (
+            f"All repairs. failed. LLM Model given [red]{str(bad_json)}[/red]"
+        )
+        console.print("    " + error_message)
+        model_returned = results["choices"][0]["message"]["content"]
+        console.print(f"LLM Model returned: [blue]{model_returned}[/blue]")
     return valid_dict, success
-def repair_wrapper(bad_json, error_message=""):
+def repair_wrapper(
+    bad_json, error_message="", user_prompt=None, system_prompt=None, cache=None
+):
     try:
         loop = asyncio.get_event_loop()
         if loop.is_running():
             # Add repair as a task to the running loop
-            task = loop.create_task(async_repair(bad_json, error_message))
+            task = loop.create_task(
+                async_repair(bad_json, error_message, user_prompt, system_prompt, cache)
+            )
             return task
         else:
             # Run a new event loop for repair
-            return loop.run_until_complete(async_repair(bad_json, error_message))
+            return loop.run_until_complete(
+                async_repair(bad_json, error_message, user_prompt, system_prompt, cache)
+            )
     except RuntimeError:
         # Create a new event loop if one is not already available
         loop = asyncio.new_event_loop()
         asyncio.set_event_loop(loop)
-        return loop.run_until_complete(async_repair(bad_json, error_message))
+        return loop.run_until_complete(
+            async_repair(bad_json, error_message, user_prompt, system_prompt, cache)
+        )
-def repair(bad_json, error_message=""):
-    return repair_wrapper(bad_json, error_message)
+def repair(
+    bad_json, error_message="", user_prompt=None, system_prompt=None, cache=None
+):
+    return repair_wrapper(bad_json, error_message, user_prompt, system_prompt, cache)
 # Example usage:

edsl 0.1.27.dev2__py3-none-any.whl → 0.1.29__py3-none-any.whl

edsl 0.1.27.dev2__py3-none-any.whl → 0.1.29__py3-none-any.whl