edsl 0.1.36__py3-none-any.whl → 0.1.36.dev2__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registries. It is provided for informational purposes only.
edsl/jobs/Jobs.py CHANGED
@@ -1,10 +1,8 @@
 # """The Jobs class is a collection of agents, scenarios and models and one survey."""
 from __future__ import annotations
 import warnings
-import requests
 from itertools import product
 from typing import Optional, Union, Sequence, Generator
-
 from edsl.Base import Base
 from edsl.exceptions import MissingAPIKeyError
 from edsl.jobs.buckets.BucketCollection import BucketCollection
@@ -12,9 +10,6 @@ from edsl.jobs.interviews.Interview import Interview
 from edsl.jobs.runners.JobsRunnerAsyncio import JobsRunnerAsyncio
 from edsl.utilities.decorators import add_edsl_version, remove_edsl_version

-from edsl.data.RemoteCacheSync import RemoteCacheSync
-from edsl.exceptions.coop import CoopServerResponseError
-

 class Jobs(Base):
     """
@@ -208,15 +203,14 @@ class Jobs(Base):
             ]
         )
         return d
+        # if table:
+        #     d.to_scenario_list().print(format="rich")
+        # else:
+        #     return d

-    def show_prompts(self, all=False) -> None:
+    def show_prompts(self) -> None:
         """Print the prompts."""
-        if all:
-            self.prompts().to_scenario_list().print(format="rich")
-        else:
-            self.prompts().select(
-                "user_prompt", "system_prompt"
-            ).to_scenario_list().print(format="rich")
+        self.prompts().to_scenario_list().print(format="rich")

     @staticmethod
     def estimate_prompt_cost(
@@ -225,11 +219,11 @@ class Jobs(Base):
         price_lookup: dict,
         inference_service: str,
         model: str,
-    ) -> dict:
+    ):
         """Estimates the cost of a prompt. Takes piping into account."""

         def get_piping_multiplier(prompt: str):
-            """Returns 2 if a prompt includes Jinja braces, and 1 otherwise."""
+            """Returns 2 if a prompt includes Jinja brances, and 1 otherwise."""

             if "{{" in prompt and "}}" in prompt:
                 return 2
@@ -237,25 +231,9 @@ class Jobs(Base):

         # Look up prices per token
         key = (inference_service, model)
-
-        try:
-            relevant_prices = price_lookup[key]
-            output_price_per_token = 1 / float(
-                relevant_prices["output"]["one_usd_buys"]
-            )
-            input_price_per_token = 1 / float(relevant_prices["input"]["one_usd_buys"])
-        except KeyError:
-            # A KeyError is likely to occur if we cannot retrieve prices (the price_lookup dict is empty)
-            # Use a sensible default
-
-            import warnings
-
-            warnings.warn(
-                "Price data could not be retrieved. Using default estimates for input and output token prices. Input: $0.15 / 1M tokens; Output: $0.60 / 1M tokens"
-            )
-
-            output_price_per_token = 0.00000015  # $0.15 / 1M tokens
-            input_price_per_token = 0.00000060  # $0.60 / 1M tokens
+        relevant_prices = price_lookup[key]
+        output_price_per_token = 1 / float(relevant_prices["output"]["one_usd_buys"])
+        input_price_per_token = 1 / float(relevant_prices["input"]["one_usd_buys"])

         # Compute the number of characters (double if the question involves piping)
         user_prompt_chars = len(str(user_prompt)) * get_piping_multiplier(
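
Taken together, the two hunks above show the estimation logic both versions share: prompt characters are counted (doubled when Jinja piping braces are present), converted to tokens, and priced with per-token rates derived from the `one_usd_buys` fields. A minimal self-contained sketch of that math; the 4-chars-per-token ratio, the 1:1 output-token heuristic, and the return shape are illustrative assumptions, not values taken from this diff:

# Hedged sketch of the shared cost-estimation math; token ratios below
# are assumptions for illustration, not values from the diff.
def sketch_estimate_prompt_cost(
    system_prompt: str,
    user_prompt: str,
    input_price_per_token: float,
    output_price_per_token: float,
) -> dict:
    def get_piping_multiplier(prompt: str) -> int:
        # Prompts containing Jinja braces are counted twice, as in the diff.
        return 2 if "{{" in prompt and "}}" in prompt else 1

    chars = len(str(user_prompt)) * get_piping_multiplier(str(user_prompt))
    chars += len(str(system_prompt)) * get_piping_multiplier(str(system_prompt))
    input_tokens = chars // 4  # assumed ~4 characters per token
    output_tokens = input_tokens  # assumed output roughly equal to input
    return {
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "cost": input_tokens * input_price_per_token
        + output_tokens * output_price_per_token,
    }
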
@@ -280,7 +258,7 @@ class Jobs(Base):
             "cost": cost,
         }

-    def estimate_job_cost_from_external_prices(self, price_lookup: dict) -> dict:
+    def estimate_job_cost_from_external_prices(self, price_lookup: dict):
         """
         Estimates the cost of a job according to the following assumptions:

@@ -363,7 +341,7 @@ class Jobs(Base):

         return output

-    def estimate_job_cost(self) -> dict:
+    def estimate_job_cost(self):
         """
         Estimates the cost of a job according to the following assumptions:

@@ -379,25 +357,6 @@ class Jobs(Base):

         return self.estimate_job_cost_from_external_prices(price_lookup=price_lookup)

-    @staticmethod
-    def compute_job_cost(job_results: "Results") -> float:
-        """
-        Computes the cost of a completed job in USD.
-        """
-        total_cost = 0
-        for result in job_results:
-            for key in result.raw_model_response:
-                if key.endswith("_cost"):
-                    result_cost = result.raw_model_response[key]
-
-                    question_name = key.removesuffix("_cost")
-                    cache_used = result.cache_used_dict[question_name]
-
-                    if isinstance(result_cost, (int, float)) and not cache_used:
-                        total_cost += result_cost
-
-        return total_cost
-
     @staticmethod
     def _get_container_class(object):
         from edsl.agents.AgentList import AgentList
@@ -621,7 +580,7 @@ class Jobs(Base):

     def _output(self, message) -> None:
         """Check if a Job is verbose. If so, print the message."""
-        if hasattr(self, "verbose") and self.verbose:
+        if self.verbose:
             print(message)

     def _check_parameters(self, strict=False, warn=False) -> None:
@@ -698,123 +657,6 @@ class Jobs(Base):
             return False
         return self._raise_validation_errors

-    def create_remote_inference_job(
-        self, iterations: int = 1, remote_inference_description: Optional[str] = None
-    ):
-        """ """
-        from edsl.coop.coop import Coop
-
-        coop = Coop()
-        self._output("Remote inference activated. Sending job to server...")
-        remote_job_creation_data = coop.remote_inference_create(
-            self,
-            description=remote_inference_description,
-            status="queued",
-            iterations=iterations,
-        )
-        job_uuid = remote_job_creation_data.get("uuid")
-        print(f"Job sent to server. (Job uuid={job_uuid}).")
-        return remote_job_creation_data
-
-    @staticmethod
-    def check_status(job_uuid):
-        from edsl.coop.coop import Coop
-
-        coop = Coop()
-        return coop.remote_inference_get(job_uuid)
-
-    def poll_remote_inference_job(
-        self, remote_job_creation_data: dict
-    ) -> Union[Results, None]:
-        from edsl.coop.coop import Coop
-        import time
-        from datetime import datetime
-        from edsl.config import CONFIG
-
-        expected_parrot_url = CONFIG.get("EXPECTED_PARROT_URL")
-
-        job_uuid = remote_job_creation_data.get("uuid")
-
-        coop = Coop()
-        job_in_queue = True
-        while job_in_queue:
-            remote_job_data = coop.remote_inference_get(job_uuid)
-            status = remote_job_data.get("status")
-            if status == "cancelled":
-                print("\r" + " " * 80 + "\r", end="")
-                print("Job cancelled by the user.")
-                print(
-                    f"See {expected_parrot_url}/home/remote-inference for more details."
-                )
-                return None
-            elif status == "failed":
-                print("\r" + " " * 80 + "\r", end="")
-                print("Job failed.")
-                print(
-                    f"See {expected_parrot_url}/home/remote-inference for more details."
-                )
-                return None
-            elif status == "completed":
-                results_uuid = remote_job_data.get("results_uuid")
-                results = coop.get(results_uuid, expected_object_type="results")
-                print("\r" + " " * 80 + "\r", end="")
-                url = f"{expected_parrot_url}/content/{results_uuid}"
-                print(f"Job completed and Results stored on Coop: {url}.")
-                return results
-            else:
-                duration = 5
-                time_checked = datetime.now().strftime("%Y-%m-%d %I:%M:%S %p")
-                frames = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]
-                start_time = time.time()
-                i = 0
-                while time.time() - start_time < duration:
-                    print(
-                        f"\r{frames[i % len(frames)]} Job status: {status} - last update: {time_checked}",
-                        end="",
-                        flush=True,
-                    )
-                    time.sleep(0.1)
-                    i += 1
-
-    def use_remote_inference(self, disable_remote_inference: bool):
-        if disable_remote_inference:
-            return False
-        if not disable_remote_inference:
-            try:
-                from edsl import Coop
-
-                user_edsl_settings = Coop().edsl_settings
-                return user_edsl_settings.get("remote_inference", False)
-            except requests.ConnectionError:
-                pass
-            except CoopServerResponseError as e:
-                pass
-
-        return False
-
-    def use_remote_cache(self):
-        try:
-            from edsl import Coop
-
-            user_edsl_settings = Coop().edsl_settings
-            return user_edsl_settings.get("remote_caching", False)
-        except requests.ConnectionError:
-            pass
-        except CoopServerResponseError as e:
-            pass
-
-        return False
-
-    def check_api_keys(self):
-        from edsl import Model
-
-        for model in self.models + [Model()]:
-            if not model.has_valid_api_key():
-                raise MissingAPIKeyError(
-                    model_name=str(model.model),
-                    inference_service=model._inference_service_,
-                )
-
     def run(
         self,
         n: int = 1,
@@ -852,17 +694,91 @@ class Jobs(Base):

         self.verbose = verbose

-        if remote_inference := self.use_remote_inference(disable_remote_inference):
-            remote_job_creation_data = self.create_remote_inference_job(
-                iterations=n, remote_inference_description=remote_inference_description
+        remote_cache = False
+        remote_inference = False
+
+        if not disable_remote_inference:
+            try:
+                coop = Coop()
+                user_edsl_settings = Coop().edsl_settings
+                remote_cache = user_edsl_settings.get("remote_caching", False)
+                remote_inference = user_edsl_settings.get("remote_inference", False)
+            except Exception:
+                pass
+
+        if remote_inference:
+            import time
+            from datetime import datetime
+            from edsl.config import CONFIG
+
+            expected_parrot_url = CONFIG.get("EXPECTED_PARROT_URL")
+
+            self._output("Remote inference activated. Sending job to server...")
+            if remote_cache:
+                self._output(
+                    "Remote caching activated. The remote cache will be used for this job."
+                )
+
+            remote_job_creation_data = coop.remote_inference_create(
+                self,
+                description=remote_inference_description,
+                status="queued",
+                iterations=n,
             )
-            results = self.poll_remote_inference_job(remote_job_creation_data)
-            if results is None:
-                self._output("Job failed.")
-            return results
+            time_queued = datetime.now().strftime("%m/%d/%Y %I:%M:%S %p")
+            job_uuid = remote_job_creation_data.get("uuid")
+            print(f"Remote inference started (Job uuid={job_uuid}).")
+            # print(f"Job queued at {time_queued}.")
+            job_in_queue = True
+            while job_in_queue:
+                remote_job_data = coop.remote_inference_get(job_uuid)
+                status = remote_job_data.get("status")
+                if status == "cancelled":
+                    print("\r" + " " * 80 + "\r", end="")
+                    print("Job cancelled by the user.")
+                    print(
+                        f"See {expected_parrot_url}/home/remote-inference for more details."
+                    )
+                    return None
+                elif status == "failed":
+                    print("\r" + " " * 80 + "\r", end="")
+                    print("Job failed.")
+                    print(
+                        f"See {expected_parrot_url}/home/remote-inference for more details."
+                    )
+                    return None
+                elif status == "completed":
+                    results_uuid = remote_job_data.get("results_uuid")
+                    results = coop.get(results_uuid, expected_object_type="results")
+                    print("\r" + " " * 80 + "\r", end="")
+                    print(
+                        f"Job completed and Results stored on Coop (Results uuid={results_uuid})."
+                    )
+                    return results
+                else:
+                    duration = 5
+                    time_checked = datetime.now().strftime("%Y-%m-%d %I:%M:%S %p")
+                    frames = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]
+                    start_time = time.time()
+                    i = 0
+                    while time.time() - start_time < duration:
+                        print(
+                            f"\r{frames[i % len(frames)]} Job status: {status} - last update: {time_checked}",
+                            end="",
+                            flush=True,
+                        )
+                        time.sleep(0.1)
+                        i += 1
+        else:
+            if check_api_keys:
+                from edsl import Model

-        if check_api_keys:
-            self.check_api_keys()
+                for model in self.models + [Model()]:
+                    if not model.has_valid_api_key():
+                        raise MissingAPIKeyError(
+                            model_name=str(model.model),
+                            inference_service=model._inference_service_,
+                        )

         # handle cache
         if cache is None or cache is True:
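
The hunk above inlines the polling loop that the deleted `poll_remote_inference_job` used to provide: query the job status every few seconds, animate a spinner between polls, and stop on a terminal status. A generic sketch of that pattern, with `fetch_status` standing in for `coop.remote_inference_get` and the rest illustrative:

import itertools
import time

def poll_until_terminal(fetch_status, job_uuid: str, interval: float = 5.0) -> dict:
    """Poll fetch_status(job_uuid) until it reports a terminal status."""
    frames = itertools.cycle("⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏")
    while True:
        data = fetch_status(job_uuid)
        status = data.get("status")
        if status in ("cancelled", "failed", "completed"):
            print("\r" + " " * 80 + "\r", end="")  # clear the spinner line
            return data
        deadline = time.time() + interval
        while time.time() < deadline:
            print(f"\r{next(frames)} Job status: {status}", end="", flush=True)
            time.sleep(0.1)
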
@@ -874,14 +790,51 @@
 
             cache = Cache()

-        remote_cache = self.use_remote_cache()
-        with RemoteCacheSync(
-            coop=Coop(),
-            cache=cache,
-            output_func=self._output,
-            remote_cache=remote_cache,
-            remote_cache_description=remote_cache_description,
-        ) as r:
+        if not remote_cache:
+            results = self._run_local(
+                n=n,
+                progress_bar=progress_bar,
+                cache=cache,
+                stop_on_exception=stop_on_exception,
+                sidecar_model=sidecar_model,
+                print_exceptions=print_exceptions,
+                raise_validation_errors=raise_validation_errors,
+            )
+
+            results.cache = cache.new_entries_cache()
+
+            self._output(f"There are {len(cache.keys()):,} entries in the local cache.")
+        else:
+            cache_difference = coop.remote_cache_get_diff(cache.keys())
+
+            client_missing_cacheentries = cache_difference.get(
+                "client_missing_cacheentries", []
+            )
+
+            missing_entry_count = len(client_missing_cacheentries)
+            if missing_entry_count > 0:
+                self._output(
+                    f"Updating local cache with {missing_entry_count:,} new "
+                    f"{'entry' if missing_entry_count == 1 else 'entries'} from remote..."
+                )
+                cache.add_from_dict(
+                    {entry.key: entry for entry in client_missing_cacheentries}
+                )
+                self._output("Local cache updated!")
+            else:
+                self._output("No new entries to add to local cache.")
+
+            server_missing_cacheentry_keys = cache_difference.get(
+                "server_missing_cacheentry_keys", []
+            )
+            server_missing_cacheentries = [
+                entry
+                for key in server_missing_cacheentry_keys
+                if (entry := cache.data.get(key)) is not None
+            ]
+            old_entry_keys = [key for key in cache.keys()]
+
+            self._output("Running job...")
             results = self._run_local(
                 n=n,
                 progress_bar=progress_bar,
@@ -891,8 +844,32 @@
                 print_exceptions=print_exceptions,
                 raise_validation_errors=raise_validation_errors,
             )
+            self._output("Job completed!")
+
+            new_cache_entries = list(
+                [entry for entry in cache.values() if entry.key not in old_entry_keys]
+            )
+            server_missing_cacheentries.extend(new_cache_entries)
+
+            new_entry_count = len(server_missing_cacheentries)
+            if new_entry_count > 0:
+                self._output(
+                    f"Updating remote cache with {new_entry_count:,} new "
+                    f"{'entry' if new_entry_count == 1 else 'entries'}..."
+                )
+                coop.remote_cache_create_many(
+                    server_missing_cacheentries,
+                    visibility="private",
+                    description=remote_cache_description,
+                )
+                self._output("Remote cache updated!")
+            else:
+                self._output("No new entries to add to remote cache.")
+
+            results.cache = cache.new_entries_cache()
+
+            self._output(f"There are {len(cache.keys()):,} entries in the local cache.")

-        results.cache = cache.new_entries_cache()
         return results

     def _run_local(self, *args, **kwargs):
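
With `RemoteCacheSync` gone, the `else:` branch above performs the synchronization inline: diff local keys against the server, pull entries the client is missing, run the job, then push entries the server was missing plus anything the run created. A compact sketch of that flow; the `coop` method names mirror the calls in the diff, everything else is illustrative:

def sketch_sync_and_run(coop, cache, run_job, description=None):
    diff = coop.remote_cache_get_diff(cache.keys())

    # Pull: add entries the client is missing to the local cache.
    pulled = diff.get("client_missing_cacheentries", [])
    cache.add_from_dict({entry.key: entry for entry in pulled})

    # Remember pre-run state so entries created by the run can be found later.
    to_push = [
        entry
        for key in diff.get("server_missing_cacheentry_keys", [])
        if (entry := cache.data.get(key)) is not None
    ]
    old_keys = set(cache.keys())

    results = run_job(cache)

    # Push: pre-existing gaps plus entries created by this run.
    to_push.extend(e for e in cache.values() if e.key not in old_keys)
    if to_push:
        coop.remote_cache_create_many(
            to_push, visibility="private", description=description
        )
    return results
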
edsl/jobs/interviews/Interview.py CHANGED
@@ -110,9 +110,9 @@ class Interview:
         self.debug = debug
         self.iteration = iteration
         self.cache = cache
-        self.answers: dict[
-            str, str
-        ] = Answers()  # will get filled in as interview progresses
+        self.answers: dict[str, str] = (
+            Answers()
+        )  # will get filled in as interview progresses
         self.sidecar_model = sidecar_model

         # Trackers
@@ -143,9 +143,9 @@ class Interview:
         The keys are the question names; the values are the lists of status log changes for each task.
         """
         for task_creator in self.task_creators.values():
-            self._task_status_log_dict[
-                task_creator.question.question_name
-            ] = task_creator.status_log
+            self._task_status_log_dict[task_creator.question.question_name] = (
+                task_creator.status_log
+            )
         return self._task_status_log_dict

     @property
@@ -159,13 +159,13 @@ class Interview:
         return self.task_creators.interview_status

     # region: Serialization
-    def _to_dict(self, include_exceptions=True) -> dict[str, Any]:
+    def _to_dict(self, include_exceptions=False) -> dict[str, Any]:
         """Return a dictionary representation of the Interview instance.
         This is just for hashing purposes.

         >>> i = Interview.example()
         >>> hash(i)
-        1217840301076717434
+        1646262796627658719
         """
         d = {
             "agent": self.agent._to_dict(),
@@ -177,39 +177,11 @@ class Interview:
         }
         if include_exceptions:
             d["exceptions"] = self.exceptions.to_dict()
-        return d
-
-    @classmethod
-    def from_dict(cls, d: dict[str, Any]) -> "Interview":
-        """Return an Interview instance from a dictionary."""
-        agent = Agent.from_dict(d["agent"])
-        survey = Survey.from_dict(d["survey"])
-        scenario = Scenario.from_dict(d["scenario"])
-        model = LanguageModel.from_dict(d["model"])
-        iteration = d["iteration"]
-        interview = cls(
-            agent=agent,
-            survey=survey,
-            scenario=scenario,
-            model=model,
-            iteration=iteration,
-        )
-        if "exceptions" in d:
-            exceptions = InterviewExceptionCollection.from_dict(d["exceptions"])
-            interview.exceptions = exceptions
-        return interview

     def __hash__(self) -> int:
         from edsl.utilities.utilities import dict_hash

-        return dict_hash(self._to_dict(include_exceptions=False))
-
-    def __eq__(self, other: "Interview") -> bool:
-        """
-        >>> from edsl.jobs.interviews.Interview import Interview; i = Interview.example(); d = i._to_dict(); i2 = Interview.from_dict(d); i == i2
-        True
-        """
-        return hash(self) == hash(other)
+        return dict_hash(self._to_dict())

     # endregion

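The removed `__eq__` defines Interview equality as equality of content hashes: two instances compare equal when their serialized dictionaries hash to the same value. A toy sketch of the pattern, with a stand-in for `dict_hash` (whose real implementation is not shown in this diff):

import hashlib
import json

def dict_hash(d: dict) -> int:
    # Stand-in: any stable hash of a canonical serialization works here.
    digest = hashlib.sha256(json.dumps(d, sort_keys=True).encode()).digest()
    return int.from_bytes(digest[:8], "big")

class ContentHashed:
    def _to_dict(self) -> dict:
        raise NotImplementedError

    def __hash__(self) -> int:
        return dict_hash(self._to_dict())

    def __eq__(self, other) -> bool:
        # Equality by content hash, as in the removed Interview.__eq__.
        return hash(self) == hash(other)
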
@@ -486,11 +458,11 @@ class Interview:
         """
         current_question_index: int = self.to_index[current_question.question_name]

-        next_question: Union[
-            int, EndOfSurvey
-        ] = self.survey.rule_collection.next_question(
-            q_now=current_question_index,
-            answers=self.answers | self.scenario | self.agent["traits"],
+        next_question: Union[int, EndOfSurvey] = (
+            self.survey.rule_collection.next_question(
+                q_now=current_question_index,
+                answers=self.answers | self.scenario | self.agent["traits"],
+            )
         )

         next_question_index = next_question.next_q
edsl/jobs/interviews/InterviewExceptionCollection.py CHANGED
@@ -34,15 +34,6 @@ class InterviewExceptionCollection(UserDict):
         newdata = {k: [e.to_dict() for e in v] for k, v in self.data.items()}
         return newdata

-    @classmethod
-    def from_dict(cls, data: dict) -> "InterviewExceptionCollection":
-        """Create an InterviewExceptionCollection from a dictionary."""
-        collection = cls()
-        for question_name, entries in data.items():
-            for entry in entries:
-                collection.add(question_name, InterviewExceptionEntry.from_dict(entry))
-        return collection
-
     def _repr_html_(self) -> str:
         from edsl.utilities.utilities import data_to_html

edsl/jobs/interviews/InterviewExceptionEntry.py CHANGED
@@ -9,6 +9,7 @@ class InterviewExceptionEntry:
         self,
         *,
         exception: Exception,
+        # failed_question: FailedQuestion,
         invigilator: "Invigilator",
         traceback_format="text",
         answers=None,
@@ -133,48 +134,22 @@ class InterviewExceptionEntry:
         console.print(tb)
         return html_output.getvalue()

-    @staticmethod
-    def serialize_exception(exception: Exception) -> dict:
-        return {
-            "type": type(exception).__name__,
-            "message": str(exception),
-            "traceback": "".join(
-                traceback.format_exception(
-                    type(exception), exception, exception.__traceback__
-                )
-            ),
-        }
-
-    @staticmethod
-    def deserialize_exception(data: dict) -> Exception:
-        try:
-            exception_class = globals()[data["type"]]
-        except KeyError:
-            exception_class = Exception
-        return exception_class(data["message"])
-
     def to_dict(self) -> dict:
         """Return the exception as a dictionary.

         >>> entry = InterviewExceptionEntry.example()
-        >>> _ = entry.to_dict()
+        >>> entry.to_dict()['exception']
+        ValueError()
+
         """
         return {
-            "exception": self.serialize_exception(self.exception),
+            "exception": self.exception,
             "time": self.time,
             "traceback": self.traceback,
+            # "failed_question": self.failed_question.to_dict(),
             "invigilator": self.invigilator.to_dict(),
         }

-    @classmethod
-    def from_dict(cls, data: dict) -> "InterviewExceptionEntry":
-        """Create an InterviewExceptionEntry from a dictionary."""
-        from edsl.agents.Invigilator import InvigilatorAI
-
-        exception = cls.deserialize_exception(data["exception"])
-        invigilator = InvigilatorAI.from_dict(data["invigilator"])
-        return cls(exception=exception, invigilator=invigilator)
-
     def push(self):
         from edsl import Coop
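
The deleted helpers implement a common exception round-trip: serialize the type name, message, and formatted traceback, then rebuild a best-effort instance on load, falling back to plain `Exception` when the original type is not in scope. A standalone sketch of the same pattern, taken almost verbatim from the removed code:

import traceback

def serialize_exception(exception: Exception) -> dict:
    return {
        "type": type(exception).__name__,
        "message": str(exception),
        "traceback": "".join(
            traceback.format_exception(
                type(exception), exception, exception.__traceback__
            )
        ),
    }

def deserialize_exception(data: dict) -> Exception:
    # Fall back to Exception when the recorded type isn't resolvable here
    # (like the removed code, this only rebuilds the message, not the state).
    exception_class = globals().get(data["type"], Exception)
    return exception_class(data["message"])
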