edsl 0.1.57__py3-none-any.whl → 0.1.59__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
@@ -1,11 +1,12 @@
  import re
+ import math
  from typing import Optional, Union, Literal, TYPE_CHECKING, NewType, Callable, Any
  from dataclasses import dataclass
  from ..coop import CoopServerResponseError
- from ..coop.utils import VisibilityType
+ from ..coop.utils import VisibilityType, CostConverter
  from ..coop.coop import RemoteInferenceResponse, RemoteInferenceCreationInfo
  from .jobs_status_enums import JobsStatus
- from .jobs_remote_inference_logger import JobLogger
+ from .jobs_remote_inference_logger import JobLogger, JobRunExceptionCounter, ModelCost
  from .exceptions import RemoteInferenceError
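The three new names imported from `.jobs_remote_inference_logger` are consumed further down in this diff. As a reading aid, here is a minimal sketch of what the two payload types plausibly look like — the field names are taken from the constructor calls below, but the dataclass form, types, and defaults are assumptions, not the package source:

```python
# Editor's sketch only: fields inferred from the JobRunExceptionCounter(...)
# and ModelCost(...) calls later in this diff; types and defaults are assumed.
from dataclasses import dataclass
from typing import Optional


@dataclass
class JobRunExceptionCounter:
    exception_type: Optional[str] = None
    inference_service: Optional[str] = None
    model: Optional[str] = None
    question_name: Optional[str] = None
    exception_count: Optional[int] = None


@dataclass
class ModelCost:
    service: Optional[str] = None
    model: Optional[str] = None
    input_tokens: Optional[int] = None
    input_cost_usd: Optional[float] = None
    output_tokens: Optional[int] = None
    output_cost_usd: Optional[float] = None
    input_cost_credits_with_cache: Optional[float] = None
    output_cost_credits_with_cache: Optional[float] = None
```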
@@ -94,6 +95,12 @@ class JobsRemoteInferenceHandler:
  "Remote inference activated. Sending job to server...",
  status=JobsStatus.QUEUED,
  )
+ logger.add_info(
+ "remote_inference_url", f"{self.expected_parrot_url}/home/remote-inference"
+ )
+ logger.add_info(
+ "remote_cache_url", f"{self.expected_parrot_url}/home/remote-cache"
+ )
  remote_job_creation_data = coop.remote_inference_create(
  self.jobs,
  description=remote_inference_description,
@@ -183,7 +190,9 @@ class JobsRemoteInferenceHandler:
  self, job_info: RemoteJobInfo, remote_job_data: RemoteInferenceResponse
  ) -> None:
  "Handles a failed job by logging the error and updating the job status."
- latest_error_report_url = remote_job_data.get("latest_error_report_url")
+ error_report_url = remote_job_data.get("latest_job_run_details", {}).get(
+ "error_report_url"
+ )

  reason = remote_job_data.get("reason")
@@ -193,8 +202,8 @@ class JobsRemoteInferenceHandler:
  status=JobsStatus.FAILED,
  )

- if latest_error_report_url:
- job_info.logger.add_info("error_report_url", latest_error_report_url)
+ if error_report_url:
+ job_info.logger.add_info("error_report_url", error_report_url)

  job_info.logger.update("Job failed.", status=JobsStatus.FAILED)
  job_info.logger.update(
@@ -206,39 +215,46 @@ class JobsRemoteInferenceHandler:
  status=JobsStatus.FAILED,
  )

- def _handle_partially_failed_job_interview_details(
+ def _update_interview_details(
  self, job_info: RemoteJobInfo, remote_job_data: RemoteInferenceResponse
  ) -> None:
- "Extracts the interview details from the remote job data."
- try:
- # Job details is a string of the form "64 out of 1,758 interviews failed"
- job_details = remote_job_data.get("latest_failure_description")
-
- text_without_commas = job_details.replace(",", "")
-
- # Find all numbers in the text
- numbers = [int(num) for num in re.findall(r"\d+", text_without_commas)]
-
- failed = numbers[0]
- total = numbers[1]
- completed = total - failed
-
- job_info.logger.add_info("completed_interviews", completed)
- job_info.logger.add_info("failed_interviews", failed)
- # This is mainly helpful metadata, and any errors here should not stop the code
- except:
- pass
+ "Updates the interview details in the job info."
+ latest_job_run_details = remote_job_data.get("latest_job_run_details", {})
+ interview_details = latest_job_run_details.get("interview_details", {}) or {}
+ completed_interviews = interview_details.get("completed_interviews", 0)
+ interviews_with_exceptions = interview_details.get(
+ "interviews_with_exceptions", 0
+ )
+ interviews_without_exceptions = (
+ completed_interviews - interviews_with_exceptions
+ )
+ job_info.logger.add_info("completed_interviews", interviews_without_exceptions)
+ job_info.logger.add_info("failed_interviews", interviews_with_exceptions)
+
+ exception_summary = interview_details.get("exception_summary", []) or []
+ if exception_summary:
+ job_run_exception_counters = []
+ for exception in exception_summary:
+ exception_counter = JobRunExceptionCounter(
+ exception_type=exception.get("exception_type"),
+ inference_service=exception.get("inference_service"),
+ model=exception.get("model"),
+ question_name=exception.get("question_name"),
+ exception_count=exception.get("exception_count"),
+ )
+ job_run_exception_counters.append(exception_counter)
+ job_info.logger.add_info("exception_summary", job_run_exception_counters)

  def _handle_partially_failed_job(
  self, job_info: RemoteJobInfo, remote_job_data: RemoteInferenceResponse
  ) -> None:
  "Handles a partially failed job by logging the error and updating the job status."
- self._handle_partially_failed_job_interview_details(job_info, remote_job_data)
-
- latest_error_report_url = remote_job_data.get("latest_error_report_url")
+ error_report_url = remote_job_data.get("latest_job_run_details", {}).get(
+ "error_report_url"
+ )

- if latest_error_report_url:
- job_info.logger.add_info("error_report_url", latest_error_report_url)
+ if error_report_url:
+ job_info.logger.add_info("error_report_url", error_report_url)

  job_info.logger.update(
  "Job completed with partial results.", status=JobsStatus.PARTIALLY_FAILED
@@ -263,6 +279,149 @@ class JobsRemoteInferenceHandler:
  )
  time.sleep(self.poll_interval)

+ def _get_expenses_from_results(self, results: "Results") -> list:
+ """
+ Calculates expenses from a Results object.
+
+ Args:
+ results: Results object containing model responses
+
+ Returns:
+ A list of per-model cost dictionaries, one per (service, model) pair
+ """
+ expenses = {}
+
+ for result in results:
+ raw_response = result["raw_model_response"]
+
+ # Process each cost field in the response
+ for key in raw_response:
+ if not key.endswith("_cost"):
+ continue
+
+ result_cost = raw_response[key]
+ if not isinstance(result_cost, (int, float)):
+ continue
+
+ question_name = key.removesuffix("_cost")
+ cache_used = result["cache_used_dict"][question_name]
+
+ # Get expense keys for input and output tokens
+ input_key = (
+ result["model"]._inference_service_,
+ result["model"].model,
+ "input",
+ raw_response[f"{question_name}_input_price_per_million_tokens"],
+ )
+ output_key = (
+ result["model"]._inference_service_,
+ result["model"].model,
+ "output",
+ raw_response[f"{question_name}_output_price_per_million_tokens"],
+ )
+
+ # Update input token expenses
+ if input_key not in expenses:
+ expenses[input_key] = {
+ "tokens": 0,
+ "cost_usd": 0,
+ "cost_usd_with_cache": 0,
+ }
+
+ input_price_per_million_tokens = input_key[3]
+ input_tokens = raw_response[f"{question_name}_input_tokens"]
+ input_cost = (input_price_per_million_tokens / 1_000_000) * input_tokens
+
+ expenses[input_key]["tokens"] += input_tokens
+ expenses[input_key]["cost_usd"] += input_cost
+
+ if not cache_used:
+ expenses[input_key]["cost_usd_with_cache"] += input_cost
+
+ # Update output token expenses
+ if output_key not in expenses:
+ expenses[output_key] = {
+ "tokens": 0,
+ "cost_usd": 0,
+ "cost_usd_with_cache": 0,
+ }
+
+ output_price_per_million_tokens = output_key[3]
+ output_tokens = raw_response[f"{question_name}_output_tokens"]
+ output_cost = (
+ output_price_per_million_tokens / 1_000_000
+ ) * output_tokens
+
+ expenses[output_key]["tokens"] += output_tokens
+ expenses[output_key]["cost_usd"] += output_cost
+
+ if not cache_used:
+ expenses[output_key]["cost_usd_with_cache"] += output_cost
+
+ expenses_by_model = {}
+ for expense_key, expense_usage in expenses.items():
+ service, model, token_type, _ = expense_key
+ model_key = (service, model)
+
+ if model_key not in expenses_by_model:
+ expenses_by_model[model_key] = {
+ "service": service,
+ "model": model,
+ "input_tokens": 0,
+ "input_cost_usd": 0,
+ "input_cost_usd_with_cache": 0,
+ "output_tokens": 0,
+ "output_cost_usd": 0,
+ "output_cost_usd_with_cache": 0,
+ }
+
+ if token_type == "input":
+ expenses_by_model[model_key]["input_tokens"] += expense_usage["tokens"]
+ expenses_by_model[model_key]["input_cost_usd"] += expense_usage[
+ "cost_usd"
+ ]
+ expenses_by_model[model_key][
+ "input_cost_usd_with_cache"
+ ] += expense_usage["cost_usd_with_cache"]
+ elif token_type == "output":
+ expenses_by_model[model_key]["output_tokens"] += expense_usage["tokens"]
+ expenses_by_model[model_key]["output_cost_usd"] += expense_usage[
+ "cost_usd"
+ ]
+ expenses_by_model[model_key][
+ "output_cost_usd_with_cache"
+ ] += expense_usage["cost_usd_with_cache"]
+
+ converter = CostConverter()
+ for model_key, model_cost_dict in expenses_by_model.items():
+
+ # Handle full cost (without cache)
+ input_cost = model_cost_dict["input_cost_usd"]
+ output_cost = model_cost_dict["output_cost_usd"]
+ model_cost_dict["input_cost_credits"] = converter.usd_to_credits(input_cost)
+ model_cost_dict["output_cost_credits"] = converter.usd_to_credits(
+ output_cost
+ )
+ # Convert back to USD (to get the rounded value)
+ model_cost_dict["input_cost_usd"] = converter.credits_to_usd(
+ model_cost_dict["input_cost_credits"]
+ )
+ model_cost_dict["output_cost_usd"] = converter.credits_to_usd(
+ model_cost_dict["output_cost_credits"]
+ )
+
+ # Handle cost with cache
+ input_cost_with_cache = model_cost_dict["input_cost_usd_with_cache"]
+ output_cost_with_cache = model_cost_dict["output_cost_usd_with_cache"]
+ model_cost_dict["input_cost_credits_with_cache"] = converter.usd_to_credits(
+ input_cost_with_cache
+ )
+ model_cost_dict["output_cost_credits_with_cache"] = (
+ converter.usd_to_credits(output_cost_with_cache)
+ )
+ return list(expenses_by_model.values())
+
  def _fetch_results_and_log(
  self,
  job_info: RemoteJobInfo,
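The token accounting above is linear: each response contributes `(price_per_million / 1_000_000) * tokens` to `cost_usd`, and contributes to `cost_usd_with_cache` only when the response was not served from cache. A tiny worked example of that formula, with invented numbers:

```python
import math

# Same arithmetic as _get_expenses_from_results, with invented inputs.
input_price_per_million_tokens = 2.50  # invented price
input_tokens = 12_000

input_cost = (input_price_per_million_tokens / 1_000_000) * input_tokens
assert math.isclose(input_cost, 0.03)  # 12k tokens at $2.50/M tokens

# A cached response still adds to cost_usd but not to cost_usd_with_cache,
# so cost_usd_with_cache reflects what the run actually spent given cache hits.
```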
@@ -274,12 +433,36 @@ class JobsRemoteInferenceHandler:
  "Fetches the results object and logs the results URL."
  job_info.logger.add_info("results_uuid", results_uuid)
  results = object_fetcher(results_uuid, expected_object_type="results")
+
+ model_cost_dicts = self._get_expenses_from_results(results)
+
+ model_costs = [
+ ModelCost(
+ service=model_cost_dict.get("service"),
+ model=model_cost_dict.get("model"),
+ input_tokens=model_cost_dict.get("input_tokens"),
+ input_cost_usd=model_cost_dict.get("input_cost_usd"),
+ output_tokens=model_cost_dict.get("output_tokens"),
+ output_cost_usd=model_cost_dict.get("output_cost_usd"),
+ input_cost_credits_with_cache=model_cost_dict.get(
+ "input_cost_credits_with_cache"
+ ),
+ output_cost_credits_with_cache=model_cost_dict.get(
+ "output_cost_credits_with_cache"
+ ),
+ )
+ for model_cost_dict in model_cost_dicts
+ ]
+ job_info.logger.add_info("model_costs", model_costs)
+
  results_url = remote_job_data.get("results_url")
  if "localhost" in results_url:
  results_url = results_url.replace("8000", "1234")
  job_info.logger.add_info("results_url", results_url)

  if job_status == "completed":
+ job_info.logger.add_info("completed_interviews", len(results))
+ job_info.logger.add_info("failed_interviews", 0)
  job_info.logger.update(
  f"Job completed and Results stored on Coop. [View Results]({results_url})",
  status=JobsStatus.COMPLETED,
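One subtlety in the `results_url` rewrite above: `str.replace("8000", "1234")` substitutes every occurrence of the substring, not just the port. A standalone illustration (URLs invented):

```python
# Intended case: remap the local dev server port.
url = "http://localhost:8000/results/abc"
assert url.replace("8000", "1234") == "http://localhost:1234/results/abc"

# Edge case: the same substring elsewhere in the URL is rewritten too.
url = "http://localhost:8000/results/id-8000"
assert url.replace("8000", "1234") == "http://localhost:1234/results/id-1234"
```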
@@ -302,6 +485,7 @@ class JobsRemoteInferenceHandler:
  ) -> Union[None, "Results", Literal["continue"]]:
  """Makes one attempt to fetch and process a remote job's status and results."""
  remote_job_data = remote_job_data_fetcher(job_info.job_uuid)
+ self._update_interview_details(job_info, remote_job_data)
  status = remote_job_data.get("status")
  reason = remote_job_data.get("reason")
  if status == "cancelled":
@@ -769,8 +769,45 @@ class LanguageModel(
  params["question_name"] = invigilator.question.question_name
  # Get timeout from configuration
  from ..config import CONFIG
-
- TIMEOUT = float(CONFIG.get("EDSL_API_TIMEOUT"))
+ import logging
+
+ logger = logging.getLogger(__name__)
+ base_timeout = float(CONFIG.get("EDSL_API_TIMEOUT"))
+
+ # Adjust timeout if files are present
+ import time
+
+ start = time.time()
+ if files_list:
+ # Calculate total size of attached files in MB
+ file_sizes = []
+ for file in files_list:
+ # Try different attributes that might contain the file content
+ if hasattr(file, "base64_string") and file.base64_string:
+ file_sizes.append(len(file.base64_string) / (1024 * 1024))
+ elif hasattr(file, "content") and file.content:
+ file_sizes.append(len(file.content) / (1024 * 1024))
+ elif hasattr(file, "data") and file.data:
+ file_sizes.append(len(file.data) / (1024 * 1024))
+ else:
+ # Default minimum size if we can't determine actual size
+ file_sizes.append(1)  # Assume at least 1MB
+ total_size_mb = sum(file_sizes)
+
+ # Increase timeout proportionally to file size
+ # For each MB of file size, add 10 seconds to the timeout (adjust as needed)
+ size_adjustment = total_size_mb * 10
+
+ # Cap the maximum timeout adjustment at 5 minutes (300 seconds)
+ size_adjustment = min(size_adjustment, 300)
+
+ TIMEOUT = base_timeout + size_adjustment
+
+ logger.info(
+ f"Adjusted timeout for API call with {len(files_list)} files (total size: {total_size_mb:.2f}MB). Base timeout: {base_timeout}s, New timeout: {TIMEOUT}s"
+ )
+ else:
+ TIMEOUT = base_timeout

  # Execute the model call with timeout
  response = await asyncio.wait_for(f(**params), timeout=TIMEOUT)
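The branch above reduces to a capped linear formula: `TIMEOUT = base_timeout + min(10 * total_size_mb, 300)`. A worked example, assuming an invented 60-second `EDSL_API_TIMEOUT`:

```python
# Same logic as above: +10 s per MB of attachments, capped at 300 s extra.
base_timeout = 60.0   # invented EDSL_API_TIMEOUT value
total_size_mb = 45.0  # invented total attachment size

size_adjustment = min(total_size_mb * 10, 300)  # 450 -> capped at 300
timeout = base_timeout + size_adjustment
assert timeout == 360.0
```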
@@ -1019,7 +1056,7 @@ class LanguageModel(

  # Combine model name and parameters
  return (
- f"Model(model_name = '{self.model}'"
+ f"Model(model_name = '{self.model}', service_name = '{self._inference_service_}'"
  + (f", {param_string}" if param_string else "")
  + ")"
  )
@@ -19,56 +19,12 @@ class ResponseCost:
  total_cost: Union[float, str, None] = None


- class PriceManager:
- _instance = None
- _price_lookup: Dict[Tuple[str, str], Dict] = {}
- _is_initialized = False
-
- def __new__(cls):
- if cls._instance is None:
- instance = super(PriceManager, cls).__new__(cls)
- instance._price_lookup = {}  # Instance-specific attribute
- instance._is_initialized = False
- cls._instance = instance  # Store the instance directly
- return instance
- return cls._instance
-
- def __init__(self):
- """Initialize the singleton instance only once."""
- if not self._is_initialized:
- self._is_initialized = True
- self.refresh_prices()
-
- @classmethod
- def get_instance(cls):
- """Get the singleton instance, creating it if necessary."""
- if cls._instance is None:
- cls()  # Create the instance if it doesn't exist
- return cls._instance
-
- @classmethod
- def reset(cls):
- """Reset the singleton instance to clean up resources."""
- cls._instance = None
- cls._is_initialized = False
- cls._price_lookup = {}
-
- def __del__(self):
- """Ensure proper cleanup when the instance is garbage collected."""
- try:
- self._price_lookup = {}  # Clean up resources
- except:
- pass  # Ignore any cleanup errors
+ class PriceRetriever:
+ DEFAULT_INPUT_PRICE_PER_MILLION_TOKENS = 1.0
+ DEFAULT_OUTPUT_PRICE_PER_MILLION_TOKENS = 1.0

- def refresh_prices(self) -> None:
- """Fetch fresh prices and update the internal price lookup."""
- from edsl.coop import Coop
-
- c = Coop()
- try:
- self._price_lookup = c.fetch_prices()
- except Exception as e:
- print(f"Error fetching prices: {str(e)}")
+ def __init__(self, price_lookup: Dict[Tuple[str, str], Dict]):
+ self._price_lookup = price_lookup

  def get_price(self, inference_service: str, model: str) -> Dict:
  """Get the price information for a specific service and model."""
@@ -77,10 +33,6 @@ class PriceManager:
  inference_service
  )

- def get_all_prices(self) -> Dict[Tuple[str, str], Dict]:
- """Get the complete price lookup dictionary."""
- return self._price_lookup.copy()
-
  def _get_fallback_price(self, inference_service: str) -> Dict:
  """
  Get fallback prices for a service.
@@ -101,15 +53,21 @@ class PriceManager:
  if service == inference_service
  ]

- default_price_info = {
+ default_input_price_info = {
+ "one_usd_buys": 1_000_000,
+ "service_stated_token_qty": 1_000_000,
+ "service_stated_token_price": self.DEFAULT_INPUT_PRICE_PER_MILLION_TOKENS,
+ }
+
+ default_output_price_info = {
  "one_usd_buys": 1_000_000,
  "service_stated_token_qty": 1_000_000,
- "service_stated_token_price": 1.0,
+ "service_stated_token_price": self.DEFAULT_OUTPUT_PRICE_PER_MILLION_TOKENS,
  }

  # Find the most expensive price entries (lowest tokens per USD)
- input_price_info = default_price_info
- output_price_info = default_price_info
+ input_price_info = default_input_price_info
+ output_price_info = default_output_price_info

  input_prices = [
  PriceEntry(float(p["input"]["one_usd_buys"]), p["input"])
@@ -156,6 +114,82 @@ class PriceManager:
  price_per_million_tokens = round(price_per_token * 1_000_000, 10)
  return price_per_million_tokens

+
+ class PriceManager:
+ _instance = None
+ _price_lookup: Dict[Tuple[str, str], Dict] = {}
+ _is_initialized = False
+
+ def __new__(cls):
+ if cls._instance is None:
+ instance = super(PriceManager, cls).__new__(cls)
+ instance._price_lookup = {}  # Instance-specific attribute
+ instance._is_initialized = False
+ cls._instance = instance  # Store the instance directly
+ return instance
+ return cls._instance
+
+ def __init__(self):
+ """Initialize the singleton instance only once."""
+ if not self._is_initialized:
+ self._is_initialized = True
+ self.refresh_prices()
+
+ @classmethod
+ def get_instance(cls):
+ """Get the singleton instance, creating it if necessary."""
+ if cls._instance is None:
+ cls()  # Create the instance if it doesn't exist
+ return cls._instance
+
+ @classmethod
+ def reset(cls):
+ """Reset the singleton instance to clean up resources."""
+ cls._instance = None
+ cls._is_initialized = False
+ cls._price_lookup = {}
+
+ def __del__(self):
+ """Ensure proper cleanup when the instance is garbage collected."""
+ try:
+ self._price_lookup = {}  # Clean up resources
+ except:
+ pass  # Ignore any cleanup errors
+
+ @property
+ def price_retriever(self):
+ return PriceRetriever(self._price_lookup)
+
+ def refresh_prices(self) -> None:
+ """Fetch fresh prices and update the internal price lookup."""
+ from edsl.coop import Coop
+
+ c = Coop()
+ try:
+ self._price_lookup = c.fetch_prices()
+ except Exception as e:
+ print(f"Error fetching prices: {str(e)}")
+
+ def get_price(self, inference_service: str, model: str) -> Dict:
+ """Get the price information for a specific service and model."""
+ return self.price_retriever.get_price(inference_service, model)
+
+ def get_all_prices(self) -> Dict[Tuple[str, str], Dict]:
+ """Get the complete price lookup dictionary."""
+ return self._price_lookup.copy()
+
+ def get_price_per_million_tokens(
+ self,
+ relevant_prices: Dict,
+ token_type: Literal["input", "output"],
+ ) -> Dict:
+ """
+ Get the price per million tokens for a specific service, model, and token type.
+ """
+ return self.price_retriever.get_price_per_million_tokens(
+ relevant_prices, token_type
+ )
+
  def _calculate_total_cost(
  self,
  relevant_prices: Dict,
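The re-added `PriceManager` keeps its singleton contract while delegating lookups to the new stateless `PriceRetriever`. A minimal sketch of what that contract implies for callers, using only methods visible in this diff (note that constructing the first instance triggers `refresh_prices()`, which fetches prices over the network):

```python
# Sketch: construction and get_instance() return the same cached object.
a = PriceManager()               # first call: __init__ runs refresh_prices()
b = PriceManager.get_instance()
assert a is b

PriceManager.reset()             # drops the cached instance and price table
c = PriceManager()               # a fresh singleton is created (and refreshed)
assert c is not a
```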
edsl/prompts/prompt.py CHANGED
@@ -290,6 +290,7 @@ class Prompt(PersistenceMixin, RepresentationMixin):
  return result
  except Exception as e:
  print(f"Error rendering prompt: {e}")
+ raise e
  return self

  @staticmethod
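After this change the trailing `return self` is unreachable: `raise e` re-raises before the fallback return can run. A standalone illustration of the control flow:

```python
def render_or_raise():
    try:
        return 1 / 0                      # raises ZeroDivisionError
    except Exception as e:
        print(f"Error rendering prompt: {e}")
        raise e                           # control leaves the function here...
    return "fallback"                     # ...so this line can never execute
```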