PyPI - edsl - Versions diffs - 0.1.57__py3-none-any.whl → 0.1.59__py3-none-any.whl - Mend

edsl 0.1.57__py3-none-any.whl → 0.1.59__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

edsl/__version__.py +1 -1
edsl/agents/agent.py +23 -4
edsl/agents/agent_list.py +36 -6
edsl/coop/coop.py +274 -35
edsl/coop/utils.py +63 -0
edsl/dataset/dataset.py +74 -0
edsl/dataset/dataset_operations_mixin.py +67 -62
edsl/inference_services/services/test_service.py +1 -1
edsl/interviews/exception_tracking.py +92 -20
edsl/invigilators/invigilators.py +5 -1
edsl/invigilators/prompt_constructor.py +299 -136
edsl/jobs/html_table_job_logger.py +394 -48
edsl/jobs/jobs_pricing_estimation.py +19 -114
edsl/jobs/jobs_remote_inference_logger.py +29 -0
edsl/jobs/jobs_runner_status.py +52 -21
edsl/jobs/remote_inference.py +214 -30
edsl/language_models/language_model.py +40 -3
edsl/language_models/price_manager.py +91 -57
edsl/prompts/prompt.py +1 -0
edsl/questions/question_list.py +76 -20
edsl/results/results.py +8 -1
edsl/scenarios/file_store.py +8 -12
edsl/scenarios/scenario.py +50 -2
edsl/scenarios/scenario_list.py +34 -12
edsl/surveys/survey.py +4 -0
edsl/tasks/task_history.py +180 -6
edsl/utilities/wikipedia.py +194 -0
{edsl-0.1.57.dist-info → edsl-0.1.59.dist-info}/METADATA +4 -3
{edsl-0.1.57.dist-info → edsl-0.1.59.dist-info}/RECORD +32 -32
edsl/language_models/compute_cost.py +0 -78
{edsl-0.1.57.dist-info → edsl-0.1.59.dist-info}/LICENSE +0 -0
{edsl-0.1.57.dist-info → edsl-0.1.59.dist-info}/WHEEL +0 -0
{edsl-0.1.57.dist-info → edsl-0.1.59.dist-info}/entry_points.txt +0 -0

edsl/jobs/jobs_pricing_estimation.py CHANGED Viewed

@@ -13,16 +13,15 @@ if TYPE_CHECKING:
     from ..invigilators.invigilator_base import Invigilator
 from .fetch_invigilator import FetchInvigilator
+from ..coop.utils import CostConverter
 from ..caching import CacheEntry
 from ..dataset import Dataset
+from ..language_models.price_manager import PriceRetriever
 logger = logging.getLogger(__name__)
 class PromptCostEstimator:
-    DEFAULT_INPUT_PRICE_PER_MILLION_TOKENS = 1.0
-    DEFAULT_OUTPUT_PRICE_PER_MILLION_TOKENS = 1.0
     CHARS_PER_TOKEN = 4
     OUTPUT_TOKENS_PER_INPUT_TOKEN = 0.75
     PIPING_MULTIPLIER = 2
@@ -37,7 +36,7 @@ class PromptCostEstimator:
     ):
         self.system_prompt = system_prompt
         self.user_prompt = user_prompt
-        self.price_lookup = price_lookup
+        self.price_retriever = PriceRetriever(price_lookup)
         self.inference_service = inference_service
         self.model = model
@@ -49,91 +48,6 @@ class PromptCostEstimator:
             return PromptCostEstimator.PIPING_MULTIPLIER
         return 1
-    def _get_fallback_price(self, inference_service: str) -> Dict:
-        """
-        Get fallback prices for a service.
-        - First fallback: The highest input and output prices for that service from the price lookup.
-        - Second fallback: $1.00 per million tokens (for both input and output).
-        Args:
-            inference_service (str): The inference service name
-        Returns:
-            Dict: Price information
-        """
-        PriceEntry = namedtuple("PriceEntry", ["tokens_per_usd", "price_info"])
-        service_prices = [
-            prices
-            for (service, _), prices in self.price_lookup.items()
-            if service == inference_service
-        ]
-        default_input_price_info = {
-            "one_usd_buys": 1_000_000,
-            "service_stated_token_qty": 1_000_000,
-            "service_stated_token_price": self.DEFAULT_INPUT_PRICE_PER_MILLION_TOKENS,
-        }
-        default_output_price_info = {
-            "one_usd_buys": 1_000_000,
-            "service_stated_token_qty": 1_000_000,
-            "service_stated_token_price": self.DEFAULT_OUTPUT_PRICE_PER_MILLION_TOKENS,
-        }
-        # Find the most expensive price entries (lowest tokens per USD)
-        input_price_info = default_input_price_info
-        output_price_info = default_output_price_info
-        input_prices = [
-            PriceEntry(float(p["input"]["one_usd_buys"]), p["input"])
-            for p in service_prices
-            if "input" in p
-        ]
-        if input_prices:
-            input_price_info = min(
-                input_prices, key=lambda price: price.tokens_per_usd
-            ).price_info
-        output_prices = [
-            PriceEntry(float(p["output"]["one_usd_buys"]), p["output"])
-            for p in service_prices
-            if "output" in p
-        ]
-        if output_prices:
-            output_price_info = min(
-                output_prices, key=lambda price: price.tokens_per_usd
-            ).price_info
-        return {
-            "input": input_price_info,
-            "output": output_price_info,
-        }
-    def get_price(self, inference_service: str, model: str) -> Dict:
-        """Get the price information for a specific service and model."""
-        key = (inference_service, model)
-        return self.price_lookup.get(key) or self._get_fallback_price(inference_service)
-    def get_price_per_million_tokens(
-        self,
-        relevant_prices: Dict,
-        token_type: Literal["input", "output"],
-    ) -> Dict:
-        """
-        Get the price per million tokens for a specific service, model, and token type.
-        """
-        service_price = relevant_prices[token_type]["service_stated_token_price"]
-        service_qty = relevant_prices[token_type]["service_stated_token_qty"]
-        if service_qty == 1_000_000:
-            price_per_million_tokens = service_price
-        elif service_qty == 1_000:
-            price_per_million_tokens = service_price * 1_000
-        else:
-            price_per_token = service_price / service_qty
-            price_per_million_tokens = round(price_per_token * 1_000_000, 10)
-        return price_per_million_tokens
     def __call__(self):
         user_prompt_chars = len(str(self.user_prompt)) * self.get_piping_multiplier(
             str(self.user_prompt)
@@ -145,13 +59,15 @@ class PromptCostEstimator:
         input_tokens = (user_prompt_chars + system_prompt_chars) // self.CHARS_PER_TOKEN
         output_tokens = math.ceil(self.OUTPUT_TOKENS_PER_INPUT_TOKEN * input_tokens)
-        relevant_prices = self.get_price(self.inference_service, self.model)
+        relevant_prices = self.price_retriever.get_price(
+            self.inference_service, self.model
+        )
-        input_price_per_million_tokens = self.get_price_per_million_tokens(
-            relevant_prices, "input"
+        input_price_per_million_tokens = (
+            self.price_retriever.get_price_per_million_tokens(relevant_prices, "input")
         )
-        output_price_per_million_tokens = self.get_price_per_million_tokens(
-            relevant_prices, "output"
+        output_price_per_million_tokens = (
+            self.price_retriever.get_price_per_million_tokens(relevant_prices, "output")
         )
         input_price_per_token = input_price_per_million_tokens / 1_000_000
@@ -172,7 +88,6 @@ class PromptCostEstimator:
 class JobsPrompts:
     relevant_keys = [
         "user_prompt",
         "system_prompt",
@@ -255,13 +170,18 @@ class JobsPrompts:
         cost = prompt_cost["cost_usd"]
         # Generate cache keys for each iteration
+        files_list = prompts.get("files_list", None)
+        if files_list:
+            files_hash = "+".join([str(hash(file)) for file in files_list])
+            user_prompt_with_hashes = user_prompt + f" {files_hash}"
         cache_keys = []
         for iteration in range(iterations):
             cache_key = CacheEntry.gen_key(
                 model=model,
                 parameters=invigilator.model.parameters,
                 system_prompt=system_prompt,
-                user_prompt=user_prompt,
+                user_prompt=user_prompt_with_hashes if files_list else user_prompt,
                 iteration=iteration,
             )
             cache_keys.append(cache_key)
@@ -366,20 +286,6 @@ class JobsPrompts:
             },
         )
-    @staticmethod
-    def usd_to_credits(usd: float) -> float:
-        """Converts USD to credits."""
-        cents = usd * 100
-        credits_per_cent = 1
-        credits = cents * credits_per_cent
-        # Round up to the nearest hundredth of a credit
-        minicredits = math.ceil(credits * 100)
-        # Convert back to credits
-        credits = round(minicredits / 100, 2)
-        return credits
     def estimate_job_cost_from_external_prices(
         self, price_lookup: dict, iterations: int = 1
     ) -> dict:
@@ -444,14 +350,13 @@ class JobsPrompts:
             detailed_costs.append(group)
         # Convert to credits
+        converter = CostConverter()
         for group in detailed_costs:
-            group["credits_hold"] = self.usd_to_credits(group["cost_usd"])
+            group["credits_hold"] = converter.usd_to_credits(group["cost_usd"])
         # Calculate totals
         estimated_total_cost_usd = sum(group["cost_usd"] for group in detailed_costs)
-        total_credits_hold = sum(
-            group["credits_hold"] for group in detailed_costs
-        )
+        total_credits_hold = sum(group["credits_hold"] for group in detailed_costs)
         estimated_total_input_tokens = sum(
             group["tokens"]
             for group in detailed_costs

edsl/jobs/jobs_remote_inference_logger.py CHANGED Viewed

@@ -23,15 +23,40 @@ class LogMessage:
     status: JobsStatus
+@dataclass
+class JobRunExceptionCounter:
+    exception_type: str = None
+    inference_service: str = None
+    model: str = None
+    question_name: str = None
+    exception_count: int = None
+@dataclass
+class ModelCost:
+    service: str = None
+    model: str = None
+    input_tokens: int = None
+    input_cost_usd: float = None
+    output_tokens: int = None
+    output_cost_usd: float = None
+    input_cost_credits_with_cache: int = None
+    output_cost_credits_with_cache: int = None
 @dataclass
 class JobsInfo:
     job_uuid: str = None
     progress_bar_url: str = None
     error_report_url: str = None
+    remote_inference_url: str = None
+    remote_cache_url: str = None
     results_uuid: str = None
     results_url: str = None
     completed_interviews: int = None
     failed_interviews: int = None
+    exception_summary: list[JobRunExceptionCounter] = None
+    model_costs: list[ModelCost] = None
     pretty_names = {
         "job_uuid": "Job UUID",
@@ -39,6 +64,8 @@ class JobsInfo:
         "error_report_url": "Exceptions Report URL",
         "results_uuid": "Results UUID",
         "results_url": "Results URL",
+        "remote_inference_url": "Remote Jobs",
+        "remote_cache_url": "Remote Cache",
     }
@@ -57,6 +84,8 @@ class JobLogger(ABC):
             "results_url",
             "completed_interviews",
             "failed_interviews",
+            "model_costs",
+            "exception_summary",
         ],
         value: str,
     ):

edsl/jobs/jobs_runner_status.py CHANGED Viewed

@@ -11,15 +11,7 @@ from uuid import UUID
 if TYPE_CHECKING:
     from .jobs import Jobs
-@dataclass
-class ModelInfo:
-    model_name: str
-    TPM_limit_k: float
-    RPM_limit_k: float
-    num_tasks_waiting: int
-    token_usage_info: dict
+    from ..interviews import Interview
 class StatisticsTracker:
@@ -29,16 +21,33 @@ class StatisticsTracker:
         self.completed_count = 0
         self.completed_by_model = defaultdict(int)
         self.distinct_models = distinct_models
+        self.interviews_with_exceptions = 0
         self.total_exceptions = 0
         self.unfixed_exceptions = 0
+        self.exceptions_counter = defaultdict(int)
     def add_completed_interview(
-        self, model: str, num_exceptions: int = 0, num_unfixed: int = 0
+        self,
+        model: str,
+        exceptions: list[dict],
+        num_exceptions: int = 0,
+        num_unfixed: int = 0,
     ):
         self.completed_count += 1
         self.completed_by_model[model] += 1
         self.total_exceptions += num_exceptions
         self.unfixed_exceptions += num_unfixed
+        if num_exceptions > 0:
+            self.interviews_with_exceptions += 1
+        for exception in exceptions:
+            key = (
+                exception["exception_type"],
+                exception["inference_service"],
+                exception["model"],
+                exception["question_name"],
+            )
+            self.exceptions_counter[key] += 1
     def get_elapsed_time(self) -> float:
         return time.time() - self.start_time
@@ -88,9 +97,7 @@ class JobsRunnerStatusBase(ABC):
         ]
         self.num_total_interviews = n * len(self.jobs)
-        self.distinct_models = list(
-            set(model.model for model in self.jobs.models)
-        )
+        self.distinct_models = list(set(model.model for model in self.jobs.models))
         self.stats_tracker = StatisticsTracker(
             total_interviews=self.num_total_interviews,
@@ -130,6 +137,7 @@ class JobsRunnerStatusBase(ABC):
         status_dict = {
             "overall_progress": {
                 "completed": self.stats_tracker.completed_count,
+                "has_exceptions": self.stats_tracker.interviews_with_exceptions,
                 "total": self.num_total_interviews,
                 "percent": (
                     (
@@ -148,16 +156,36 @@ class JobsRunnerStatusBase(ABC):
                 if self.stats_tracker.completed_count >= self.num_total_interviews
                 else "running"
             ),
+            "exceptions_counter": [
+                {
+                    "exception_type": exception_type,
+                    "inference_service": inference_service,
+                    "model": model,
+                    "question_name": question_name,
+                    "count": count,
+                }
+                for (
+                    exception_type,
+                    inference_service,
+                    model,
+                    question_name,
+                ), count in self.stats_tracker.exceptions_counter.items()
+            ],
         }
         model_queues = {}
         # Check if bucket collection exists and is not empty
-        if (hasattr(self.jobs, 'run_config') and
-            hasattr(self.jobs.run_config, 'environment') and
-            hasattr(self.jobs.run_config.environment, 'bucket_collection') and
-            self.jobs.run_config.environment.bucket_collection):
-            for model, bucket in self.jobs.run_config.environment.bucket_collection.items():
+        if (
+            hasattr(self.jobs, "run_config")
+            and hasattr(self.jobs.run_config, "environment")
+            and hasattr(self.jobs.run_config.environment, "bucket_collection")
+            and self.jobs.run_config.environment.bucket_collection
+        ):
+            for (
+                model,
+                bucket,
+            ) in self.jobs.run_config.environment.bucket_collection.items():
                 model_name = model.model
                 model_queues[model_name] = {
                     "language_model_name": model_name,
@@ -166,7 +194,9 @@ class JobsRunnerStatusBase(ABC):
                         "requested": bucket.requests_bucket.num_requests,
                         "tokens_returned": bucket.requests_bucket.tokens_returned,
                         "target_rate": round(bucket.requests_bucket.target_rate, 1),
-                        "current_rate": round(bucket.requests_bucket.get_throughput(), 1),
+                        "current_rate": round(
+                            bucket.requests_bucket.get_throughput(), 1
+                        ),
                     },
                     "tokens_bucket": {
                         "completed": bucket.tokens_bucket.num_released,
@@ -179,10 +209,11 @@ class JobsRunnerStatusBase(ABC):
         status_dict["language_model_queues"] = model_queues
         return status_dict
-    def add_completed_interview(self, interview):
+    def add_completed_interview(self, interview: "Interview"):
         """Records a completed interview without storing the full interview data."""
         self.stats_tracker.add_completed_interview(
             model=interview.model.model,
+            exceptions=interview.exceptions.list(),
             num_exceptions=interview.exceptions.num_exceptions(),
             num_unfixed=interview.exceptions.num_unfixed_exceptions(),
         )

edsl 0.1.57__py3-none-any.whl → 0.1.59__py3-none-any.whl

edsl 0.1.57__py3-none-any.whl → 0.1.59__py3-none-any.whl