edsl 0.1.55__py3-none-any.whl → 0.1.57__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__version__.py +1 -1
- edsl/base/data_transfer_models.py +15 -4
- edsl/coop/coop.py +3 -3
- edsl/dataset/dataset_operations_mixin.py +216 -180
- edsl/inference_services/services/google_service.py +5 -2
- edsl/interviews/request_token_estimator.py +8 -0
- edsl/invigilators/invigilators.py +26 -13
- edsl/jobs/jobs_pricing_estimation.py +176 -113
- edsl/language_models/language_model.py +24 -6
- edsl/language_models/price_manager.py +171 -36
- edsl/results/result.py +52 -30
- edsl/scenarios/file_store.py +60 -30
- {edsl-0.1.55.dist-info → edsl-0.1.57.dist-info}/METADATA +2 -2
- {edsl-0.1.55.dist-info → edsl-0.1.57.dist-info}/RECORD +17 -17
- {edsl-0.1.55.dist-info → edsl-0.1.57.dist-info}/LICENSE +0 -0
- {edsl-0.1.55.dist-info → edsl-0.1.57.dist-info}/WHEEL +0 -0
- {edsl-0.1.55.dist-info → edsl-0.1.57.dist-info}/entry_points.txt +0 -0
edsl/language_models/price_manager.py
CHANGED
@@ -1,4 +1,22 @@
-from
+from dataclasses import dataclass
+from typing import Dict, Literal, Tuple, Union
+from collections import namedtuple
+
+
+@dataclass
+class ResponseCost:
+    """
+    Class for storing the cost and token usage of a language model response.
+
+    If an error occurs when computing the cost, the total_cost will contain a string with the error message.
+    All other fields will be None.
+    """
+
+    input_tokens: Union[int, None] = None
+    output_tokens: Union[int, None] = None
+    input_price_per_million_tokens: Union[float, None] = None
+    output_price_per_million_tokens: Union[float, None] = None
+    total_cost: Union[float, str, None] = None
 
 
 class PriceManager:
@@ -64,28 +82,127 @@ class PriceManager:
         return self._price_lookup.copy()
 
     def _get_fallback_price(self, inference_service: str) -> Dict:
-        """
+        """
+        Get fallback prices for a service.
+        - First fallback: The highest input and output prices for that service from the price lookup.
+        - Second fallback: $1.00 per million tokens (for both input and output).
+
+        Args:
+            inference_service (str): The inference service name
+
+        Returns:
+            Dict: Price information
+        """
+        PriceEntry = namedtuple("PriceEntry", ["tokens_per_usd", "price_info"])
+
         service_prices = [
             prices
             for (service, _), prices in self._price_lookup.items()
             if service == inference_service
         ]
 
-
-
+        default_price_info = {
+            "one_usd_buys": 1_000_000,
+            "service_stated_token_qty": 1_000_000,
+            "service_stated_token_price": 1.0,
+        }
+
+        # Find the most expensive price entries (lowest tokens per USD)
+        input_price_info = default_price_info
+        output_price_info = default_price_info
+
+        input_prices = [
+            PriceEntry(float(p["input"]["one_usd_buys"]), p["input"])
+            for p in service_prices
+            if "input" in p
         ]
-
+        if input_prices:
+            input_price_info = min(
+                input_prices, key=lambda price: price.tokens_per_usd
+            ).price_info
 
-
-            float(p["output"]["one_usd_buys"])
+        output_prices = [
+            PriceEntry(float(p["output"]["one_usd_buys"]), p["output"])
+            for p in service_prices
+            if "output" in p
         ]
-
+        if output_prices:
+            output_price_info = min(
+                output_prices, key=lambda price: price.tokens_per_usd
+            ).price_info
 
         return {
-            "input":
-            "output":
+            "input": input_price_info,
+            "output": output_price_info,
         }
 
+    def get_price_per_million_tokens(
+        self,
+        relevant_prices: Dict,
+        token_type: Literal["input", "output"],
+    ) -> Dict:
+        """
+        Get the price per million tokens for a specific service, model, and token type.
+        """
+        service_price = relevant_prices[token_type]["service_stated_token_price"]
+        service_qty = relevant_prices[token_type]["service_stated_token_qty"]
+
+        if service_qty == 1_000_000:
+            price_per_million_tokens = service_price
+        elif service_qty == 1_000:
+            price_per_million_tokens = service_price * 1_000
+        else:
+            price_per_token = service_price / service_qty
+            price_per_million_tokens = round(price_per_token * 1_000_000, 10)
+        return price_per_million_tokens
+
+    def _calculate_total_cost(
+        self,
+        relevant_prices: Dict,
+        input_tokens: int,
+        output_tokens: int,
+    ) -> float:
+        """
+        Calculate the total cost for a model usage based on input and output tokens.
+
+        Returns:
+            float: Total cost
+        """
+        # Extract price information
+        try:
+            inverse_output_price = relevant_prices["output"]["one_usd_buys"]
+            inverse_input_price = relevant_prices["input"]["one_usd_buys"]
+        except Exception as e:
+            if "output" not in relevant_prices:
+                raise KeyError(
+                    f"Could not fetch prices from {relevant_prices} - {e}; Missing 'output' key."
+                )
+            if "input" not in relevant_prices:
+                raise KeyError(
+                    f"Could not fetch prices from {relevant_prices} - {e}; Missing 'input' key."
+                )
+            raise Exception(f"Could not fetch prices from {relevant_prices} - {e}")
+
+        # Calculate input cost
+        if inverse_input_price == "infinity":
+            input_cost = 0
+        else:
+            try:
+                input_cost = input_tokens / float(inverse_input_price)
+            except Exception as e:
+                raise Exception(f"Could not compute input price - {e}")
+
+        # Calculate output cost
+        if inverse_output_price == "infinity":
+            output_cost = 0
+        else:
+            try:
+                output_cost = output_tokens / float(inverse_output_price)
+            except Exception as e:
+                raise Exception(f"Could not compute output price - {e}")
+
+        return input_cost + output_cost
+
     def calculate_cost(
         self,
         inference_service: str,
@@ -93,43 +210,61 @@ class PriceManager:
         usage: Dict[str, Union[str, int]],
         input_token_name: str,
         output_token_name: str,
-    ) ->
-        """
-
+    ) -> ResponseCost:
+        """
+        Calculate the cost and token usage for a model response.
 
-
+        Args:
+            inference_service (str): The inference service identifier
+            model (str): The model identifier
+            usage (Dict[str, Union[str, int]]): Dictionary containing token usage information
+            input_token_name (str): Key name for input tokens in the usage dict
+            output_token_name (str): Key name for output tokens in the usage dict
+
+        Returns:
+            ResponseCost: Object containing token counts and total cost
+        """
         try:
             input_tokens = int(usage[input_token_name])
             output_tokens = int(usage[output_token_name])
         except Exception as e:
-            return
+            return ResponseCost(
+                total_cost=f"Could not fetch tokens from model response: {e}",
+            )
 
-        # Extract price information
         try:
-
-            inverse_input_price = relevant_prices["input"]["one_usd_buys"]
+            relevant_prices = self.get_price(inference_service, model)
         except Exception as e:
-
-
-
-                return f"Could not fetch prices from {relevant_prices} - {e}; Missing 'input' key."
-            return f"Could not fetch prices from {relevant_prices} - {e}"
+            return ResponseCost(
+                total_cost=f"Could not fetch prices from {inference_service} - {model}: {e}",
+            )
 
-
-
-
-
-
-
+        try:
+            input_price_per_million_tokens = self.get_price_per_million_tokens(
+                relevant_prices, "input"
+            )
+            output_price_per_million_tokens = self.get_price_per_million_tokens(
+                relevant_prices, "output"
+            )
+        except Exception as e:
+            return ResponseCost(
+                total_cost=f"Could not compute price per million tokens: {e}",
+            )
 
-
-
-
-
-
-
+        try:
+            total_cost = self._calculate_total_cost(
+                relevant_prices, input_tokens, output_tokens
+            )
+        except Exception as e:
+            return ResponseCost(total_cost=f"{e}")
 
-        return
+        return ResponseCost(
+            input_tokens=input_tokens,
+            output_tokens=output_tokens,
+            input_price_per_million_tokens=input_price_per_million_tokens,
+            output_price_per_million_tokens=output_price_per_million_tokens,
+            total_cost=total_cost,
+        )
 
     @property
     def is_initialized(self) -> bool:
edsl/results/result.py
CHANGED
@@ -20,6 +20,7 @@ The Result class inherits from both Base (for serialization) and UserDict (for
 dictionary-like behavior), allowing it to be accessed like a dictionary while
 maintaining a rich object model.
 """
+
 from __future__ import annotations
 import inspect
 from collections import UserDict
@@ -40,6 +41,7 @@ if TYPE_CHECKING:
     QuestionName = str
     AnswerValue = Any
 
+
 class AgentNamer:
     """Maintains a registry of agent names to ensure unique naming."""
 
@@ -61,20 +63,20 @@ agent_namer = AgentNamer().get_name
 class Result(Base, UserDict):
     """
     The Result class captures the complete data from one agent interview.
-
+
     A Result object stores the agent, scenario, language model, and all answers
     provided during an interview, along with metadata such as token usage,
     caching information, and raw model responses. It provides a rich interface
     for accessing this data and supports serialization for storage and retrieval.
-
+
     Key features:
-
+
     - Dictionary-like access to all data through the UserDict interface
     - Properties for convenient access to common attributes (agent, scenario, model, answer)
    - Rich data structure with sub-dictionaries for organization
     - Support for scoring results against reference answers
     - Serialization to/from dictionaries for storage
-
+
     Results are typically created by the Jobs system when running interviews and
     collected into a Results collection for analysis. You rarely need to create
     Result objects manually.
@@ -260,6 +262,7 @@ class Result(Base, UserDict):
         for key in self.problem_keys:
             if key in expression and key + "." not in expression:
                 from .exceptions import ResultsColumnNotFoundError
+
                 raise ResultsColumnNotFoundError(
                     f"Key by itself {key} is problematic. Use the full key {key + '.' + key} name instead."
                 )
@@ -268,6 +271,7 @@ class Result(Base, UserDict):
     def code(self):
         """Return a string of code that can be used to recreate the Result object."""
         from .exceptions import ResultsError
+
         raise ResultsError("The code() method is not implemented for Result objects")
 
     @property
@@ -316,7 +320,7 @@ class Result(Base, UserDict):
 
     def get_value(self, data_type: str, key: str) -> Any:
         """Return the value for a given data type and key.
-
+
         This method provides a consistent way to access values across different
         sub-dictionaries in the Result object. It's particularly useful when you
         need to programmatically access values without knowing which data type
@@ -331,7 +335,7 @@ class Result(Base, UserDict):
 
         Returns:
             The value associated with the key in the specified data type
-
+
         Examples:
             >>> r = Result.example()
             >>> r.get_value("answer", "how_feeling")
@@ -344,15 +348,15 @@ class Result(Base, UserDict):
     @property
     def key_to_data_type(self) -> dict[str, str]:
         """A mapping of attribute names to their container data types.
-
+
         This property returns a dictionary that maps each attribute name (like 'how_feeling')
         to its containing data type or category (like 'answer'). This is useful for
         determining which part of the Result object a particular attribute belongs to,
         especially when working with data programmatically.
-
+
         If a key name appears in multiple data types, the property will automatically
         rename the conflicting keys by appending the data type name to avoid ambiguity.
-
+
         Returns:
             A dictionary mapping attribute names to their data types
 
@@ -435,7 +439,7 @@ class Result(Base, UserDict):
                 else prompt_obj.to_dict()
             )
             d[key] = new_prompt_dict
-
+
         if self.indices is not None:
             d["indices"] = self.indices
 
@@ -495,7 +499,7 @@ class Result(Base, UserDict):
             comments_dict=json_dict.get("comments_dict", {}),
             cache_used_dict=json_dict.get("cache_used_dict", {}),
             cache_keys=json_dict.get("cache_keys", {}),
-            indices
+            indices=json_dict.get("indices", None),
         )
         if "interview_hash" in json_dict:
             result.interview_hash = json_dict["interview_hash"]
@@ -522,14 +526,14 @@ class Result(Base, UserDict):
         from .results import Results
 
         return Results.example()[0]
-
+
     def score_with_answer_key(self, answer_key: dict) -> dict[str, int]:
         """Score the result against a reference answer key.
-
-        This method evaluates the correctness of answers by comparing them to a
-        provided answer key. It returns a dictionary with counts of correct,
+
+        This method evaluates the correctness of answers by comparing them to a
+        provided answer key. It returns a dictionary with counts of correct,
         incorrect, and missing answers.
-
+
         The answer key can contain either single values or lists of acceptable values.
         If a list is provided, the answer is considered correct if it matches any
         value in the list.
@@ -541,7 +545,7 @@ class Result(Base, UserDict):
         Returns:
             A dictionary with keys 'correct', 'incorrect', and 'missing', indicating
             the counts of each answer type.
-
+
         Examples:
             >>> Result.example()['answer']
             {'how_feeling': 'OK', 'how_feeling_yesterday': 'Great'}
@@ -550,21 +554,24 @@ class Result(Base, UserDict):
             >>> answer_key = {'how_feeling': 'OK', 'how_feeling_yesterday': 'Great'}
             >>> Result.example().score_with_answer_key(answer_key)
             {'correct': 2, 'incorrect': 0, 'missing': 0}
-
+
             >>> # Using answer key with multiple acceptable answers
             >>> answer_key = {'how_feeling': 'OK', 'how_feeling_yesterday': ['Great', 'Good']}
             >>> Result.example().score_with_answer_key(answer_key)
             {'correct': 2, 'incorrect': 0, 'missing': 0}
         """
-        final_scores = {
+        final_scores = {"correct": 0, "incorrect": 0, "missing": 0}
         for question_name, answer in self.answer.items():
             if question_name in answer_key:
-                if
-
+                if (
+                    answer == answer_key[question_name]
+                    or answer in answer_key[question_name]
+                ):
+                    final_scores["correct"] += 1
                 else:
-                    final_scores[
+                    final_scores["incorrect"] += 1
             else:
-                final_scores[
+                final_scores["missing"] += 1
 
         return final_scores
 
@@ -584,6 +591,7 @@ class Result(Base, UserDict):
                 params[k] = v.default
             else:
                 from .exceptions import ResultsError
+
                 raise ResultsError(f"Parameter {k} not found in Result object")
         return scoring_function(**params)
 
@@ -654,16 +662,30 @@ class Result(Base, UserDict):
         raw_model_results_dictionary[question_name + "_raw_model_response"] = (
             result.raw_model_response
         )
-        raw_model_results_dictionary[question_name + "
-
+        raw_model_results_dictionary[question_name + "_input_tokens"] = (
+            result.input_tokens
+        )
+        raw_model_results_dictionary[question_name + "_output_tokens"] = (
+            result.output_tokens
+        )
+        raw_model_results_dictionary[
+            question_name + "_input_price_per_million_tokens"
+        ] = result.input_price_per_million_tokens
+        raw_model_results_dictionary[
+            question_name + "_output_price_per_million_tokens"
+        ] = result.output_price_per_million_tokens
+        raw_model_results_dictionary[question_name + "_cost"] = (
+            result.total_cost
+        )
+        one_usd_buys = (
             "NA"
-            if isinstance(result.
-            or result.
-            or result.
-            else 1.0 / result.
+            if isinstance(result.total_cost, str)
+            or result.total_cost == 0
+            or result.total_cost is None
+            else 1.0 / result.total_cost
         )
         raw_model_results_dictionary[question_name + "_one_usd_buys"] = (
-
+            one_usd_buys
        )
         cache_used_dictionary[question_name] = result.cache_used
 
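Note: the reworked score_with_answer_key body above reduces to a small equality-or-membership rule. The standalone sketch below restates that rule outside the Result class; the sample answers mirror the doctest in the diff, and nothing else here is taken from the package.

def score_with_answer_key(answers: dict, answer_key: dict) -> dict:
    # Same counting rule as the diff above: an answer is correct when it equals
    # the key's value or is contained in it (e.g. a list of acceptable answers).
    scores = {"correct": 0, "incorrect": 0, "missing": 0}
    for question_name, answer in answers.items():
        if question_name in answer_key:
            expected = answer_key[question_name]
            if answer == expected or answer in expected:
                scores["correct"] += 1
            else:
                scores["incorrect"] += 1
        else:
            scores["missing"] += 1
    return scores


answers = {"how_feeling": "OK", "how_feeling_yesterday": "Great"}
answer_key = {"how_feeling": "OK", "how_feeling_yesterday": ["Great", "Good"]}
print(score_with_answer_key(answers, answer_key))  # {'correct': 2, 'incorrect': 0, 'missing': 0}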
edsl/scenarios/file_store.py
CHANGED
@@ -294,10 +294,23 @@ class FileStore(Scenario):
 
     def upload_google(self, refresh: bool = False) -> None:
         import google.generativeai as genai
+        import google
 
-
-
-
+        try:
+            genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
+            google_info = genai.upload_file(self.path, mime_type=self.mime_type)
+            self.external_locations["google"] = google_info.to_dict()
+            while True:
+                file_metadata = genai.get_file(name=google_info.name)
+                file_state = file_metadata.state
+
+                if file_state == 2:  # "ACTIVE":
+                    break
+                elif file_state == 10:  # "FAILED":
+                    break
+        except Exception as e:
+            print(f"Error uploading to Google: {e}")
+            raise
 
     @classmethod
     @remove_edsl_version
@@ -592,14 +605,14 @@ class FileStore(Scenario):
         """
         # Check if the mime type starts with 'image/'
         return self.mime_type.startswith("image/")
-
+
     def is_video(self) -> bool:
         """
         Check if the file is a video by examining its MIME type.
-
+
         Returns:
             bool: True if the file is a video, False otherwise.
-
+
         Examples:
             >>> fs = FileStore.example("mp4")
             >>> fs.is_video()
@@ -613,19 +626,19 @@ class FileStore(Scenario):
         """
         # Check if the mime type starts with 'video/'
         return self.mime_type.startswith("video/")
-
+
     def get_video_metadata(self) -> dict:
         """
         Get metadata about a video file such as duration, dimensions, codec, etc.
         Uses FFmpeg to extract the information if available.
-
+
         Returns:
             dict: A dictionary containing video metadata, or a dictionary with
                 error information if metadata extraction fails.
-
+
         Raises:
             ValueError: If the file is not a video.
-
+
         Example:
             >>> fs = FileStore.example("mp4")
             >>> metadata = fs.get_video_metadata()
@@ -634,47 +647,63 @@ class FileStore(Scenario):
         """
         if not self.is_video():
             raise ValueError("This file is not a video")
-
+
         # We'll try to use ffprobe (part of ffmpeg) to get metadata
         import subprocess
         import json
-
+
         try:
             # Run ffprobe to get video metadata in JSON format
             result = subprocess.run(
                 [
-                    "ffprobe",
-                    "-
+                    "ffprobe",
+                    "-v",
+                    "quiet",
+                    "-print_format",
+                    "json",
+                    "-show_format",
+                    "-show_streams",
+                    self.path,
                 ],
-                capture_output=True,
+                capture_output=True,
+                text=True,
+                check=True,
             )
-
+
             # Parse the JSON output
             metadata = json.loads(result.stdout)
-
+
             # Extract some common useful fields into a more user-friendly format
             simplified = {
                 "format": metadata.get("format", {}).get("format_name", "unknown"),
-                "duration_seconds": float(
+                "duration_seconds": float(
+                    metadata.get("format", {}).get("duration", 0)
+                ),
                 "size_bytes": int(metadata.get("format", {}).get("size", 0)),
                 "bit_rate": int(metadata.get("format", {}).get("bit_rate", 0)),
                 "streams": len(metadata.get("streams", [])),
             }
-
+
             # Add video stream info if available
-            video_streams = [
+            video_streams = [
+                s for s in metadata.get("streams", []) if s.get("codec_type") == "video"
+            ]
             if video_streams:
                 video = video_streams[0]  # Get the first video stream
                 simplified["video"] = {
                     "codec": video.get("codec_name", "unknown"),
                     "width": video.get("width", 0),
                     "height": video.get("height", 0),
-                    "frame_rate": eval(
+                    "frame_rate": eval(
+                        video.get("r_frame_rate", "0/1")
+                    ),  # Convert "30/1" to 30.0
                     "pixel_format": video.get("pix_fmt", "unknown"),
                 }
-
+
             # Add audio stream info if available
-            audio_streams = [
+            audio_streams = [
+                s for s in metadata.get("streams", []) if s.get("codec_type") == "audio"
+            ]
             if audio_streams:
                 audio = audio_streams[0]  # Get the first audio stream
                 simplified["audio"] = {
@@ -682,14 +711,15 @@ class FileStore(Scenario):
                     "channels": audio.get("channels", 0),
                     "sample_rate": audio.get("sample_rate", "unknown"),
                 }
-
+
             # Return both the complete metadata and simplified version
-            return {
-
-
-
-
-
+            return {"simplified": simplified, "full": metadata}
+
+        except (
+            subprocess.SubprocessError,
+            FileNotFoundError,
+            json.JSONDecodeError,
+        ) as e:
             # If ffprobe is not available or fails, return basic info
             return {
                 "error": str(e),
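Note: the get_video_metadata changes above are mostly a reflow of the ffprobe call plus a narrower except clause. For reference, the same probe can be run outside FileStore roughly as follows; this is a sketch that assumes ffprobe (from FFmpeg) is on PATH, and the file path is a placeholder.

import json
import subprocess


def probe_video(path: str) -> dict:
    # Same ffprobe invocation as in the diff: quiet JSON output with format and stream info.
    result = subprocess.run(
        [
            "ffprobe",
            "-v", "quiet",
            "-print_format", "json",
            "-show_format", "-show_streams",
            path,
        ],
        capture_output=True,
        text=True,
        check=True,
    )
    metadata = json.loads(result.stdout)
    # Pick the first video stream, if any, and summarize a few common fields.
    video = next(
        (s for s in metadata.get("streams", []) if s.get("codec_type") == "video"), {}
    )
    return {
        "format": metadata.get("format", {}).get("format_name", "unknown"),
        "duration_seconds": float(metadata.get("format", {}).get("duration", 0)),
        "width": video.get("width", 0),
        "height": video.get("height", 0),
        "codec": video.get("codec_name", "unknown"),
    }


if __name__ == "__main__":
    print(probe_video("example.mp4"))  # placeholder path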
{edsl-0.1.55.dist-info → edsl-0.1.57.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: edsl
-Version: 0.1.55
+Version: 0.1.57
 Summary: Create and analyze LLM-based surveys
 Home-page: https://www.expectedparrot.com/
 License: MIT
@@ -23,7 +23,7 @@ Requires-Dist: azure-ai-inference (>=1.0.0b3,<2.0.0)
 Requires-Dist: black[jupyter] (>=24.4.2,<25.0.0)
 Requires-Dist: boto3 (>=1.34.161,<2.0.0)
 Requires-Dist: google-generativeai (>=0.8.2,<0.9.0)
-Requires-Dist: groq (
+Requires-Dist: groq (==0.23.1)
 Requires-Dist: jinja2 (>=3.1.2,<4.0.0)
 Requires-Dist: json-repair (>=0.28.4,<0.29.0)
 Requires-Dist: jupyter (>=1.0.0,<2.0.0)
|