edsl 0.1.52__py3-none-any.whl → 0.1.54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
edsl/__version__.py CHANGED
@@ -1 +1 @@
- __version__ = "0.1.52"
+ __version__ = "0.1.54"
@@ -53,7 +53,6 @@ class TestService(InferenceServiceABC):
  @property
  def _canned_response(self):
  if hasattr(self, "canned_response"):
-
  return self.canned_response
  else:
  return "Hello, world X"
@@ -64,6 +63,7 @@ class TestService(InferenceServiceABC):
  system_prompt: str,
  # func: Optional[callable] = None,
  files_list: Optional[List["File"]] = None,
+ question_name: Optional[str] = None,
  ) -> dict[str, Any]:
  await asyncio.sleep(0.1)

@@ -75,6 +75,7 @@ class TestService(InferenceServiceABC):

  if random.random() < p:
  from ..exceptions import InferenceServiceError
+
  raise InferenceServiceError("This is a test error")

  if hasattr(self, "func"):
@@ -85,8 +86,16 @@ class TestService(InferenceServiceABC):
  "usage": {"prompt_tokens": 1, "completion_tokens": 1},
  }

+ response = self._canned_response
+ if isinstance(response, dict) and question_name:
+ canned_text = response.get(
+ question_name, f"No canned response for '{question_name}'"
+ )
+ else:
+ canned_text = response
+
  return {
- "message": [{"text": f"{self._canned_response}"}],
+ "message": [{"text": f"{canned_text}"}],
  "usage": {"prompt_tokens": 1, "completion_tokens": 1},
  }
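For context on the `question_name` parameter added above, here is a minimal standalone sketch of the lookup it enables (the `resolve_canned_text` helper is illustrative, not part of the package): a string-valued `canned_response` is returned for every question, while a dict-valued one is keyed by question name.

```python
from typing import Optional, Union


def resolve_canned_text(
    canned_response: Union[str, dict], question_name: Optional[str] = None
) -> str:
    """Pick the canned answer for a question, mirroring the logic added above."""
    if isinstance(canned_response, dict) and question_name:
        return canned_response.get(
            question_name, f"No canned response for '{question_name}'"
        )
    return str(canned_response)


print(resolve_canned_text("Hello, world X", "q_color"))     # Hello, world X
print(resolve_canned_text({"q_color": "blue"}, "q_color"))  # blue
print(resolve_canned_text({"q_color": "blue"}, "q_size"))   # No canned response for 'q_size'
```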
 
@@ -1,6 +1,101 @@
  from ..jobs.fetch_invigilator import FetchInvigilator
  from ..scenarios import FileStore

+ import math
+
+ # Model configs: base tokens and tile tokens only
+ VISION_MODELS = {
+ "gpt-4o": {
+ "base_tokens": 85,
+ "tile_tokens": 170,
+ },
+ "gpt-4o-mini": {
+ "base_tokens": 2833,
+ "tile_tokens": 5667,
+ },
+ "o1": {
+ "base_tokens": 75,
+ "tile_tokens": 150,
+ },
+ }
+
+
+ def approximate_image_tokens_google(width: int, height: int) -> int:
+ """
+ Approximates the token usage for an image based on its dimensions.
+
+ This calculation is based on the rules described for Gemini 2.0 models
+ in the provided text:
+ - Images with both dimensions <= 384px cost 258 tokens.
+ - Larger images are processed in 768x768 tiles, each costing 258 tokens.
+
+ Note: This is an *approximation*. The exact cropping, scaling, and tiling
+ strategy used by the actual Gemini API might differ slightly.
+
+ Args:
+ width: The width of the image in pixels.
+ height: The height of the image in pixels.
+
+ Returns:
+ An estimated integer token count for the image.
+
+ Raises:
+ ValueError: If width or height are not positive integers.
+ """
+ SMALL_IMAGE_THRESHOLD = 384 # Max dimension for fixed token count
+ FIXED_TOKEN_COST_SMALL = 258 # Token cost for small images (<= 384x384)
+ TILE_SIZE = 768 # Dimension of tiles for larger images
+ TOKEN_COST_PER_TILE = 258 # Token cost per 768x768 tile
+ if (
+ not isinstance(width, int)
+ or not isinstance(height, int)
+ or width <= 0
+ or height <= 0
+ ):
+ raise ValueError("Image width and height must be positive integers.")
+
+ # Case 1: Small image (both dimensions <= threshold)
+ if width <= SMALL_IMAGE_THRESHOLD and height <= SMALL_IMAGE_THRESHOLD:
+ return FIXED_TOKEN_COST_SMALL
+
+ # Case 2: Larger image (at least one dimension > threshold)
+ else:
+ # Calculate how many tiles are needed to cover the width and height
+ # Use ceiling division to ensure full coverage
+ tiles_wide = math.ceil(width / TILE_SIZE)
+ tiles_high = math.ceil(height / TILE_SIZE)
+
+ # Total number of tiles is the product of tiles needed in each dimension
+ total_tiles = tiles_wide * tiles_high
+
+ # Total token cost is the number of tiles times the cost per tile
+ estimated_tokens = total_tiles * TOKEN_COST_PER_TILE
+ return estimated_tokens
+
+
+ def estimate_tokens(model_name, width, height):
+ if model_name == "test":
+ return 10 # for testing purposes
+ if "gemini" in model_name:
+ out = approximate_image_tokens_google(width, height)
+ return out
+ if "claude" in model_name:
+ total_tokens = width * height / 750
+ return total_tokens
+ if model_name not in VISION_MODELS:
+ total_tokens = width * height / 750
+ return total_tokens
+
+ config = VISION_MODELS[model_name]
+ TILE_SIZE = 512
+
+ tiles_x = math.ceil(width / TILE_SIZE)
+ tiles_y = math.ceil(height / TILE_SIZE)
+ total_tiles = tiles_x * tiles_y
+
+ total_tokens = config["base_tokens"] + config["tile_tokens"] * total_tiles
+ return total_tokens
+

  class RequestTokenEstimator:
  """Estimate the number of tokens that will be required to run the focal task."""
@@ -24,15 +119,22 @@ class RequestTokenEstimator:
  elif isinstance(prompt, list):
  for file in prompt:
  if isinstance(file, FileStore):
- file_tokens += file.size * 0.25
+ if file.is_image():
+ model_name = self.interview.model.model
+ width, height = file.get_image_dimensions()
+ token_usage = estimate_tokens(model_name, width, height)
+ file_tokens += token_usage
+ else:
+ file_tokens += file.size * 0.25
  else:
  from .exceptions import InterviewTokenError
+
  raise InterviewTokenError(f"Prompt is of type {type(prompt)}")
  result: float = len(combined_text) / 4.0 + file_tokens
  return result


-
  if __name__ == "__main__":
  import doctest
+
  doctest.testmod(optionflags=doctest.ELLIPSIS)
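A quick arithmetic check of the estimators added above, re-implemented standalone with the same constants (512px tiles plus a base cost for the OpenAI-style models, 768px tiles at a 258-token flat rate for Gemini, and a pixels/750 fallback); the helper names here are illustrative.

```python
import math


def gemini_style_tokens(width: int, height: int) -> int:
    # Both dimensions <= 384px: flat 258 tokens; otherwise 258 per 768x768 tile.
    if width <= 384 and height <= 384:
        return 258
    return 258 * math.ceil(width / 768) * math.ceil(height / 768)


def openai_style_tokens(base: int, per_tile: int, width: int, height: int) -> int:
    # Fixed base cost plus a per-tile cost over 512x512 tiles.
    return base + per_tile * math.ceil(width / 512) * math.ceil(height / 512)


# A 1024x1024 image:
print(gemini_style_tokens(1024, 1024))           # 2 * 2 tiles * 258 = 1032
print(openai_style_tokens(85, 170, 1024, 1024))  # gpt-4o: 85 + 170 * 4 = 765
print(1024 * 1024 / 750)                         # claude-style fallback: ~1398 tokens
```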
@@ -393,10 +393,15 @@ class InvigilatorAI(InvigilatorBase):
  exception_occurred = None
  validated = False

- if agent_response_dict.model_outputs.cache_used:
+ if agent_response_dict.model_outputs.cache_used and False:
  data = {
  "answer": agent_response_dict.edsl_dict.answer
  if type(agent_response_dict.edsl_dict.answer) is str
+ or type(agent_response_dict.edsl_dict.answer) is dict
+ or type(agent_response_dict.edsl_dict.answer) is list
+ or type(agent_response_dict.edsl_dict.answer) is int
+ or type(agent_response_dict.edsl_dict.answer) is float
+ or type(agent_response_dict.edsl_dict.answer) is bool
  else "",
  "comment": agent_response_dict.edsl_dict.comment
  if agent_response_dict.edsl_dict.comment
@@ -1,7 +1,7 @@
  import logging
  import math

- from typing import List, TYPE_CHECKING
+ from typing import List, TYPE_CHECKING, Union, Literal

  if TYPE_CHECKING:
  from .jobs import Jobs
@@ -26,53 +26,104 @@ class PromptCostEstimator:
  OUTPUT_TOKENS_PER_INPUT_TOKEN = 0.75
  PIPING_MULTIPLIER = 2

- def __init__(self,
+ def __init__(
+ self,
  system_prompt: str,
  user_prompt: str,
  price_lookup: dict,
  inference_service: str,
- model: str):
+ model: str,
+ ):
  self.system_prompt = system_prompt
  self.user_prompt = user_prompt
  self.price_lookup = price_lookup
  self.inference_service = inference_service
  self.model = model

- @staticmethod
+ @staticmethod
  def get_piping_multiplier(prompt: str):
  """Returns 2 if a prompt includes Jinja braces, and 1 otherwise."""

  if "{{" in prompt and "}}" in prompt:
  return PromptCostEstimator.PIPING_MULTIPLIER
  return 1
-
+
  @property
  def key(self):
  return (self.inference_service, self.model)
-
+
  @property
  def relevant_prices(self):
  try:
  return self.price_lookup[self.key]
  except KeyError:
  return {}
-
- def input_price_per_token(self):
+
+ def _get_highest_price_for_service(self, price_type: str) -> Union[float, None]:
+ """Returns the highest price per token for a given service and price type (input/output).
+
+ Args:
+ price_type: Either "input" or "output"
+
+ Returns:
+ float | None: The highest price per token for the service, or None if not found
+ """
+ prices_for_service = [
+ prices[price_type]["service_stated_token_price"]
+ / prices[price_type]["service_stated_token_qty"]
+ for (service, _), prices in self.price_lookup.items()
+ if service == self.inference_service and price_type in prices
+ ]
+ return max(prices_for_service) if prices_for_service else None
+
+ def input_price_per_token(
+ self,
+ ) -> tuple[float, Literal["price_lookup", "highest_price_for_service", "default"]]:
  try:
- return self.relevant_prices["input"]["service_stated_token_price"] / self.relevant_prices["input"]["service_stated_token_qty"]
+ return (
+ self.relevant_prices["input"]["service_stated_token_price"]
+ / self.relevant_prices["input"]["service_stated_token_qty"]
+ ), "price_lookup"
  except KeyError:
+ highest_price = self._get_highest_price_for_service("input")
+ if highest_price is not None:
+ import warnings
+
+ warnings.warn(
+ f"Price data not found for {self.key}. Using highest available input price for {self.inference_service}: ${highest_price:.6f} per token"
+ )
+ return highest_price, "highest_price_for_service"
  import warnings
+
  warnings.warn(
- "Price data could not be retrieved. Using default estimates for input and output token prices. Input: $1.00 / 1M tokens; Output: $1.00 / 1M tokens"
+ f"Price data not found for {self.inference_service}. Using default estimate for input token price: $1.00 / 1M tokens"
  )
- return self.DEFAULT_INPUT_PRICE_PER_TOKEN
+ return self.DEFAULT_INPUT_PRICE_PER_TOKEN, "default"

- def output_price_per_token(self):
+ def output_price_per_token(
+ self,
+ ) -> tuple[float, Literal["price_lookup", "highest_price_for_service", "default"]]:
  try:
- return self.relevant_prices["output"]["service_stated_token_price"] / self.relevant_prices["output"]["service_stated_token_qty"]
+ return (
+ self.relevant_prices["output"]["service_stated_token_price"]
+ / self.relevant_prices["output"]["service_stated_token_qty"]
+ ), "price_lookup"
  except KeyError:
- return self.DEFAULT_OUTPUT_PRICE_PER_TOKEN
-
+ highest_price = self._get_highest_price_for_service("output")
+ if highest_price is not None:
+ import warnings
+
+ warnings.warn(
+ f"Price data not found for {self.key}. Using highest available output price for {self.inference_service}: ${highest_price:.6f} per token"
+ )
+ return highest_price, "highest_price_for_service"
+ import warnings
+
+ warnings.warn(
+ f"Price data not found for {self.inference_service}. Using default estimate for output token price: $1.00 / 1M tokens"
+ )
+ return self.DEFAULT_OUTPUT_PRICE_PER_TOKEN, "default"
+
  def __call__(self):
  user_prompt_chars = len(str(self.user_prompt)) * self.get_piping_multiplier(
  str(self.user_prompt)
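A minimal standalone sketch of the three-step fallback introduced above, using a toy `price_lookup` of the same shape (the prices are made up): an exact `(service, model)` match first, then the highest recorded price for that service, then the package default.

```python
DEFAULT_PRICE_PER_TOKEN = 1.0 / 1_000_000  # $1.00 per 1M tokens

price_lookup = {
    ("openai", "gpt-4o"): {
        "input": {"service_stated_token_price": 2.50, "service_stated_token_qty": 1_000_000}
    },
    ("openai", "gpt-4o-mini"): {
        "input": {"service_stated_token_price": 0.15, "service_stated_token_qty": 1_000_000}
    },
}


def input_price(service: str, model: str) -> tuple[float, str]:
    prices = price_lookup.get((service, model), {})
    if "input" in prices:
        p = prices["input"]
        return p["service_stated_token_price"] / p["service_stated_token_qty"], "price_lookup"
    # Fall back to the highest known input price for this service.
    candidates = [
        p["input"]["service_stated_token_price"] / p["input"]["service_stated_token_qty"]
        for (svc, _), p in price_lookup.items()
        if svc == service and "input" in p
    ]
    if candidates:
        return max(candidates), "highest_price_for_service"
    return DEFAULT_PRICE_PER_TOKEN, "default"


print(input_price("openai", "gpt-4o"))      # exact match -> "price_lookup"
print(input_price("openai", "gpt-4o-new"))  # -> "highest_price_for_service"
print(input_price("mystery", "model-x"))    # -> "default"
```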
@@ -84,20 +135,37 @@ class PromptCostEstimator:
  input_tokens = (user_prompt_chars + system_prompt_chars) // self.CHARS_PER_TOKEN
  output_tokens = math.ceil(self.OUTPUT_TOKENS_PER_INPUT_TOKEN * input_tokens)

+ input_price_per_token, input_price_source = self.input_price_per_token()
+ output_price_per_token, output_price_source = self.output_price_per_token()
+
  cost = (
- input_tokens * self.input_price_per_token()
- + output_tokens * self.output_price_per_token()
+ input_tokens * input_price_per_token
+ + output_tokens * output_price_per_token
  )
  return {
+ "input_price_source": input_price_source,
+ "input_price_per_token": input_price_per_token,
  "input_tokens": input_tokens,
+ "output_price_source": output_price_source,
  "output_tokens": output_tokens,
+ "output_price_per_token": output_price_per_token,
  "cost_usd": cost,
  }
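A worked example of the `__call__` arithmetic above, re-implemented standalone. `CHARS_PER_TOKEN` is assumed to be 4 here (the diff shows only `OUTPUT_TOKENS_PER_INPUT_TOKEN` and `PIPING_MULTIPLIER`), and the per-token prices are hypothetical.

```python
import math

CHARS_PER_TOKEN = 4  # assumed for illustration; not shown in the diff
OUTPUT_TOKENS_PER_INPUT_TOKEN = 0.75
PIPING_MULTIPLIER = 2


def piping_multiplier(prompt: str) -> int:
    return PIPING_MULTIPLIER if "{{" in prompt and "}}" in prompt else 1


user_prompt = "What is your favorite color, {{ agent.name }}?"  # doubled: contains Jinja braces
system_prompt = "You are answering a survey."

user_chars = len(user_prompt) * piping_multiplier(user_prompt)
system_chars = len(system_prompt) * piping_multiplier(system_prompt)

input_tokens = (user_chars + system_chars) // CHARS_PER_TOKEN
output_tokens = math.ceil(OUTPUT_TOKENS_PER_INPUT_TOKEN * input_tokens)

input_price_per_token = 2.50 / 1_000_000    # hypothetical prices
output_price_per_token = 10.00 / 1_000_000
cost = input_tokens * input_price_per_token + output_tokens * output_price_per_token

print(input_tokens, output_tokens, f"${cost:.8f}")
```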


  class JobsPrompts:

- relevant_keys = ["user_prompt", "system_prompt", "interview_index", "question_name", "scenario_index", "agent_index", "model", "estimated_cost", "cache_keys"]
+ relevant_keys = [
+ "user_prompt",
+ "system_prompt",
+ "interview_index",
+ "question_name",
+ "scenario_index",
+ "agent_index",
+ "model",
+ "estimated_cost",
+ "cache_keys",
+ ]

  """This generates the prompts for a job for price estimation purposes.

@@ -105,7 +173,6 @@ class JobsPrompts:
  So assumptions are made about expansion of Jinja braces, etc.
  """

-
  @classmethod
  def from_jobs(cls, jobs: "Jobs"):
  """Construct a JobsPrompts object from a Jobs object."""
@@ -114,13 +181,16 @@ class JobsPrompts:
  scenarios = jobs.scenarios
  survey = jobs.survey
  return cls(
- interviews=interviews,
- agents=agents,
- scenarios=scenarios,
- survey=survey
+ interviews=interviews, agents=agents, scenarios=scenarios, survey=survey
  )
-
- def __init__(self, interviews: List['Interview'], agents:'AgentList', scenarios: 'ScenarioList', survey: 'Survey'):
+
+ def __init__(
+ self,
+ interviews: List["Interview"],
+ agents: "AgentList",
+ scenarios: "ScenarioList",
+ survey: "Survey",
+ ):
  """Initialize with extracted components rather than a Jobs object."""
  self.interviews = interviews
  self.agents = agents
@@ -143,17 +213,19 @@ class JobsPrompts:
  self._price_lookup = c.fetch_prices()
  return self._price_lookup

- def _process_one_invigilator(self, invigilator: 'Invigilator', interview_index: int, iterations: int = 1) -> dict:
+ def _process_one_invigilator(
+ self, invigilator: "Invigilator", interview_index: int, iterations: int = 1
+ ) -> dict:
  """Process a single invigilator and return a dictionary with all needed data fields."""
  prompts = invigilator.get_prompts()
  user_prompt = prompts["user_prompt"]
  system_prompt = prompts["system_prompt"]
-
+
  agent_index = self._agent_lookup[invigilator.agent]
  scenario_index = self._scenario_lookup[invigilator.scenario]
  model = invigilator.model.model
  question_name = invigilator.question.question_name
-
+
  # Calculate prompt cost
  prompt_cost = self.estimate_prompt_cost(
  system_prompt=system_prompt,
@@ -163,7 +235,7 @@ class JobsPrompts:
  model=model,
  )
  cost = prompt_cost["cost_usd"]
-
+
  # Generate cache keys for each iteration
  cache_keys = []
  for iteration in range(iterations):
@@ -175,7 +247,7 @@ class JobsPrompts:
  iteration=iteration,
  )
  cache_keys.append(cache_key)
-
+
  d = {
  "user_prompt": user_prompt,
  "system_prompt": system_prompt,
@@ -200,7 +272,7 @@ class JobsPrompts:
  dataset_of_prompts = {k: [] for k in self.relevant_keys}

  interviews = self.interviews
-
+
  # Process each interview and invigilator
  for interview_index, interview in enumerate(interviews):
  invigilators = [
@@ -210,11 +282,13 @@ class JobsPrompts:

  for invigilator in invigilators:
  # Process the invigilator and get all data as a dictionary
- data = self._process_one_invigilator(invigilator, interview_index, iterations)
+ data = self._process_one_invigilator(
+ invigilator, interview_index, iterations
+ )
  for k in self.relevant_keys:
  dataset_of_prompts[k].append(data[k])
-
- return Dataset([{k:dataset_of_prompts[k]} for k in self.relevant_keys])
+
+ return Dataset([{k: dataset_of_prompts[k]} for k in self.relevant_keys])

  @staticmethod
  def estimate_prompt_cost(
@@ -230,13 +304,13 @@ class JobsPrompts:
  user_prompt=user_prompt,
  price_lookup=price_lookup,
  inference_service=inference_service,
- model=model
+ model=model,
  )()
-
+
  @staticmethod
  def _extract_prompt_details(invigilator: FetchInvigilator) -> dict:
  """Extracts the prompt details from the invigilator.
-
+
  >>> from edsl.invigilators import InvigilatorAI
  >>> invigilator = InvigilatorAI.example()
  >>> JobsPrompts._extract_prompt_details(invigilator)
@@ -276,11 +350,13 @@ class JobsPrompts:
  ]
  for invigilator in invigilators:
  prompt_details = self._extract_prompt_details(invigilator)
- prompt_cost = self.estimate_prompt_cost(**prompt_details, price_lookup=price_lookup)
+ prompt_cost = self.estimate_prompt_cost(
+ **prompt_details, price_lookup=price_lookup
+ )
  price_estimates = {
- 'estimated_input_tokens': prompt_cost['input_tokens'],
- 'estimated_output_tokens': prompt_cost['output_tokens'],
- 'estimated_cost_usd': prompt_cost['cost_usd']
+ "estimated_input_tokens": prompt_cost["input_tokens"],
+ "estimated_output_tokens": prompt_cost["output_tokens"],
+ "estimated_cost_usd": prompt_cost["cost_usd"],
  }
  data.append({**price_estimates, **prompt_details})

@@ -293,14 +369,18 @@ class JobsPrompts:
  "model": item["model"],
  "estimated_cost_usd": 0,
  "estimated_input_tokens": 0,
- "estimated_output_tokens": 0
+ "estimated_output_tokens": 0,
  }
-
+
  # Accumulate values
  model_groups[key]["estimated_cost_usd"] += item["estimated_cost_usd"]
- model_groups[key]["estimated_input_tokens"] += item["estimated_input_tokens"]
- model_groups[key]["estimated_output_tokens"] += item["estimated_output_tokens"]
-
+ model_groups[key]["estimated_input_tokens"] += item[
+ "estimated_input_tokens"
+ ]
+ model_groups[key]["estimated_output_tokens"] += item[
+ "estimated_output_tokens"
+ ]
+
  # Apply iterations and convert to list
  estimated_costs_by_model = []
  for group_data in model_groups.values():
@@ -345,4 +425,5 @@ class JobsPrompts:

  if __name__ == "__main__":
  import doctest
+
  doctest.testmod(optionflags=doctest.ELLIPSIS)
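A standalone sketch of the per-model aggregation pattern shown above; multiplying the totals by `iterations` at the end is an assumption based on the "Apply iterations" comment (the diff does not show that code), and the sample estimates are made up.

```python
from collections import defaultdict

prompt_estimates = [  # made-up per-prompt estimates
    {"inference_service": "openai", "model": "gpt-4o",
     "estimated_cost_usd": 0.0012, "estimated_input_tokens": 300, "estimated_output_tokens": 225},
    {"inference_service": "openai", "model": "gpt-4o",
     "estimated_cost_usd": 0.0009, "estimated_input_tokens": 240, "estimated_output_tokens": 180},
]


def aggregate(estimates, iterations: int = 1):
    groups = defaultdict(lambda: {"estimated_cost_usd": 0.0,
                                  "estimated_input_tokens": 0,
                                  "estimated_output_tokens": 0})
    for item in estimates:
        key = (item["inference_service"], item["model"])
        groups[key]["estimated_cost_usd"] += item["estimated_cost_usd"]
        groups[key]["estimated_input_tokens"] += item["estimated_input_tokens"]
        groups[key]["estimated_output_tokens"] += item["estimated_output_tokens"]
    return [
        {"inference_service": svc, "model": model,
         **{k: v * iterations for k, v in totals.items()}}
        for (svc, model), totals in groups.items()
    ]


print(aggregate(prompt_estimates, iterations=2))
```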
@@ -509,7 +509,9 @@ class LanguageModel(
  return self.execute_model_call(user_prompt, system_prompt)

  @abstractmethod
- async def async_execute_model_call(self, user_prompt: str, system_prompt: str):
+ async def async_execute_model_call(
+ self, user_prompt: str, system_prompt: str, question_name: Optional[str] = None
+ ):
  """Execute the model call asynchronously.

  This abstract method must be implemented by all model subclasses
@@ -518,6 +520,7 @@ class LanguageModel(
  Args:
  user_prompt: The user message or input prompt
  system_prompt: The system message or context
+ question_name: Optional name of the question being asked (primarily used for test models)

  Returns:
  Coroutine that resolves to the model response
@@ -529,7 +532,7 @@ class LanguageModel(
  pass

  async def remote_async_execute_model_call(
- self, user_prompt: str, system_prompt: str
+ self, user_prompt: str, system_prompt: str, question_name: Optional[str] = None
  ):
  """Execute the model call remotely through the EDSL Coop service.

@@ -540,6 +543,7 @@ class LanguageModel(
  Args:
  user_prompt: The user message or input prompt
  system_prompt: The system message or context
+ question_name: Optional name of the question being asked (primarily used for test models)

  Returns:
  Coroutine that resolves to the model response from the remote service
@@ -563,6 +567,7 @@ class LanguageModel(
  Args:
  *args: Positional arguments to pass to async_execute_model_call
  **kwargs: Keyword arguments to pass to async_execute_model_call
+ Can include question_name for test models

  Returns:
  The model response
@@ -702,7 +707,9 @@ class LanguageModel(
  "system_prompt": system_prompt,
  "files_list": files_list,
  }
-
+ # Add question_name parameter for test models
+ if self.model == "test" and invigilator:
+ params["question_name"] = invigilator.question.question_name
  # Get timeout from configuration
  from ..config import CONFIG

@@ -710,7 +717,6 @@ class LanguageModel(

  # Execute the model call with timeout
  response = await asyncio.wait_for(f(**params), timeout=TIMEOUT)
-
  # Store the response in the cache
  new_cache_key = cache.store(
  **cache_call_params, response=response, service=self._inference_service_
@@ -801,7 +807,6 @@ class LanguageModel(

  # Create structured input record
  model_inputs = ModelInputs(user_prompt=user_prompt, system_prompt=system_prompt)
-
  # Get model response (using cache if available)
  model_outputs: ModelResponse = (
  await self._async_get_intended_model_call_outcome(**params)
@@ -1046,7 +1051,12 @@ class LanguageModel(
  ]

  # Define a new async_execute_model_call that only reads from cache
- async def async_execute_model_call(self, user_prompt: str, system_prompt: str):
+ async def async_execute_model_call(
+ self,
+ user_prompt: str,
+ system_prompt: str,
+ question_name: Optional[str] = None,
+ ):
  """Only use cached responses, never making new API calls."""
  cache_call_params = {
  "model": str(self.model),
@@ -5,6 +5,7 @@ from ..surveys import Survey

  from .language_model import LanguageModel

+
  def create_survey(num_questions: int, chained: bool = True, take_scenario=False):
  from ..questions import QuestionFreeText

@@ -28,7 +29,6 @@ def create_survey(num_questions: int, chained: bool = True, take_scenario=False)
  def create_language_model(
  exception: Exception, fail_at_number: int, never_ending=False
  ):
-
  class LanguageModelFromUtilities(LanguageModel):
  _model_ = "test"
  _parameters_ = {"temperature": 0.5}
@@ -45,6 +45,7 @@ def create_language_model(
  user_prompt: str,
  system_prompt: str,
  files_list: Optional[List[Any]] = None,
+ question_name: Optional[str] = None,
  ) -> dict[str, Any]:
  question_number = int(
  user_prompt.split("XX")[1]
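Finally, a minimal end-to-end sketch of how the new `question_name` keyword reaches a model implementation; `CannedModel` is illustrative and stands in for the real invigilator and cache machinery that builds the `params` dict shown above.

```python
import asyncio
from typing import Any, Optional


class CannedModel:
    """Illustrative stand-in for a test model with per-question canned answers."""

    model = "test"

    def __init__(self, canned_response):
        self.canned_response = canned_response

    async def async_execute_model_call(
        self, user_prompt: str, system_prompt: str, question_name: Optional[str] = None
    ) -> dict[str, Any]:
        answer = self.canned_response
        if isinstance(answer, dict) and question_name:
            answer = answer.get(question_name, f"No canned response for '{question_name}'")
        return {"message": [{"text": str(answer)}]}


async def main():
    model = CannedModel({"favorite_color": "blue"})
    params = {"user_prompt": "What is your favorite color?", "system_prompt": ""}
    if model.model == "test":  # mirrors the gating added for test models
        params["question_name"] = "favorite_color"
    print(await model.async_execute_model_call(**params))


asyncio.run(main())
```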