PyPI - edsl - Versions diffs - 0.1.52__py3-none-any.whl → 0.1.53__py3-none-any.whl - Mend

edsl 0.1.52__py3-none-any.whl → 0.1.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

edsl/__version__.py +1 -1
edsl/interviews/request_token_estimator.py +104 -2
edsl/invigilators/invigilators.py +5 -0
edsl/scenarios/file_store.py +73 -23
{edsl-0.1.52.dist-info → edsl-0.1.53.dist-info}/METADATA +1 -1
{edsl-0.1.52.dist-info → edsl-0.1.53.dist-info}/RECORD +9 -9
{edsl-0.1.52.dist-info → edsl-0.1.53.dist-info}/LICENSE +0 -0
{edsl-0.1.52.dist-info → edsl-0.1.53.dist-info}/WHEEL +0 -0
{edsl-0.1.52.dist-info → edsl-0.1.53.dist-info}/entry_points.txt +0 -0

edsl/__version__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.1.52"
1	+ __version__ = "0.1.53"

edsl/interviews/request_token_estimator.py CHANGED Viewed

@@ -1,6 +1,101 @@
 from ..jobs.fetch_invigilator import FetchInvigilator
 from ..scenarios import FileStore
+import math
+# Per-model image pricing used by estimate_tokens: an image costs
+# base_tokens plus tile_tokens for every 512x512 tile needed to cover it.
+# Model names without an entry here fall back to a pixel-area estimate.
+VISION_MODELS = {
+    "gpt-4o": {
+        "base_tokens": 85,
+        "tile_tokens": 170,
+    },
+    "gpt-4o-mini": {
+        "base_tokens": 2833,
+        "tile_tokens": 5667,
+    },
+    "o1": {
+        "base_tokens": 75,
+        "tile_tokens": 150,
+    },
+}
+def approximate_image_tokens_google(width: int, height: int) -> int:
+    """
+    Estimate how many tokens a Gemini model charges for a single image.
+    Pricing rule implemented here:
+    - An image that fits inside 384x384 px costs a flat 258 tokens.
+    - Any other image is covered by a grid of 768x768 px tiles, each costing
+      258 tokens; a partially covered tile counts as a whole tile.
+    The result is an approximation: no cropping or rescaling is modelled, so
+    the figure reported by the real API may differ.
+    Args:
+        width: Image width in pixels.
+        height: Image height in pixels.
+    Returns:
+        The estimated token count as an integer.
+    Raises:
+        ValueError: If either dimension is not an ``int`` or is not positive.
+    """
+    small_side_limit = 384  # both sides at or below this -> flat price
+    tile_side = 768  # edge length of one pricing tile
+    tokens_per_unit = 258  # price of a small image and of each tile
+    dimensions = (width, height)
+    # The type check runs before the comparison, so non-numeric input is
+    # reported as ValueError rather than failing on the comparison itself.
+    if not all(isinstance(side, int) and side > 0 for side in dimensions):
+        raise ValueError("Image width and height must be positive integers.")
+    if max(dimensions) <= small_side_limit:
+        return tokens_per_unit
+    # Ceiling division per axis so the tile grid fully covers the image.
+    columns, rows = (math.ceil(side / tile_side) for side in dimensions)
+    return columns * rows * tokens_per_unit
+def estimate_tokens(model_name, width, height):
+    """
+    Estimate the token cost of sending one image to the given model.
+    Args:
+        model_name: Name of the target language model.
+        width: Image width in pixels.
+        height: Image height in pixels.
+    Returns:
+        The estimated token count. The pixel-area fallback uses true
+        division and therefore returns a float.
+    """
+    if model_name == "test":
+        return 10  # for testing purposes
+    if "gemini" in model_name:
+        return approximate_image_tokens_google(width, height)
+    # Claude models, and any model without a VISION_MODELS entry, are
+    # estimated from pixel area: one token per 750 pixels.
+    if "claude" in model_name or model_name not in VISION_MODELS:
+        return width * height / 750
+    # Remaining models: a fixed base cost plus a per-tile cost for every
+    # 512x512 tile needed to cover the image.
+    pricing = VISION_MODELS[model_name]
+    tile_side = 512
+    tile_count = math.ceil(width / tile_side) * math.ceil(height / tile_side)
+    return pricing["base_tokens"] + pricing["tile_tokens"] * tile_count
 class RequestTokenEstimator:
     """Estimate the number of tokens that will be required to run the focal task."""
@@ -24,15 +119,22 @@ class RequestTokenEstimator:
             elif isinstance(prompt, list):
                 for file in prompt:
                     if isinstance(file, FileStore):
-                        file_tokens += file.size * 0.25
+                        if file.is_image():
+                            model_name = self.interview.model.model
+                            width, height = file.get_image_dimensions()
+                            token_usage = estimate_tokens(model_name, width, height)
+                            file_tokens += token_usage
+                        else:
+                            file_tokens += file.size * 0.25
             else:
                 from .exceptions import InterviewTokenError
                 raise InterviewTokenError(f"Prompt is of type {type(prompt)}")
         result: float = len(combined_text) / 4.0 + file_tokens
         return result
 if __name__ == "__main__":
     import doctest
     doctest.testmod(optionflags=doctest.ELLIPSIS)

edsl/invigilators/invigilators.py CHANGED Viewed

@@ -397,6 +397,11 @@ class InvigilatorAI(InvigilatorBase):
             data = {
                 "answer": agent_response_dict.edsl_dict.answer
                 if type(agent_response_dict.edsl_dict.answer) is str
+                or type(agent_response_dict.edsl_dict.answer) is dict
+                or type(agent_response_dict.edsl_dict.answer) is list
+                or type(agent_response_dict.edsl_dict.answer) is int
+                or type(agent_response_dict.edsl_dict.answer) is float
+                or type(agent_response_dict.edsl_dict.answer) is bool
                 else "",
                 "comment": agent_response_dict.edsl_dict.comment
                 if agent_response_dict.edsl_dict.comment

edsl/scenarios/file_store.py CHANGED Viewed

@@ -17,25 +17,26 @@ from .file_methods import FileMethods
 if TYPE_CHECKING:
     from .scenario_list import ScenarioList
 class FileStore(Scenario):
     """
     A specialized Scenario subclass for managing file content and metadata.
     FileStore provides functionality for working with files in EDSL, handling various
     file formats with appropriate encoding, storage, and access methods. It extends
     Scenario to allow files to be included in surveys, questions, and other EDSL components.
     FileStore supports multiple file formats including text, PDF, Word documents, images,
     and more. It can load files from local paths or URLs, and provides methods for
     accessing file content, extracting text, and managing file operations.
     Key features:
     - Base64 encoding for portability and serialization
     - Lazy loading through temporary files when needed
     - Automatic MIME type detection
     - Text extraction from various file formats
     - Format-specific operations through specialized handlers
     Attributes:
         _path (str): The original file path.
         _temp_path (str): Path to any generated temporary file.
@@ -45,7 +46,7 @@ class FileStore(Scenario):
         base64_string (str): Base64-encoded file content.
         external_locations (dict): Dictionary of external locations.
         extracted_text (str): Text extracted from the file.
     Examples:
         >>> import tempfile
         >>> # Create a text file
@@ -53,13 +54,14 @@ class FileStore(Scenario):
         ...     _ = f.write("Hello World")
         ...     _ = f.flush()
         ...     fs = FileStore(f.name)
         # The following example works locally but is commented out for CI environments
         # where dependencies like pandoc may not be available:
         # >>> # FileStore supports various formats
         # >>> formats = ["txt", "pdf", "docx", "pptx", "md", "py", "json", "csv", "html", "png", "db"]
         # >>> _ = [FileStore.example(format) for format in formats]
     """
     __documentation__ = "https://docs.expectedparrot.com/en/latest/filestore.html"
     def __init__(
@@ -75,11 +77,11 @@ class FileStore(Scenario):
     ):
         """
         Initialize a new FileStore object.
         This constructor creates a FileStore object from either a file path or a base64-encoded
         string representation of file content. It handles automatic detection of file properties
         like MIME type, extracts text content when possible, and manages file encoding.
         Args:
             path: Path to the file to load. Can be a local file path or URL.
             mime_type: MIME type of the file. If not provided, will be auto-detected.
@@ -93,7 +95,7 @@ class FileStore(Scenario):
                           text will be extracted automatically if possible.
             **kwargs: Additional keyword arguments. 'filename' can be used as an
                      alternative to 'path'.
         Note:
             If path is a URL (starts with http:// or https://), the file will be
             downloaded automatically.
@@ -138,15 +140,15 @@ class FileStore(Scenario):
     def path(self) -> str:
         """
         Returns a valid path to the file content, creating a temporary file if needed.
         This property ensures that a valid file path is always available for the file
         content, even if the original file is no longer accessible or if the FileStore
         was created from a base64 string without a path. If the original path doesn't
         exist, it automatically generates a temporary file from the base64 content.
         Returns:
             A string containing a valid file path to access the file content.
         Examples:
             >>> import tempfile, os
             >>> with tempfile.NamedTemporaryFile(suffix=".txt", mode="w") as f:
@@ -155,8 +157,8 @@ class FileStore(Scenario):
             ...     fs = FileStore(f.name)
             ...     os.path.isfile(fs.path)
             True
         Notes:
             - The path may point to a temporary file that will be cleaned up when the
               Python process exits
@@ -319,9 +321,10 @@ class FileStore(Scenario):
         link = ConstructDownloadLink(self).html_create_link(self.path, style=None)
         return f"{parent_html}<br>{link}"
     def download_link(self):
         from .construct_download_link import ConstructDownloadLink
         return ConstructDownloadLink(self).html_create_link(self.path, style=None)
     def encode_file_to_base64_string(self, file_path: str):
@@ -572,6 +575,53 @@ class FileStore(Scenario):
             f"Converting {self.suffix} files to pandas DataFrame is not supported"
         )
+    def is_image(self) -> bool:
+        """
+        Report whether this file is an image, judged by its MIME type.
+        A MIME type that is not a string (for example ``None``) is treated as
+        "not an image" instead of raising.
+        Returns:
+            bool: True if ``mime_type`` is a string starting with "image/",
+            False otherwise.
+        Examples:
+            >>> fs = FileStore.example("png")
+            >>> fs.is_image()
+            True
+            >>> fs = FileStore.example("txt")
+            >>> fs.is_image()
+            False
+        """
+        mime_type = self.mime_type
+        # NOTE(review): guards against mime_type being None, e.g. if type
+        # detection found nothing - confirm whether the constructor allows it.
+        return isinstance(mime_type, str) and mime_type.startswith("image/")
+    def get_image_dimensions(self) -> tuple:
+        """
+        Return the pixel dimensions of an image file as ``(width, height)``.
+        Pillow is imported inside the method, so it is only required when
+        this method is actually called.
+        Returns:
+            tuple: ``(width, height)`` as reported by Pillow's ``Image.size``.
+        Raises:
+            ValueError: If the file is not an image.
+            ImportError: If Pillow (PIL) is not installed.
+        Examples:
+            >>> fs = FileStore.example("png")
+            >>> width, height = fs.get_image_dimensions()
+            >>> isinstance(width, int) and isinstance(height, int)
+            True
+        """
+        if not self.is_image():
+            raise ValueError("This file is not an image")
+        try:
+            from PIL import Image
+        except ImportError as exc:
+            # Chain the original error so the underlying import failure
+            # stays visible in the traceback.
+            raise ImportError(
+                "PIL (Pillow) is required to get image dimensions. Install it with: pip install pillow"
+            ) from exc
+        # The context manager closes the opened image once the size is read.
+        with Image.open(self.path) as img:
+            return img.size  # Returns (width, height)
     def __getattr__(self, name):
         """
         Delegate pandas DataFrame methods to the underlying DataFrame if this is a CSV file
@@ -662,13 +712,13 @@ class FileStore(Scenario):
 #         endobj
 #         xref
 #         0 7
-#         0000000000 65535 f
-#         0000000010 00000 n
-#         0000000053 00000 n
-#         0000000100 00000 n
-#         0000000173 00000 n
-#         0000000232 00000 n
-#         0000000272 00000 n
+#         0000000000 65535 f
+#         0000000010 00000 n
+#         0000000053 00000 n
+#         0000000100 00000 n
+#         0000000173 00000 n
+#         0000000232 00000 n
+#         0000000272 00000 n
 #         trailer
 #         << /Size 7 /Root 1 0 R >>
 #         startxref
@@ -748,6 +798,7 @@ class FileStore(Scenario):
 if __name__ == "__main__":
     import doctest
     doctest.testmod()
     # formats = FileMethods.supported_file_types()
@@ -756,4 +807,3 @@ if __name__ == "__main__":
     #     fs = FileStore.example(file_type)
     #     fs.view()
     #     input("Press Enter to continue...")

{edsl-0.1.52.dist-info → edsl-0.1.53.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: edsl
-Version: 0.1.52
+Version: 0.1.53
 Summary: Create and analyze LLM-based surveys
 Home-page: https://www.expectedparrot.com/
 License: MIT

{edsl-0.1.52.dist-info → edsl-0.1.53.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
 edsl/__init__.py,sha256=SXi_Zm4kf6H2WW_YeTuF6zRNZEWKzpKa7NRXUzn2Ty4,4593
-edsl/__version__.py,sha256=xEFLBJ_IHewFRxpAOMcKAhQVPVf06lsExSa2472N2G8,23
+edsl/__version__.py,sha256=FdQ5_-vfyHXSNAuzQXtxxDH2WjNJ3g581mH_zvly6Xo,23
 edsl/agents/__init__.py,sha256=AyhfXjygRHT1Pd9w16lcu5Bu0jnBmMPz86aKP1uRL3Y,93
 edsl/agents/agent.py,sha256=svTVvvg9eCMUhnb49Bxsf9nAwXragtRaeBkyB6q89EE,54423
 edsl/agents/agent_list.py,sha256=JA39_6RSmiD2mqJgWr2NWovNxNmu4mhZbYmn5be87NQ,21572
@@ -107,12 +107,12 @@ edsl/interviews/interview_status_dictionary.py,sha256=0ZvXLusfOA8xD_Fco4PjEBGwmR
 edsl/interviews/interview_status_enum.py,sha256=KJ-1yLAHdX-p8TiFnM0M3v1tnBwkq4aMCuBX6-ytrI8,229
 edsl/interviews/interview_status_log.py,sha256=sRiQ9kIT1WcF-8beETn6E7IsdRRrfbco-yjdAjkXncw,3587
 edsl/interviews/interview_task_manager.py,sha256=wPi5izhsVK5wI5HfMXMLL5NIoucHNCoGXfRuRzI-wYE,3665
-edsl/interviews/request_token_estimator.py,sha256=VATjVBcFyEyc9fhqySo1jIRm5bI8l3lSmX3_N-t8W3Y,1359
+edsl/interviews/request_token_estimator.py,sha256=n_C-alSYOFi27cBcIRhtBX-fvklDcvM2Kowte-EDnzM,4833
 edsl/interviews/statistics.py,sha256=lZCtq79QrDKG3jXao_OWuBRhnly9VyuhM6IdTJaYqPg,2461
 edsl/invigilators/__init__.py,sha256=fKbZ7p9-kMelpvET3Ku2Owu-tL_apC-8gi9JychpMBY,1843
 edsl/invigilators/exceptions.py,sha256=ejoF-Gt-YcnW1yHyfpJ3jZm8AC_zD0GCYafRO2LlAMQ,2767
 edsl/invigilators/invigilator_base.py,sha256=DgrXTK4AAxXr4wg2pzc0p1aGPPf1UUt01C-JW1UBTvo,20099
-edsl/invigilators/invigilators.py,sha256=dc_H4WptOKzAaHiKBeW-FFBOB1ULVO-xamtjner_xGY,22005
+edsl/invigilators/invigilators.py,sha256=UH8gy59qq0_f9jzumDbdugF0SvGW_eIr2GT5zCUO8V0,22355
 edsl/invigilators/prompt_constructor.py,sha256=THHGcZPI-QUOH8Z9cQEzH7bZEoo0V_Nc_Phlhc9AzL0,19115
 edsl/invigilators/prompt_helpers.py,sha256=LuMZFZkInPY8M7Rw9fG9rpJIcT89tr2_Iq10ZHH_Y4A,5409
 edsl/invigilators/question_instructions_prompt_builder.py,sha256=E5zpwctpt_5JjONkZRcMwB0MACAzDvvnzUhmuWTnjd0,9684
@@ -276,7 +276,7 @@ edsl/scenarios/directory_scanner.py,sha256=gnDXU1jKSjSE3LXEhE7ilfJUL_sxK2HHmsA2L
 edsl/scenarios/document_chunker.py,sha256=EpB0V0oxLzpKntl00Qa3VZNPS7sg9aXdYyqKxhFFzTM,7680
 edsl/scenarios/exceptions.py,sha256=FeORBm90UthKHDp7cE8I7KJgyA3-pFKNpoivZRr8ifc,10636
 edsl/scenarios/file_methods.py,sha256=cB_IPVTGz4_yJiRMTdNTvpW4l43lrTbyJOV3Pnm6UPs,2631
-edsl/scenarios/file_store.py,sha256=Zb9hA2i9jrDCONjcptWYj0dkSCAzNyL8EnHbNT3dtcE,26704
+edsl/scenarios/file_store.py,sha256=slqSIENW6SP1dhnXTviq4umlvGHeYsDB3SM24t0ll_I,28033
 edsl/scenarios/handlers/__init__.py,sha256=9r1fDjUviGXso9h4d05wG9RECfqzfps55CQgb-ojCBo,848
 edsl/scenarios/handlers/csv_file_store.py,sha256=kXOms0ph5JJj6jSbpfQ-SZjuT4vvSRhq5AGpv1L4TPQ,1369
 edsl/scenarios/handlers/docx_file_store.py,sha256=KSKAAUIWF2K5xr92nx7UGQ9djgtDX4ke-Eyik8QAdlQ,2155
@@ -358,8 +358,8 @@ edsl/utilities/repair_functions.py,sha256=EXkXsqnmgPqj9b3dff1cZnJyaZw-qEvGENXCRH
 edsl/utilities/restricted_python.py,sha256=248N2p5EWHDSpcK1G-q7DUoJeWy4sB6aO-RV0-5O7uY,2038
 edsl/utilities/template_loader.py,sha256=SCAcnTnxNQ67MNSkmfz7F-S_u2peyGn2j1oRIqi1wfg,870
 edsl/utilities/utilities.py,sha256=irHheAGOnl_6RwI--Hi9StVzvsHcWCqB48PWsWJQYOw,12045
-edsl-0.1.52.dist-info/LICENSE,sha256=_qszBDs8KHShVYcYzdMz3HNMtH-fKN_p5zjoVAVumFc,1111
-edsl-0.1.52.dist-info/METADATA,sha256=18LEADuuU0qmUcarS9CorkQL7Fh59xC1D842ahXazFk,12670
-edsl-0.1.52.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-edsl-0.1.52.dist-info/entry_points.txt,sha256=JnG7xqMtHaQu9BU-yPATxdyCeA48XJpuclnWCqMfIMU,38
-edsl-0.1.52.dist-info/RECORD,,
+edsl-0.1.53.dist-info/LICENSE,sha256=_qszBDs8KHShVYcYzdMz3HNMtH-fKN_p5zjoVAVumFc,1111
+edsl-0.1.53.dist-info/METADATA,sha256=WUkrcqMnf8uKQvTyrQTx0q3oVFDurmcI3HkXz9Ljk6g,12670
+edsl-0.1.53.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+edsl-0.1.53.dist-info/entry_points.txt,sha256=JnG7xqMtHaQu9BU-yPATxdyCeA48XJpuclnWCqMfIMU,38
+edsl-0.1.53.dist-info/RECORD,,

{edsl-0.1.52.dist-info → edsl-0.1.53.dist-info}/LICENSE RENAMED Viewed

File without changes

{edsl-0.1.52.dist-info → edsl-0.1.53.dist-info}/WHEEL RENAMED Viewed

File without changes

{edsl-0.1.52.dist-info → edsl-0.1.53.dist-info}/entry_points.txt RENAMED Viewed

File without changes

edsl 0.1.52__py3-none-any.whl → 0.1.53__py3-none-any.whl

edsl 0.1.52__py3-none-any.whl → 0.1.53__py3-none-any.whl