edsl 0.1.52__py3-none-any.whl → 0.1.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
edsl/__version__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.1.52"
1
+ __version__ = "0.1.53"
@@ -1,6 +1,101 @@
1
1
  from ..jobs.fetch_invigilator import FetchInvigilator
2
2
  from ..scenarios import FileStore
3
3
 
4
+ import math
5
+
6
# Per-model image token costs used by estimate_tokens():
#   base_tokens - flat cost added once per image
#   tile_tokens - cost multiplied by the number of tiles covering the image
VISION_MODELS = {
    "gpt-4o": {"base_tokens": 85, "tile_tokens": 170},
    "gpt-4o-mini": {"base_tokens": 2833, "tile_tokens": 5667},
    "o1": {"base_tokens": 75, "tile_tokens": 150},
}
21
+
22
+
23
def approximate_image_tokens_google(width: int, height: int) -> int:
    """
    Approximate the token usage of an image from its pixel dimensions.

    The estimate follows the tiling rules this module assumes for Gemini 2.0
    models:
    - Images with both dimensions <= 384px cost a flat 258 tokens.
    - Larger images are counted in 768x768 tiles, each costing 258 tokens.

    Note: This is an *approximation*. The exact cropping, scaling, and tiling
    strategy used by the actual Gemini API might differ slightly.

    Args:
        width: The width of the image in pixels.
        height: The height of the image in pixels.

    Returns:
        An estimated integer token count for the image.

    Raises:
        ValueError: If width or height are not positive integers.
    """
    SMALL_IMAGE_THRESHOLD = 384  # Max dimension for the flat token cost
    FIXED_TOKEN_COST_SMALL = 258  # Token cost for small images (<= 384x384)
    TILE_SIZE = 768  # Edge length of the tiles used for larger images
    TOKEN_COST_PER_TILE = 258  # Token cost per 768x768 tile

    dimensions_valid = (
        isinstance(width, int)
        and isinstance(height, int)
        and width > 0
        and height > 0
    )
    if not dimensions_valid:
        raise ValueError("Image width and height must be positive integers.")

    # Small image: both dimensions fit under the threshold.
    if width <= SMALL_IMAGE_THRESHOLD and height <= SMALL_IMAGE_THRESHOLD:
        return FIXED_TOKEN_COST_SMALL

    # Larger image: count the tiles needed to cover it. Integer ceiling
    # division (-(-a // b)) keeps the result exact for any int, whereas
    # math.ceil(a / b) goes through a float and can round for huge values.
    tiles_wide = -(-width // TILE_SIZE)
    tiles_high = -(-height // TILE_SIZE)
    return tiles_wide * tiles_high * TOKEN_COST_PER_TILE
74
+
75
+
76
def estimate_tokens(model_name, width, height):
    """
    Estimate the token cost of an image for the given model.

    Dispatch rules, checked in order:
    - "test" returns a fixed 10 (used for testing).
    - Names containing "gemini" delegate to approximate_image_tokens_google.
    - Names containing "claude", and any name absent from VISION_MODELS,
      use a pixel-area estimate of width * height / 750 (a float).
    - Names present in VISION_MODELS pay the model's base_tokens plus
      tile_tokens for every 512x512 tile needed to cover the image.

    No rescaling of the image is modelled before tiles are counted.

    Args:
        model_name: Name of the model the image will be sent to.
        width: Image width in pixels.
        height: Image height in pixels.

    Returns:
        The estimated token count (int or float, depending on the branch).
    """
    if model_name == "test":
        return 10  # for testing purposes

    if "gemini" in model_name:
        return approximate_image_tokens_google(width, height)

    # Claude models and unknown models share the same pixel-area estimate.
    if "claude" in model_name or model_name not in VISION_MODELS:
        return width * height / 750

    tile_size = 512
    model_costs = VISION_MODELS[model_name]
    tile_count = math.ceil(width / tile_size) * math.ceil(height / tile_size)
    return model_costs["base_tokens"] + model_costs["tile_tokens"] * tile_count
98
+
4
99
 
5
100
  class RequestTokenEstimator:
6
101
  """Estimate the number of tokens that will be required to run the focal task."""
@@ -24,15 +119,22 @@ class RequestTokenEstimator:
24
119
  elif isinstance(prompt, list):
25
120
  for file in prompt:
26
121
  if isinstance(file, FileStore):
27
- file_tokens += file.size * 0.25
122
+ if file.is_image():
123
+ model_name = self.interview.model.model
124
+ width, height = file.get_image_dimensions()
125
+ token_usage = estimate_tokens(model_name, width, height)
126
+ file_tokens += token_usage
127
+ else:
128
+ file_tokens += file.size * 0.25
28
129
  else:
29
130
  from .exceptions import InterviewTokenError
131
+
30
132
  raise InterviewTokenError(f"Prompt is of type {type(prompt)}")
31
133
  result: float = len(combined_text) / 4.0 + file_tokens
32
134
  return result
33
135
 
34
136
 
35
-
36
137
  if __name__ == "__main__":
37
138
  import doctest
139
+
38
140
  doctest.testmod(optionflags=doctest.ELLIPSIS)
@@ -397,6 +397,11 @@ class InvigilatorAI(InvigilatorBase):
397
397
  data = {
398
398
  "answer": agent_response_dict.edsl_dict.answer
399
399
  if type(agent_response_dict.edsl_dict.answer) is str
400
+ or type(agent_response_dict.edsl_dict.answer) is dict
401
+ or type(agent_response_dict.edsl_dict.answer) is list
402
+ or type(agent_response_dict.edsl_dict.answer) is int
403
+ or type(agent_response_dict.edsl_dict.answer) is float
404
+ or type(agent_response_dict.edsl_dict.answer) is bool
400
405
  else "",
401
406
  "comment": agent_response_dict.edsl_dict.comment
402
407
  if agent_response_dict.edsl_dict.comment
@@ -17,25 +17,26 @@ from .file_methods import FileMethods
17
17
  if TYPE_CHECKING:
18
18
  from .scenario_list import ScenarioList
19
19
 
20
+
20
21
  class FileStore(Scenario):
21
22
  """
22
23
  A specialized Scenario subclass for managing file content and metadata.
23
-
24
+
24
25
  FileStore provides functionality for working with files in EDSL, handling various
25
26
  file formats with appropriate encoding, storage, and access methods. It extends
26
27
  Scenario to allow files to be included in surveys, questions, and other EDSL components.
27
-
28
+
28
29
  FileStore supports multiple file formats including text, PDF, Word documents, images,
29
30
  and more. It can load files from local paths or URLs, and provides methods for
30
31
  accessing file content, extracting text, and managing file operations.
31
-
32
+
32
33
  Key features:
33
34
  - Base64 encoding for portability and serialization
34
35
  - Lazy loading through temporary files when needed
35
36
  - Automatic MIME type detection
36
37
  - Text extraction from various file formats
37
38
  - Format-specific operations through specialized handlers
38
-
39
+
39
40
  Attributes:
40
41
  _path (str): The original file path.
41
42
  _temp_path (str): Path to any generated temporary file.
@@ -45,7 +46,7 @@ class FileStore(Scenario):
45
46
  base64_string (str): Base64-encoded file content.
46
47
  external_locations (dict): Dictionary of external locations.
47
48
  extracted_text (str): Text extracted from the file.
48
-
49
+
49
50
  Examples:
50
51
  >>> import tempfile
51
52
  >>> # Create a text file
@@ -53,13 +54,14 @@ class FileStore(Scenario):
53
54
  ... _ = f.write("Hello World")
54
55
  ... _ = f.flush()
55
56
  ... fs = FileStore(f.name)
56
-
57
+
57
58
  # The following example works locally but is commented out for CI environments
58
59
  # where dependencies like pandoc may not be available:
59
60
  # >>> # FileStore supports various formats
60
61
  # >>> formats = ["txt", "pdf", "docx", "pptx", "md", "py", "json", "csv", "html", "png", "db"]
61
62
  # >>> _ = [FileStore.example(format) for format in formats]
62
63
  """
64
+
63
65
  __documentation__ = "https://docs.expectedparrot.com/en/latest/filestore.html"
64
66
 
65
67
  def __init__(
@@ -75,11 +77,11 @@ class FileStore(Scenario):
75
77
  ):
76
78
  """
77
79
  Initialize a new FileStore object.
78
-
80
+
79
81
  This constructor creates a FileStore object from either a file path or a base64-encoded
80
82
  string representation of file content. It handles automatic detection of file properties
81
83
  like MIME type, extracts text content when possible, and manages file encoding.
82
-
84
+
83
85
  Args:
84
86
  path: Path to the file to load. Can be a local file path or URL.
85
87
  mime_type: MIME type of the file. If not provided, will be auto-detected.
@@ -93,7 +95,7 @@ class FileStore(Scenario):
93
95
  text will be extracted automatically if possible.
94
96
  **kwargs: Additional keyword arguments. 'filename' can be used as an
95
97
  alternative to 'path'.
96
-
98
+
97
99
  Note:
98
100
  If path is a URL (starts with http:// or https://), the file will be
99
101
  downloaded automatically.
@@ -138,15 +140,15 @@ class FileStore(Scenario):
138
140
  def path(self) -> str:
139
141
  """
140
142
  Returns a valid path to the file content, creating a temporary file if needed.
141
-
143
+
142
144
  This property ensures that a valid file path is always available for the file
143
145
  content, even if the original file is no longer accessible or if the FileStore
144
146
  was created from a base64 string without a path. If the original path doesn't
145
147
  exist, it automatically generates a temporary file from the base64 content.
146
-
148
+
147
149
  Returns:
148
150
  A string containing a valid file path to access the file content.
149
-
151
+
150
152
  Examples:
151
153
  >>> import tempfile, os
152
154
  >>> with tempfile.NamedTemporaryFile(suffix=".txt", mode="w") as f:
@@ -155,8 +157,8 @@ class FileStore(Scenario):
155
157
  ... fs = FileStore(f.name)
156
158
  ... os.path.isfile(fs.path)
157
159
  True
158
-
159
-
160
+
161
+
160
162
  Notes:
161
163
  - The path may point to a temporary file that will be cleaned up when the
162
164
  Python process exits
@@ -319,9 +321,10 @@ class FileStore(Scenario):
319
321
 
320
322
  link = ConstructDownloadLink(self).html_create_link(self.path, style=None)
321
323
  return f"{parent_html}<br>{link}"
322
-
324
+
323
325
  def download_link(self):
324
326
  from .construct_download_link import ConstructDownloadLink
327
+
325
328
  return ConstructDownloadLink(self).html_create_link(self.path, style=None)
326
329
 
327
330
  def encode_file_to_base64_string(self, file_path: str):
@@ -572,6 +575,53 @@ class FileStore(Scenario):
572
575
  f"Converting {self.suffix} files to pandas DataFrame is not supported"
573
576
  )
574
577
 
578
def is_image(self) -> bool:
    """
    Check if the file is an image by examining its MIME type.

    Returns:
        bool: True if the MIME type starts with "image/", False otherwise
        (including when the MIME type is missing or not a string).

    Examples:
        >>> fs = FileStore.example("png")
        >>> fs.is_image()
        True
        >>> fs = FileStore.example("txt")
        >>> fs.is_image()
        False
    """
    mime_type = self.mime_type
    # A file whose MIME type is None (or otherwise not a string) is treated
    # as "not an image" rather than raising AttributeError on .startswith().
    # NOTE(review): whether mime_type can actually be None depends on the
    # constructor's detection logic, which is not visible here - confirm.
    return isinstance(mime_type, str) and mime_type.startswith("image/")
595
+
596
def get_image_dimensions(self) -> tuple:
    """
    Get the dimensions (width, height) of an image file.

    Returns:
        tuple: A tuple containing the width and height of the image.

    Raises:
        ValueError: If the file is not an image.
        ImportError: If PIL (Pillow) is not installed.

    Examples:
        >>> fs = FileStore.example("png")
        >>> width, height = fs.get_image_dimensions()
        >>> isinstance(width, int) and isinstance(height, int)
        True
    """
    if not self.is_image():
        raise ValueError("This file is not an image")

    # Pillow is imported lazily so it is only needed when this method is used.
    try:
        from PIL import Image
    except ImportError as exc:
        # Keep the original failure attached as the cause for debugging.
        raise ImportError(
            "PIL (Pillow) is required to get image dimensions. Install it with: pip install pillow"
        ) from exc

    with Image.open(self.path) as img:
        return img.size  # Returns (width, height)
624
+
575
625
  def __getattr__(self, name):
576
626
  """
577
627
  Delegate pandas DataFrame methods to the underlying DataFrame if this is a CSV file
@@ -662,13 +712,13 @@ class FileStore(Scenario):
662
712
  # endobj
663
713
  # xref
664
714
  # 0 7
665
- # 0000000000 65535 f
666
- # 0000000010 00000 n
667
- # 0000000053 00000 n
668
- # 0000000100 00000 n
669
- # 0000000173 00000 n
670
- # 0000000232 00000 n
671
- # 0000000272 00000 n
715
+ # 0000000000 65535 f
716
+ # 0000000010 00000 n
717
+ # 0000000053 00000 n
718
+ # 0000000100 00000 n
719
+ # 0000000173 00000 n
720
+ # 0000000232 00000 n
721
+ # 0000000272 00000 n
672
722
  # trailer
673
723
  # << /Size 7 /Root 1 0 R >>
674
724
  # startxref
@@ -748,6 +798,7 @@ class FileStore(Scenario):
748
798
 
749
799
  if __name__ == "__main__":
750
800
  import doctest
801
+
751
802
  doctest.testmod()
752
803
 
753
804
  # formats = FileMethods.supported_file_types()
@@ -756,4 +807,3 @@ if __name__ == "__main__":
756
807
  # fs = FileStore.example(file_type)
757
808
  # fs.view()
758
809
  # input("Press Enter to continue...")
759
-
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: edsl
3
- Version: 0.1.52
3
+ Version: 0.1.53
4
4
  Summary: Create and analyze LLM-based surveys
5
5
  Home-page: https://www.expectedparrot.com/
6
6
  License: MIT
@@ -1,5 +1,5 @@
1
1
  edsl/__init__.py,sha256=SXi_Zm4kf6H2WW_YeTuF6zRNZEWKzpKa7NRXUzn2Ty4,4593
2
- edsl/__version__.py,sha256=xEFLBJ_IHewFRxpAOMcKAhQVPVf06lsExSa2472N2G8,23
2
+ edsl/__version__.py,sha256=FdQ5_-vfyHXSNAuzQXtxxDH2WjNJ3g581mH_zvly6Xo,23
3
3
  edsl/agents/__init__.py,sha256=AyhfXjygRHT1Pd9w16lcu5Bu0jnBmMPz86aKP1uRL3Y,93
4
4
  edsl/agents/agent.py,sha256=svTVvvg9eCMUhnb49Bxsf9nAwXragtRaeBkyB6q89EE,54423
5
5
  edsl/agents/agent_list.py,sha256=JA39_6RSmiD2mqJgWr2NWovNxNmu4mhZbYmn5be87NQ,21572
@@ -107,12 +107,12 @@ edsl/interviews/interview_status_dictionary.py,sha256=0ZvXLusfOA8xD_Fco4PjEBGwmR
107
107
  edsl/interviews/interview_status_enum.py,sha256=KJ-1yLAHdX-p8TiFnM0M3v1tnBwkq4aMCuBX6-ytrI8,229
108
108
  edsl/interviews/interview_status_log.py,sha256=sRiQ9kIT1WcF-8beETn6E7IsdRRrfbco-yjdAjkXncw,3587
109
109
  edsl/interviews/interview_task_manager.py,sha256=wPi5izhsVK5wI5HfMXMLL5NIoucHNCoGXfRuRzI-wYE,3665
110
- edsl/interviews/request_token_estimator.py,sha256=VATjVBcFyEyc9fhqySo1jIRm5bI8l3lSmX3_N-t8W3Y,1359
110
+ edsl/interviews/request_token_estimator.py,sha256=n_C-alSYOFi27cBcIRhtBX-fvklDcvM2Kowte-EDnzM,4833
111
111
  edsl/interviews/statistics.py,sha256=lZCtq79QrDKG3jXao_OWuBRhnly9VyuhM6IdTJaYqPg,2461
112
112
  edsl/invigilators/__init__.py,sha256=fKbZ7p9-kMelpvET3Ku2Owu-tL_apC-8gi9JychpMBY,1843
113
113
  edsl/invigilators/exceptions.py,sha256=ejoF-Gt-YcnW1yHyfpJ3jZm8AC_zD0GCYafRO2LlAMQ,2767
114
114
  edsl/invigilators/invigilator_base.py,sha256=DgrXTK4AAxXr4wg2pzc0p1aGPPf1UUt01C-JW1UBTvo,20099
115
- edsl/invigilators/invigilators.py,sha256=dc_H4WptOKzAaHiKBeW-FFBOB1ULVO-xamtjner_xGY,22005
115
+ edsl/invigilators/invigilators.py,sha256=UH8gy59qq0_f9jzumDbdugF0SvGW_eIr2GT5zCUO8V0,22355
116
116
  edsl/invigilators/prompt_constructor.py,sha256=THHGcZPI-QUOH8Z9cQEzH7bZEoo0V_Nc_Phlhc9AzL0,19115
117
117
  edsl/invigilators/prompt_helpers.py,sha256=LuMZFZkInPY8M7Rw9fG9rpJIcT89tr2_Iq10ZHH_Y4A,5409
118
118
  edsl/invigilators/question_instructions_prompt_builder.py,sha256=E5zpwctpt_5JjONkZRcMwB0MACAzDvvnzUhmuWTnjd0,9684
@@ -276,7 +276,7 @@ edsl/scenarios/directory_scanner.py,sha256=gnDXU1jKSjSE3LXEhE7ilfJUL_sxK2HHmsA2L
276
276
  edsl/scenarios/document_chunker.py,sha256=EpB0V0oxLzpKntl00Qa3VZNPS7sg9aXdYyqKxhFFzTM,7680
277
277
  edsl/scenarios/exceptions.py,sha256=FeORBm90UthKHDp7cE8I7KJgyA3-pFKNpoivZRr8ifc,10636
278
278
  edsl/scenarios/file_methods.py,sha256=cB_IPVTGz4_yJiRMTdNTvpW4l43lrTbyJOV3Pnm6UPs,2631
279
- edsl/scenarios/file_store.py,sha256=Zb9hA2i9jrDCONjcptWYj0dkSCAzNyL8EnHbNT3dtcE,26704
279
+ edsl/scenarios/file_store.py,sha256=slqSIENW6SP1dhnXTviq4umlvGHeYsDB3SM24t0ll_I,28033
280
280
  edsl/scenarios/handlers/__init__.py,sha256=9r1fDjUviGXso9h4d05wG9RECfqzfps55CQgb-ojCBo,848
281
281
  edsl/scenarios/handlers/csv_file_store.py,sha256=kXOms0ph5JJj6jSbpfQ-SZjuT4vvSRhq5AGpv1L4TPQ,1369
282
282
  edsl/scenarios/handlers/docx_file_store.py,sha256=KSKAAUIWF2K5xr92nx7UGQ9djgtDX4ke-Eyik8QAdlQ,2155
@@ -358,8 +358,8 @@ edsl/utilities/repair_functions.py,sha256=EXkXsqnmgPqj9b3dff1cZnJyaZw-qEvGENXCRH
358
358
  edsl/utilities/restricted_python.py,sha256=248N2p5EWHDSpcK1G-q7DUoJeWy4sB6aO-RV0-5O7uY,2038
359
359
  edsl/utilities/template_loader.py,sha256=SCAcnTnxNQ67MNSkmfz7F-S_u2peyGn2j1oRIqi1wfg,870
360
360
  edsl/utilities/utilities.py,sha256=irHheAGOnl_6RwI--Hi9StVzvsHcWCqB48PWsWJQYOw,12045
361
- edsl-0.1.52.dist-info/LICENSE,sha256=_qszBDs8KHShVYcYzdMz3HNMtH-fKN_p5zjoVAVumFc,1111
362
- edsl-0.1.52.dist-info/METADATA,sha256=18LEADuuU0qmUcarS9CorkQL7Fh59xC1D842ahXazFk,12670
363
- edsl-0.1.52.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
364
- edsl-0.1.52.dist-info/entry_points.txt,sha256=JnG7xqMtHaQu9BU-yPATxdyCeA48XJpuclnWCqMfIMU,38
365
- edsl-0.1.52.dist-info/RECORD,,
361
+ edsl-0.1.53.dist-info/LICENSE,sha256=_qszBDs8KHShVYcYzdMz3HNMtH-fKN_p5zjoVAVumFc,1111
362
+ edsl-0.1.53.dist-info/METADATA,sha256=WUkrcqMnf8uKQvTyrQTx0q3oVFDurmcI3HkXz9Ljk6g,12670
363
+ edsl-0.1.53.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
364
+ edsl-0.1.53.dist-info/entry_points.txt,sha256=JnG7xqMtHaQu9BU-yPATxdyCeA48XJpuclnWCqMfIMU,38
365
+ edsl-0.1.53.dist-info/RECORD,,
File without changes
File without changes