sdkrouter 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. sdkrouter/__init__.py +110 -0
  2. sdkrouter/_api/__init__.py +28 -0
  3. sdkrouter/_api/client.py +204 -0
  4. sdkrouter/_api/generated/__init__.py +21 -0
  5. sdkrouter/_api/generated/cdn/__init__.py +209 -0
  6. sdkrouter/_api/generated/cdn/cdn__api__cdn/__init__.py +7 -0
  7. sdkrouter/_api/generated/cdn/cdn__api__cdn/client.py +133 -0
  8. sdkrouter/_api/generated/cdn/cdn__api__cdn/models.py +163 -0
  9. sdkrouter/_api/generated/cdn/cdn__api__cdn/sync_client.py +132 -0
  10. sdkrouter/_api/generated/cdn/client.py +75 -0
  11. sdkrouter/_api/generated/cdn/logger.py +256 -0
  12. sdkrouter/_api/generated/cdn/pyproject.toml +55 -0
  13. sdkrouter/_api/generated/cdn/retry.py +272 -0
  14. sdkrouter/_api/generated/cdn/sync_client.py +58 -0
  15. sdkrouter/_api/generated/cleaner/__init__.py +212 -0
  16. sdkrouter/_api/generated/cleaner/cleaner__api__cleaner/__init__.py +7 -0
  17. sdkrouter/_api/generated/cleaner/cleaner__api__cleaner/client.py +83 -0
  18. sdkrouter/_api/generated/cleaner/cleaner__api__cleaner/models.py +117 -0
  19. sdkrouter/_api/generated/cleaner/cleaner__api__cleaner/sync_client.py +82 -0
  20. sdkrouter/_api/generated/cleaner/client.py +75 -0
  21. sdkrouter/_api/generated/cleaner/enums.py +55 -0
  22. sdkrouter/_api/generated/cleaner/logger.py +256 -0
  23. sdkrouter/_api/generated/cleaner/pyproject.toml +55 -0
  24. sdkrouter/_api/generated/cleaner/retry.py +272 -0
  25. sdkrouter/_api/generated/cleaner/sync_client.py +58 -0
  26. sdkrouter/_api/generated/keys/__init__.py +212 -0
  27. sdkrouter/_api/generated/keys/client.py +75 -0
  28. sdkrouter/_api/generated/keys/enums.py +64 -0
  29. sdkrouter/_api/generated/keys/keys__api__keys/__init__.py +7 -0
  30. sdkrouter/_api/generated/keys/keys__api__keys/client.py +150 -0
  31. sdkrouter/_api/generated/keys/keys__api__keys/models.py +152 -0
  32. sdkrouter/_api/generated/keys/keys__api__keys/sync_client.py +149 -0
  33. sdkrouter/_api/generated/keys/logger.py +256 -0
  34. sdkrouter/_api/generated/keys/pyproject.toml +55 -0
  35. sdkrouter/_api/generated/keys/retry.py +272 -0
  36. sdkrouter/_api/generated/keys/sync_client.py +58 -0
  37. sdkrouter/_api/generated/models/__init__.py +209 -0
  38. sdkrouter/_api/generated/models/client.py +75 -0
  39. sdkrouter/_api/generated/models/logger.py +256 -0
  40. sdkrouter/_api/generated/models/models__api__llm_models/__init__.py +7 -0
  41. sdkrouter/_api/generated/models/models__api__llm_models/client.py +99 -0
  42. sdkrouter/_api/generated/models/models__api__llm_models/models.py +206 -0
  43. sdkrouter/_api/generated/models/models__api__llm_models/sync_client.py +99 -0
  44. sdkrouter/_api/generated/models/pyproject.toml +55 -0
  45. sdkrouter/_api/generated/models/retry.py +272 -0
  46. sdkrouter/_api/generated/models/sync_client.py +58 -0
  47. sdkrouter/_api/generated/shortlinks/__init__.py +209 -0
  48. sdkrouter/_api/generated/shortlinks/client.py +75 -0
  49. sdkrouter/_api/generated/shortlinks/logger.py +256 -0
  50. sdkrouter/_api/generated/shortlinks/pyproject.toml +55 -0
  51. sdkrouter/_api/generated/shortlinks/retry.py +272 -0
  52. sdkrouter/_api/generated/shortlinks/shortlinks__api__shortlinks/__init__.py +7 -0
  53. sdkrouter/_api/generated/shortlinks/shortlinks__api__shortlinks/client.py +137 -0
  54. sdkrouter/_api/generated/shortlinks/shortlinks__api__shortlinks/models.py +153 -0
  55. sdkrouter/_api/generated/shortlinks/shortlinks__api__shortlinks/sync_client.py +136 -0
  56. sdkrouter/_api/generated/shortlinks/sync_client.py +58 -0
  57. sdkrouter/_api/generated/vision/__init__.py +212 -0
  58. sdkrouter/_api/generated/vision/client.py +75 -0
  59. sdkrouter/_api/generated/vision/enums.py +40 -0
  60. sdkrouter/_api/generated/vision/logger.py +256 -0
  61. sdkrouter/_api/generated/vision/pyproject.toml +55 -0
  62. sdkrouter/_api/generated/vision/retry.py +272 -0
  63. sdkrouter/_api/generated/vision/sync_client.py +58 -0
  64. sdkrouter/_api/generated/vision/vision__api__vision/__init__.py +7 -0
  65. sdkrouter/_api/generated/vision/vision__api__vision/client.py +65 -0
  66. sdkrouter/_api/generated/vision/vision__api__vision/models.py +138 -0
  67. sdkrouter/_api/generated/vision/vision__api__vision/sync_client.py +65 -0
  68. sdkrouter/_client.py +432 -0
  69. sdkrouter/_config.py +74 -0
  70. sdkrouter/_constants.py +21 -0
  71. sdkrouter/_internal/__init__.py +1 -0
  72. sdkrouter/_types/__init__.py +30 -0
  73. sdkrouter/_types/cdn.py +27 -0
  74. sdkrouter/_types/models.py +26 -0
  75. sdkrouter/_types/ocr.py +24 -0
  76. sdkrouter/_types/parsed.py +101 -0
  77. sdkrouter/_types/shortlinks.py +27 -0
  78. sdkrouter/_types/vision.py +29 -0
  79. sdkrouter/_version.py +3 -0
  80. sdkrouter/helpers/__init__.py +13 -0
  81. sdkrouter/helpers/formatting.py +15 -0
  82. sdkrouter/helpers/html.py +100 -0
  83. sdkrouter/helpers/json_cleaner.py +53 -0
  84. sdkrouter/tools/__init__.py +129 -0
  85. sdkrouter/tools/cdn.py +285 -0
  86. sdkrouter/tools/cleaner.py +186 -0
  87. sdkrouter/tools/keys.py +215 -0
  88. sdkrouter/tools/models.py +196 -0
  89. sdkrouter/tools/shortlinks.py +165 -0
  90. sdkrouter/tools/vision.py +173 -0
  91. sdkrouter/utils/__init__.py +27 -0
  92. sdkrouter/utils/parsing.py +109 -0
  93. sdkrouter/utils/tokens.py +375 -0
  94. sdkrouter-0.1.1.dist-info/METADATA +411 -0
  95. sdkrouter-0.1.1.dist-info/RECORD +96 -0
  96. sdkrouter-0.1.1.dist-info/WHEEL +4 -0
@@ -0,0 +1,375 @@
1
+ """
2
+ Token counting utilities for LLM models.
3
+
4
+ Provides:
5
+ - Text token counting using tiktoken
6
+ - Image token estimation for vision models (OpenAI formula)
7
+ - Message-level token counting for chat completions
8
+ """
9
+
10
+ import math
11
+ from typing import Any, Dict, List, Literal, Optional, Tuple
12
+
13
+ import tiktoken
14
+
15
+
16
# ============================================================================
# Text Token Counting
# ============================================================================

# Model-name patterns mapped to tiktoken encoding names. Lookup is by
# substring match with "default" as the catch-all; every entry currently
# resolves to cl100k_base (Claude/Llama use it as an approximation).
MODEL_ENCODINGS: Dict[str, str] = dict.fromkeys(
    (
        # GPT-4 family
        "gpt-4",
        "gpt-4o",
        "gpt-4o-mini",
        "gpt-4-turbo",
        "gpt-4-vision",
        # GPT-3.5 family
        "gpt-3.5-turbo",
        "gpt-3.5",
        # Claude family (cl100k_base used as an approximation)
        "claude",
        "claude-3",
        "claude-3.5",
        # Llama family
        "llama",
        "llama-3",
        # Catch-all
        "default",
    ),
    "cl100k_base",
)

# Fixed token overhead added by chat formatting around every message.
MESSAGE_OVERHEAD_TOKENS = 4
44
+
45
+
46
class TokenCounter:
    """
    Token counting utility using tiktoken.

    Supports GPT-4, GPT-3.5, Claude (approximation), and other models.
    Caches one encoder per model name for performance. When tiktoken cannot
    supply an encoder, every method degrades to a cheap approximation
    instead of raising.

    Example:
        counter = TokenCounter()
        tokens = counter.count("Hello, world!", model="gpt-4o")

        messages = [{"role": "user", "content": "Hi"}]
        total = counter.count_messages(messages, model="gpt-4o")
    """

    def __init__(self) -> None:
        """Initialize tokenizer with an empty per-model encoder cache."""
        # Maps model name -> tiktoken encoder, or None if lookup failed once.
        self._encoders: Dict[str, Any] = {}

    def _get_encoding_name(self, model: str) -> str:
        """Return the tiktoken encoding name for *model*.

        Prefers an exact (case-insensitive) entry in MODEL_ENCODINGS, then
        falls back to the first substring match in insertion order, then to
        the "default" entry.
        """
        model_lower = model.lower()

        # Exact match first, so a precise entry always wins over a broader
        # pattern encountered earlier in the substring scan.
        if model_lower in MODEL_ENCODINGS:
            return MODEL_ENCODINGS[model_lower]

        # Substring scan: e.g. "gpt-4o-2024-05-13" matches "gpt-4".
        for pattern, encoding in MODEL_ENCODINGS.items():
            if pattern in model_lower:
                return encoding

        return MODEL_ENCODINGS["default"]

    def _get_encoder(self, model: str) -> Any:
        """Return the tiktoken encoder for *model* (cached).

        Returns None — and caches the failure — when tiktoken cannot
        provide the encoding. Previously any error here propagated, which
        made the fallback branches in count()/encode()/decode()
        unreachable.
        """
        if model not in self._encoders:
            try:
                encoding_name = self._get_encoding_name(model)
                self._encoders[model] = tiktoken.get_encoding(encoding_name)
            except Exception:
                # Degrade gracefully; callers fall back to approximations.
                self._encoders[model] = None

        return self._encoders[model]

    def count(self, text: str, model: str = "gpt-4o") -> int:
        """
        Count tokens in text.

        Args:
            text: Text to count tokens for
            model: Model name for encoding selection

        Returns:
            Number of tokens (approximated as ~4 chars per token when no
            encoder is available)
        """
        if not text:
            return 0

        encoder = self._get_encoder(model)

        if encoder:
            return len(encoder.encode(text))

        # Fallback: approximate as ~4 chars per token
        return len(text) // 4 + 1

    def count_messages(
        self,
        messages: List[Dict[str, str]],
        model: str = "gpt-4o"
    ) -> int:
        """
        Count total tokens in chat messages.

        Args:
            messages: List of chat messages with 'role' and 'content'
            model: Model name for encoding selection

        Returns:
            Total number of tokens including the fixed per-message
            overhead (MESSAGE_OVERHEAD_TOKENS)
        """
        total_tokens = 0

        for message in messages:
            role = message.get("role", "user")
            content = message.get("content", "")

            # Both role and content consume tokens in the chat format.
            total_tokens += self.count(role, model)
            total_tokens += self.count(content, model)

            # Chat framing tokens added around every message.
            total_tokens += MESSAGE_OVERHEAD_TOKENS

        return total_tokens

    def encode(self, text: str, model: str = "gpt-4o") -> List[int]:
        """
        Encode text to token IDs.

        Args:
            text: Text to encode
            model: Model name for encoding selection

        Returns:
            List of token IDs (empty when no encoder is available)
        """
        encoder = self._get_encoder(model)

        if encoder:
            return encoder.encode(text)

        # Fallback: no encoder means no meaningful token IDs.
        return []

    def decode(self, tokens: List[int], model: str = "gpt-4o") -> str:
        """
        Decode token IDs to text.

        Args:
            tokens: List of token IDs
            model: Model name for encoding selection

        Returns:
            Decoded text (empty string when no encoder is available)
        """
        encoder = self._get_encoder(model)

        if encoder:
            return encoder.decode(tokens)

        return ""
172
+
173
+
174
# ============================================================================
# Image Token Estimation (OpenAI Vision formula)
# ============================================================================

# Detail setting accepted by the vision API.
DetailMode = Literal["low", "high", "auto"]

# OpenAI pre-processing limits: cap the longest side at 2048px, then
# shrink so the shortest side is at most 768px.
MAX_DIMENSION = 2048
SHORT_SIDE_TARGET = 768

# Token pricing constants (OpenAI formula).
LOW_DETAIL_TOKENS = 85          # flat cost of a low-detail image
HIGH_DETAIL_BASE_TOKENS = 85    # base cost of every high-detail image
HIGH_DETAIL_TILE_TOKENS = 170   # additional cost per 512x512 tile
TILE_SIZE = 512                 # tile edge length in pixels
189
+
190
+
191
+ def _scale_image_dimensions(width: int, height: int) -> Tuple[int, int]:
192
+ """
193
+ Scale image dimensions according to OpenAI processing rules.
194
+
195
+ 1. Scale down if larger than 2048 on any side
196
+ 2. Scale to fit 768px on shortest side
197
+
198
+ Args:
199
+ width: Original width
200
+ height: Original height
201
+
202
+ Returns:
203
+ Tuple of (scaled_width, scaled_height)
204
+ """
205
+ # Step 1: Scale down if larger than 2048
206
+ max_dim = max(width, height)
207
+ if max_dim > MAX_DIMENSION:
208
+ scale = MAX_DIMENSION / max_dim
209
+ width = int(width * scale)
210
+ height = int(height * scale)
211
+
212
+ # Step 2: Scale to fit 768px on shortest side
213
+ min_dim = min(width, height)
214
+ if min_dim > SHORT_SIDE_TARGET:
215
+ scale = SHORT_SIDE_TARGET / min_dim
216
+ width = int(width * scale)
217
+ height = int(height * scale)
218
+
219
+ return width, height
220
+
221
+
222
def estimate_image_tokens(
    width: int = 1024,
    height: int = 1024,
    detail: DetailMode = "high",
) -> int:
    """
    Estimate the token cost of an image using the OpenAI vision formula.

    Low detail costs a flat 85 tokens; high detail costs 85 base tokens
    plus 170 tokens per 512x512 tile after OpenAI's scaling rules.

    Args:
        width: Image width in pixels
        height: Image height in pixels
        detail: Detail mode (low/high/auto)

    Returns:
        Estimated token count

    Example:
        tokens = estimate_image_tokens(1024, 768, "high")
        print(f"Image will use ~{tokens} tokens")
    """
    mode = detail
    # "auto" resolves to high detail for anything bigger than one tile.
    if mode == "auto":
        mode = "low" if max(width, height) <= 512 else "high"

    if mode == "low":
        return LOW_DETAIL_TOKENS

    # High detail: scale per OpenAI rules, then charge per 512px tile.
    scaled_w, scaled_h = _scale_image_dimensions(width, height)
    tile_total = math.ceil(scaled_w / TILE_SIZE) * math.ceil(scaled_h / TILE_SIZE)
    return HIGH_DETAIL_BASE_TOKENS + HIGH_DETAIL_TILE_TOKENS * tile_total
260
+
261
+
262
def get_tile_count(width: int, height: int) -> Tuple[int, int]:
    """
    Return the grid of 512x512 tiles an image occupies.

    The image is first scaled with the same rules OpenAI applies before
    tiling.

    Args:
        width: Image width
        height: Image height

    Returns:
        Tuple of (tiles_x, tiles_y)
    """
    scaled_w, scaled_h = _scale_image_dimensions(width, height)
    return math.ceil(scaled_w / TILE_SIZE), math.ceil(scaled_h / TILE_SIZE)
277
+
278
+
279
def get_optimal_detail_mode(
    width: int,
    height: int,
    max_tokens: Optional[int] = None,
) -> DetailMode:
    """
    Pick a detail mode for an image given its size and a token budget.

    Args:
        width: Image width
        height: Image height
        max_tokens: Optional maximum token budget for image

    Returns:
        Recommended detail mode ('low' or 'high')

    Example:
        mode = get_optimal_detail_mode(2048, 1536, max_tokens=500)
        # Returns 'low' if high detail would exceed 500 tokens
    """
    # Budget check: drop to low detail when high would blow the budget.
    if max_tokens and estimate_image_tokens(width, height, "high") > max_tokens:
        return "low"

    # One tile or less: high detail adds nothing for small images.
    if max(width, height) <= 512:
        return "low"

    return "high"
310
+
311
+
312
# ============================================================================
# Convenience Functions
# ============================================================================

# Lazily-created, process-wide TokenCounter shared by the module-level
# convenience functions below.
_tokenizer: Optional[TokenCounter] = None


def _get_tokenizer() -> TokenCounter:
    """Return the shared TokenCounter, creating it on first call."""
    global _tokenizer
    counter = _tokenizer
    if counter is None:
        counter = TokenCounter()
        _tokenizer = counter
    return counter
326
+
327
+
328
def count_tokens(text: str, model: str = "gpt-4o") -> int:
    """
    Count tokens in text via the shared module tokenizer.

    Args:
        text: Text to count tokens for
        model: Model name for encoding selection

    Returns:
        Number of tokens
    """
    tokenizer = _get_tokenizer()
    return tokenizer.count(text, model)
340
+
341
+
342
def count_messages_tokens(
    messages: List[Dict[str, str]],
    model: str = "gpt-4o"
) -> int:
    """
    Count total tokens in chat messages via the shared module tokenizer.

    Args:
        messages: List of chat messages
        model: Model name for encoding selection

    Returns:
        Total number of tokens
    """
    tokenizer = _get_tokenizer()
    return tokenizer.count_messages(messages, model)
357
+
358
+
359
# Explicit public surface for `from ... import *`; keep in sync with the
# definitions above when adding new helpers.
__all__ = [
    # Classes
    "TokenCounter",
    # Text token functions
    "count_tokens",
    "count_messages_tokens",
    # Image token functions
    "estimate_image_tokens",
    "get_tile_count",
    "get_optimal_detail_mode",
    # Types
    "DetailMode",
    # Constants
    "LOW_DETAIL_TOKENS",
    "HIGH_DETAIL_BASE_TOKENS",
    "HIGH_DETAIL_TILE_TOKENS",
]