payi 0.1.0a83__py3-none-any.whl → 0.1.0a84__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of payi might be problematic; see the registry listing for details.
- payi/_version.py +1 -1
- payi/lib/AnthropicInstrumentor.py +90 -67
- payi/lib/BedrockInstrumentor.py +82 -100
- payi/lib/GoogleGenAiInstrumentor.py +25 -111
- payi/lib/VertexInstrumentor.py +131 -98
- payi/lib/instrument.py +16 -9
- payi/resources/categories/__init__.py +14 -0
- payi/resources/categories/categories.py +32 -0
- payi/resources/categories/fixed_cost_resources.py +196 -0
- payi/resources/ingest.py +14 -0
- payi/resources/limits/limits.py +4 -0
- payi/types/categories/__init__.py +1 -0
- payi/types/categories/fixed_cost_resource_create_params.py +21 -0
- payi/types/ingest_event_param.py +13 -1
- payi/types/ingest_units_params.py +11 -1
- payi/types/limit_create_params.py +2 -0
- payi/types/limit_history_response.py +3 -3
- {payi-0.1.0a83.dist-info → payi-0.1.0a84.dist-info}/METADATA +1 -1
- {payi-0.1.0a83.dist-info → payi-0.1.0a84.dist-info}/RECORD +21 -19
- {payi-0.1.0a83.dist-info → payi-0.1.0a84.dist-info}/WHEEL +0 -0
- {payi-0.1.0a83.dist-info → payi-0.1.0a84.dist-info}/licenses/LICENSE +0 -0
payi/lib/GoogleGenAiInstrumentor.py
CHANGED

@@ -1,12 +1,10 @@
 import json
-import math
 from typing import Any, List, Union, Optional, Sequence
 from typing_extensions import override

 from wrapt import wrap_function_wrapper # type: ignore

 from payi.lib.helpers import PayiCategories
-from payi.types.ingest_units_params import Units

 from .instrument import _ChunkResult, _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor

@@ -115,9 +113,6 @@ async def agenerate_stream_wrapper(
         kwargs,
     )

-def count_chars_skip_spaces(text: str) -> int:
-    return sum(1 for c in text if not c.isspace())
-
 class _GoogleGenAiRequest(_ProviderRequest):
     def __init__(self, instrumentor: _PayiInstrumentor):
         super().__init__(
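The deleted helper is not gone: as the hunks below show, count_chars_skip_spaces now lives in VertexInstrumentor and is imported from there. Its behavior is unchanged, counting every non-whitespace character:

    count_chars_skip_spaces("Hello, world!")  # -> 12; only the space is skipped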
@@ -126,7 +121,7 @@ class _GoogleGenAiRequest(_ProviderRequest):
             streaming_type=_StreamingType.generator,
         )
         self._prompt_character_count = 0
-        self.
+        self._candidates_character_count = 0

     @override
     def process_request(self, instance: Any, extra_headers: 'dict[str, str]', args: Sequence[Any], kwargs: Any) -> bool:

@@ -158,6 +153,8 @@ class _GoogleGenAiRequest(_ProviderRequest):
         if isinstance(value, list):
             items = value # type: ignore

+            from .VertexInstrumentor import count_chars_skip_spaces
+
             for item in items: # type: ignore
                 text = ""
                 if isinstance(item, Part):
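Note that count_chars_skip_spaces is imported inside the method body rather than at module top level, presumably to avoid a circular import between GoogleGenAiInstrumentor and VertexInstrumentor; the same pattern recurs in the hunks below.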
@@ -249,6 +246,8 @@ class _GoogleGenAiRequest(_ProviderRequest):

     @override
     def process_chunk(self, chunk: Any) -> _ChunkResult:
+        from .VertexInstrumentor import vertex_compute_usage, count_chars_skip_spaces
+
         ingest = False
         response_dict: dict[str, Any] = chunk.to_json_dict()
         if "provider_response_id" not in self._ingest:

@@ -260,22 +259,25 @@ class _GoogleGenAiRequest(_ProviderRequest):

                 self._ingest["resource"] = "google." + model

+
         for candidate in response_dict.get("candidates", []):
             parts = candidate.get("content", {}).get("parts", [])
             for part in parts:
-                self.
+                self._candidates_character_count += count_chars_skip_spaces(part.get("text", ""))

         usage = response_dict.get("usage_metadata", {})
         if usage and "prompt_token_count" in usage and "candidates_token_count" in usage:
-
+            vertex_compute_usage(
+                request=self,
+                model=model,
+                response_dict=response_dict,
+                prompt_character_count=self._prompt_character_count,
+                streaming_candidates_characters=self._candidates_character_count
+            )
             ingest = True

         return _ChunkResult(send_chunk_to_caller=True, ingest=ingest)

-    @staticmethod
-    def _is_character_billing_model(model: str) -> bool:
-        return model.startswith("gemini-1.")
-
     @override
     def process_synchronous_response(
         self,

@@ -284,6 +286,8 @@ class _GoogleGenAiRequest(_ProviderRequest):
             kwargs: Any) -> Any:
         response_dict = response.to_json_dict()

+        from .VertexInstrumentor import vertex_compute_usage
+
         id: Optional[str] = response_dict.get("response_id", None)
         if id:
             self._ingest["provider_response_id"] = id

@@ -292,105 +296,15 @@ class _GoogleGenAiRequest(_ProviderRequest):
         if model:
             self._ingest["resource"] = "google." + model

-
-
+        vertex_compute_usage(
+            request=self,
+            model=model,
+            response_dict=response_dict,
+            prompt_character_count=self._prompt_character_count,
+            streaming_candidates_characters=self._candidates_character_count
+        )
+
         if log_prompt_and_response:
             self._ingest["provider_response_json"] = [json.dumps(response_dict)]

-        return None
-
-    def add_units(self, key: str, input: Optional[int] = None, output: Optional[int] = None) -> None:
-        if key not in self._ingest["units"]:
-            self._ingest["units"][key] = {}
-        if input is not None:
-            self._ingest["units"][key]["input"] = input
-        if output is not None:
-            self._ingest["units"][key]["output"] = output
-
-    def _compute_usage(self, response_dict: 'dict[str, Any]', streaming_candidates_characters: Optional[int] = None) -> None:
-        usage = response_dict.get("usage_metadata", {})
-        input = usage.get("prompt_token_count", 0)
-
-        prompt_tokens_details: list[dict[str, Any]] = usage.get("prompt_tokens_details", [])
-        candidates_tokens_details: list[dict[str, Any]] = usage.get("candidates_tokens_details", [])
-
-        model: str = response_dict.get("model_version", "")
-
-        # for character billing only
-        large_context = "" if input < 128000 else "_large_context"
-
-        if self._is_character_billing_model(model):
-            for details in prompt_tokens_details:
-                modality = details.get("modality", "")
-                if not modality:
-                    continue
-
-                modality_token_count = details.get("token_count", 0)
-                if modality == "TEXT":
-                    input = self._prompt_character_count
-                    if input == 0:
-                        # back up calc if nothing was calculated from the prompt
-                        input = response_dict["usage_metadata"]["prompt_token_count"] * 4
-
-                    output = 0
-                    if streaming_candidates_characters is None:
-                        for candidate in response_dict.get("candidates", []):
-                            parts = candidate.get("content", {}).get("parts", [])
-                            for part in parts:
-                                output += count_chars_skip_spaces(part.get("text", ""))
-
-                        if output == 0:
-                            # back up calc if no parts
-                            output = response_dict["usage_metadata"]["candidates_token_count"] * 4
-                    else:
-                        output = streaming_candidates_characters
-
-                    self._ingest["units"]["text"+large_context] = Units(input=input, output=output)
-
-                elif modality == "IMAGE":
-                    num_images = math.ceil(modality_token_count / 258)
-                    self.add_units("vision"+large_context, input=num_images)
-
-                elif modality == "VIDEO":
-                    video_seconds = math.ceil(modality_token_count / 285)
-                    self.add_units("video"+large_context, input=video_seconds)
-
-                elif modality == "AUDIO":
-                    audio_seconds = math.ceil(modality_token_count / 25)
-                    self.add_units("audio"+large_context, input=audio_seconds)
-
-        else:
-            for details in prompt_tokens_details:
-                modality = details.get("modality", "")
-                if not modality:
-                    continue
-
-                modality_token_count = details.get("token_count", 0)
-                if modality == "IMAGE":
-                    self.add_units("vision", input=modality_token_count)
-                elif modality in ("VIDEO", "AUDIO", "TEXT"):
-                    self.add_units(modality.lower(), input=modality_token_count)
-            for details in candidates_tokens_details:
-                modality = details.get("modality", "")
-                if not modality:
-                    continue
-
-                modality_token_count = details.get("token_count", 0)
-                if modality in ("VIDEO", "AUDIO", "TEXT", "IMAGE"):
-                    self.add_units(modality.lower(), output=modality_token_count)
-
-        if not self._ingest["units"]:
-            input = usage.get("prompt_token_count", 0)
-            output = usage.get("candidates_token_count", 0) * 4
-
-            if self._is_character_billing_model(model):
-                if self._prompt_character_count > 0:
-                    input = self._prompt_character_count
-                else:
-                    input *= 4
-
-                # if no units were added, add a default unit and assume 4 characters per token
-                self._ingest["units"]["text"+large_context] = Units(input=input, output=output)
-            else:
-                # if no units were added, add a default unit
-                self._ingest["units"]["text"] = Units(input=input, output=output)
+        return None
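The net effect of this file's changes: _GoogleGenAiRequest no longer carries its own _compute_usage/add_units and instead delegates to the shared vertex_compute_usage defined in VertexInstrumentor. A minimal sketch of the shared helper's contract, using a made-up response (req stands for any _ProviderRequest whose _ingest["units"] dict is initialized):

    from payi.lib.VertexInstrumentor import vertex_compute_usage

    response_dict = {
        "usage_metadata": {
            "prompt_token_count": 10,
            "candidates_token_count": 5,
            "prompt_tokens_details": [{"modality": "TEXT", "token_count": 10}],
            "candidates_tokens_details": [{"modality": "TEXT", "token_count": 5}],
        },
    }
    # Token-billed model (not gemini-1.x), so counts pass through unchanged:
    vertex_compute_usage(request=req, model="gemini-2.0-flash", response_dict=response_dict)
    assert req._ingest["units"] == {"text": {"input": 10, "output": 5}}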
payi/lib/VertexInstrumentor.py
CHANGED
@@ -96,7 +96,7 @@ class _GoogleVertexRequest(_ProviderRequest):
             streaming_type=_StreamingType.generator,
         )
         self._prompt_character_count = 0
-        self.
+        self._candidates_character_count = 0
         self._model_name: Optional[str] = None

     @override

@@ -227,19 +227,21 @@ class _GoogleVertexRequest(_ProviderRequest):
         for candidate in response_dict.get("candidates", []):
             parts = candidate.get("content", {}).get("parts", [])
             for part in parts:
-                self.
+                self._candidates_character_count += count_chars_skip_spaces(part.get("text", ""))

         usage = response_dict.get("usage_metadata", {})
         if usage and "prompt_token_count" in usage and "candidates_token_count" in usage:
-
+            vertex_compute_usage(
+                request=self,
+                model=self._get_model_name(response_dict),
+                response_dict=response_dict,
+                prompt_character_count=self._prompt_character_count,
+                streaming_candidates_characters=self._candidates_character_count,
+            )
             ingest = True

         return _ChunkResult(send_chunk_to_caller=True, ingest=ingest)

-    @staticmethod
-    def _is_character_billing_model(model: str) -> bool:
-        return model.startswith("gemini-1.")
-
     @override
     def process_synchronous_response(
         self,

@@ -256,108 +258,139 @@ class _GoogleVertexRequest(_ProviderRequest):
         if model:
             self._ingest["resource"] = "google." + model

-
+        vertex_compute_usage(
+            request=self,
+            model=model,
+            response_dict=response_dict,
+            prompt_character_count=self._prompt_character_count,
+            streaming_candidates_characters=self._candidates_character_count
+        )

         if log_prompt_and_response:
             self._ingest["provider_response_json"] = [json.dumps(response_dict)]

         return None

-
-
-
+def vertex_compute_usage(
+    request: _ProviderRequest,
+    model: Optional[str],
+    response_dict: 'dict[str, Any]',
+    prompt_character_count: int = 0,
+    streaming_candidates_characters: Optional[int] = None) -> None:
+
+    def is_character_billing_model(model: str) -> bool:
+        return model.startswith("gemini-1.")
+
+    def is_large_context_token_model(model: str, input_tokens: int) -> bool:
+        return model.startswith("gemini-2.5-pro") and input_tokens > 200_000
+
+    def add_units(request: _ProviderRequest, key: str, input: Optional[int] = None, output: Optional[int] = None) -> None:
+        if key not in request._ingest["units"]:
+            request._ingest["units"][key] = {}
         if input is not None:
-
+            request._ingest["units"][key]["input"] = input
         if output is not None:
-
-
-    def _compute_usage(self, response_dict: 'dict[str, Any]', streaming_candidates_characters: Optional[int] = None) -> None:
-        usage = response_dict.get("usage_metadata", {})
-        input = usage.get("prompt_token_count", 0)
+            request._ingest["units"][key]["output"] = output

-
-
+    usage = response_dict.get("usage_metadata", {})
+    input = usage.get("prompt_token_count", 0)

-
-
-        model = ""
-
-        # for character billing only
-        large_context = "" if input < 128000 else "_large_context"
-
-        if self._is_character_billing_model(model):
-            # gemini 1.0 and 1.5 units are reported in characters, per second, per image, etc...
-            for details in prompt_tokens_details:
-                modality = details.get("modality", "")
-                if not modality:
-                    continue
-
-                modality_token_count = details.get("token_count", 0)
-                if modality == "TEXT":
-                    input = self._prompt_character_count
-                    if input == 0:
-                        # back up calc if nothing was calculated from the prompt
-                        input = response_dict["usage_metadata"]["prompt_token_count"] * 4
-
-                    output = 0
-                    if streaming_candidates_characters is None:
-                        for candidate in response_dict.get("candidates", []):
-                            parts = candidate.get("content", {}).get("parts", [])
-                            for part in parts:
-                                output += count_chars_skip_spaces(part.get("text", ""))
-
-                        if output == 0:
-                            # back up calc if no parts
-                            output = response_dict["usage_metadata"]["candidates_token_count"] * 4
-                    else:
-                        output = streaming_candidates_characters
-
-                    self._ingest["units"]["text"+large_context] = Units(input=input, output=output)
-
-                elif modality == "IMAGE":
-                    num_images = math.ceil(modality_token_count / 258)
-                    self.add_units("vision"+large_context, input=num_images)
-
-                elif modality == "VIDEO":
-                    video_seconds = math.ceil(modality_token_count / 285)
-                    self.add_units("video"+large_context, input=video_seconds)
-
-                elif modality == "AUDIO":
-                    audio_seconds = math.ceil(modality_token_count / 25)
-                    self.add_units("audio"+large_context, input=audio_seconds)
+    prompt_tokens_details: list[dict[str, Any]] = usage.get("prompt_tokens_details", [])
+    candidates_tokens_details: list[dict[str, Any]] = usage.get("candidates_tokens_details", [])

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    if not model:
+        model = ""
+
+    large_context = ""
+
+    if is_character_billing_model(model):
+        if input > 128000:
+            large_context = "_large_context"
+
+        # gemini 1.0 and 1.5 units are reported in characters, per second, per image, etc...
+        for details in prompt_tokens_details:
+            modality = details.get("modality", "")
+            if not modality:
+                continue
+
+            modality_token_count = details.get("token_count", 0)
+            if modality == "TEXT":
+                input = prompt_character_count
+                if input == 0:
+                    # back up calc if nothing was calculated from the prompt
+                    input = response_dict["usage_metadata"]["prompt_token_count"] * 4
+
+                output = 0
+                if streaming_candidates_characters is None:
+                    for candidate in response_dict.get("candidates", []):
+                        parts = candidate.get("content", {}).get("parts", [])
+                        for part in parts:
+                            output += count_chars_skip_spaces(part.get("text", ""))
+
+                    if output == 0:
+                        # back up calc if no parts
+                        output = response_dict["usage_metadata"]["candidates_token_count"] * 4
                 else:
-
+                    output = streaming_candidates_characters

-
-
+                request._ingest["units"]["text"+large_context] = Units(input=input, output=output)
+
+            elif modality == "IMAGE":
+                num_images = math.ceil(modality_token_count / 258)
+                add_units(request, "vision"+large_context, input=num_images)
+
+            elif modality == "VIDEO":
+                video_seconds = math.ceil(modality_token_count / 285)
+                add_units(request, "video"+large_context, input=video_seconds)
+
+            elif modality == "AUDIO":
+                audio_seconds = math.ceil(modality_token_count / 25)
+                add_units(request, "audio"+large_context, input=audio_seconds)
+
+        # No need to go over the candidates_tokens_details as all the character based 1.x models only output TEXT
+        # for details in candidates_tokens_details:
+
+    else:
+        # thinking tokens introduced in 2.5 after the transition to token based billing
+        thinking_token_count = usage.get("thoughts_token_count", 0)
+
+        if is_large_context_token_model(model, input):
+            large_context = "_large_context"
+
+        for details in prompt_tokens_details:
+            modality = details.get("modality", "")
+            if not modality:
+                continue
+
+            modality_token_count = details.get("token_count", 0)
+            if modality == "IMAGE":
+                add_units(request, "vision"+large_context, input=modality_token_count)
+            elif modality in ("VIDEO", "AUDIO", "TEXT"):
+                add_units(request, modality.lower()+large_context, input=modality_token_count)
+        for details in candidates_tokens_details:
+            modality = details.get("modality", "")
+            if not modality:
+                continue
+
+            modality_token_count = details.get("token_count", 0)
+            if modality in ("VIDEO", "AUDIO", "TEXT", "IMAGE"):
+                add_units(request, modality.lower()+large_context, output=modality_token_count)
+
+        if thinking_token_count > 0:
+            add_units(request, "reasoning"+large_context, output=thinking_token_count)
+
+    if not request._ingest["units"]:
+        input = usage.get("prompt_token_count", 0)
+        output = usage.get("candidates_token_count", 0) * 4
+
+        if is_character_billing_model(model):
+            if prompt_character_count > 0:
+                input = prompt_character_count
             else:
-
-
+                input *= 4
+
+            # if no units were added, add a default unit and assume 4 characters per token
+            request._ingest["units"]["text"+large_context] = Units(input=input, output=output)
+        else:
+            # if no units were added, add a default unit
+            request._ingest["units"]["text"] = Units(input=input, output=output)
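For the character-billed gemini-1.x branch, the divisors convert Google's modality token counts back into per-image and per-second units. Worked arithmetic under assumed token counts:

    # prompt_tokens_details (illustrative numbers):
    #   IMAGE: 516 tokens -> math.ceil(516 / 258) = 2 images  -> units["vision"], input=2
    #   VIDEO: 570 tokens -> math.ceil(570 / 285) = 2 seconds -> units["video"], input=2
    #   AUDIO: 100 tokens -> math.ceil(100 / 25)  = 4 seconds -> units["audio"], input=4
    # TEXT uses the counted prompt/candidate characters, falling back to
    # token_count * 4 (roughly 4 characters per token) when no characters were counted.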
payi/lib/instrument.py
CHANGED
@@ -55,9 +55,14 @@ class _ProviderRequest:
     def process_request_prompt(self, prompt: 'dict[str, Any]', args: Sequence[Any], kwargs: 'dict[str, Any]') -> None:
         ...

+    def process_initial_stream_response(self, response: Any) -> None:
+        pass
+
+    @property
     def is_bedrock(self) -> bool:
         return self._category == PayiCategories.aws_bedrock

+    @property
     def is_vertex(self) -> bool:
         return self._category == PayiCategories.google_vertex

@@ -65,6 +70,10 @@ class _ProviderRequest:
         self.exception_to_semantic_failure(exception)
         return True

+    @property
+    def supports_extra_headers(self) -> bool:
+        return not self.is_bedrock and not self.is_vertex
+
     @property
     def streaming_type(self) -> '_StreamingType':
         return self._streaming_type
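Since is_bedrock and is_vertex are now properties instead of methods, call sites drop the parentheses, and the scattered boto3 special-casing collapses into a single supports_extra_headers check. A minimal sketch of the new call shape (the comment restates the deleted boto3 note; treating Vertex the same way follows from the property's definition):

    if request.is_bedrock:                    # was: request.is_bedrock()
        ...
    if not request.supports_extra_headers:    # boto3 (and, per the property, Vertex) doesn't allow extra_headers
        kwargs.pop("extra_headers", None)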
@@ -806,8 +815,7 @@ class _PayiInstrumentor:
         context = self.get_context()

         if not context:
-            if request.
-                # boto3 doesn't allow extra_headers
+            if not request.supports_extra_headers:
                 kwargs.pop("extra_headers", None)

             self._logger.debug(f"invoke_wrapper: no instrumentation context, exit early")

@@ -822,8 +830,7 @@ class _PayiInstrumentor:
         self._update_extra_headers(context, extra_headers)

         if context.get("proxy", self._proxy_default):
-            if request.
-                # boto3 doesn't allow extra_headers
+            if not request.supports_extra_headers:
                 kwargs.pop("extra_headers", None)
             elif "extra_headers" not in kwargs and extra_headers:
                 # assumes anthropic and openai clients

@@ -899,7 +906,7 @@ class _PayiInstrumentor:
             request=request,
         )

-        if request.is_bedrock():
+        if request.is_bedrock:
             if "body" in response:
                 response["body"] = stream_result
             else:

@@ -1084,9 +1091,10 @@ class _StreamIteratorWrapper(ObjectProxy): # type: ignore

         instrumentor._logger.debug(f"StreamIteratorWrapper: instance {instance}, category {request._category}")

+        request.process_initial_stream_response(response)
+
         bedrock_from_stream: bool = False
-        if request.is_bedrock():
-            request._ingest["provider_response_id"] = response["ResponseMetadata"]["RequestId"]
+        if request.is_bedrock:
             stream = response.get("stream", None)

             if stream:
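process_initial_stream_response gives a provider request a look at the raw response before stream iteration starts; the Bedrock response-id capture deleted above is the natural logic to relocate there. A hypothetical override, reusing the exact assignment from the deleted line:

    class _BedrockRequest(_ProviderRequest):  # illustrative subclass name, not from this diff
        @override
        def process_initial_stream_response(self, response: Any) -> None:
            # capture the Bedrock request id before the stream body is consumed
            self._ingest["provider_response_id"] = response["ResponseMetadata"]["RequestId"]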
@@ -1108,7 +1116,6 @@ class _StreamIteratorWrapper(ObjectProxy): # type: ignore
         self._request: _ProviderRequest = request

         self._first_token: bool = True
-        self._is_bedrock: bool = request.is_bedrock()
         self._bedrock_from_stream: bool = bedrock_from_stream
         self._ingested: bool = False
         self._iter_started: bool = False

@@ -1131,7 +1138,7 @@ class _StreamIteratorWrapper(ObjectProxy): # type: ignore

     def __iter__(self) -> Any:
         self._iter_started = True
-        if self._is_bedrock:
+        if self._request.is_bedrock:
             # MUST reside in a separate function so that the yield statement (e.g. the generator) doesn't implicitly return its own iterator and overriding self
             self._instrumentor._logger.debug(f"StreamIteratorWrapper: bedrock __iter__")
             return self._iter_bedrock()
payi/resources/categories/__init__.py
CHANGED

@@ -16,6 +16,14 @@ from .categories import (
     CategoriesResourceWithStreamingResponse,
     AsyncCategoriesResourceWithStreamingResponse,
 )
+from .fixed_cost_resources import (
+    FixedCostResourcesResource,
+    AsyncFixedCostResourcesResource,
+    FixedCostResourcesResourceWithRawResponse,
+    AsyncFixedCostResourcesResourceWithRawResponse,
+    FixedCostResourcesResourceWithStreamingResponse,
+    AsyncFixedCostResourcesResourceWithStreamingResponse,
+)

 __all__ = [
     "ResourcesResource",

@@ -24,6 +32,12 @@ __all__ = [
     "AsyncResourcesResourceWithRawResponse",
     "ResourcesResourceWithStreamingResponse",
     "AsyncResourcesResourceWithStreamingResponse",
+    "FixedCostResourcesResource",
+    "AsyncFixedCostResourcesResource",
+    "FixedCostResourcesResourceWithRawResponse",
+    "AsyncFixedCostResourcesResourceWithRawResponse",
+    "FixedCostResourcesResourceWithStreamingResponse",
+    "AsyncFixedCostResourcesResourceWithStreamingResponse",
     "CategoriesResource",
     "AsyncCategoriesResource",
     "CategoriesResourceWithRawResponse",
payi/resources/categories/categories.py
CHANGED

@@ -25,6 +25,14 @@ from ..._response import (
 )
 from ...pagination import SyncCursorPage, AsyncCursorPage
 from ..._base_client import AsyncPaginator, make_request_options
+from .fixed_cost_resources import (
+    FixedCostResourcesResource,
+    AsyncFixedCostResourcesResource,
+    FixedCostResourcesResourceWithRawResponse,
+    AsyncFixedCostResourcesResourceWithRawResponse,
+    FixedCostResourcesResourceWithStreamingResponse,
+    AsyncFixedCostResourcesResourceWithStreamingResponse,
+)
 from ...types.category_response import CategoryResponse
 from ...types.category_delete_response import CategoryDeleteResponse
 from ...types.category_resource_response import CategoryResourceResponse

@@ -38,6 +46,10 @@ class CategoriesResource(SyncAPIResource):
     def resources(self) -> ResourcesResource:
         return ResourcesResource(self._client)

+    @cached_property
+    def fixed_cost_resources(self) -> FixedCostResourcesResource:
+        return FixedCostResourcesResource(self._client)
+
     @cached_property
     def with_raw_response(self) -> CategoriesResourceWithRawResponse:
         """

@@ -225,6 +237,10 @@ class AsyncCategoriesResource(AsyncAPIResource):
     def resources(self) -> AsyncResourcesResource:
         return AsyncResourcesResource(self._client)

+    @cached_property
+    def fixed_cost_resources(self) -> AsyncFixedCostResourcesResource:
+        return AsyncFixedCostResourcesResource(self._client)
+
     @cached_property
     def with_raw_response(self) -> AsyncCategoriesResourceWithRawResponse:
         """

@@ -428,6 +444,10 @@ class CategoriesResourceWithRawResponse:
     def resources(self) -> ResourcesResourceWithRawResponse:
         return ResourcesResourceWithRawResponse(self._categories.resources)

+    @cached_property
+    def fixed_cost_resources(self) -> FixedCostResourcesResourceWithRawResponse:
+        return FixedCostResourcesResourceWithRawResponse(self._categories.fixed_cost_resources)
+

 class AsyncCategoriesResourceWithRawResponse:
     def __init__(self, categories: AsyncCategoriesResource) -> None:

@@ -450,6 +470,10 @@ class AsyncCategoriesResourceWithRawResponse:
     def resources(self) -> AsyncResourcesResourceWithRawResponse:
         return AsyncResourcesResourceWithRawResponse(self._categories.resources)

+    @cached_property
+    def fixed_cost_resources(self) -> AsyncFixedCostResourcesResourceWithRawResponse:
+        return AsyncFixedCostResourcesResourceWithRawResponse(self._categories.fixed_cost_resources)
+

 class CategoriesResourceWithStreamingResponse:
     def __init__(self, categories: CategoriesResource) -> None:

@@ -472,6 +496,10 @@ class CategoriesResourceWithStreamingResponse:
     def resources(self) -> ResourcesResourceWithStreamingResponse:
         return ResourcesResourceWithStreamingResponse(self._categories.resources)

+    @cached_property
+    def fixed_cost_resources(self) -> FixedCostResourcesResourceWithStreamingResponse:
+        return FixedCostResourcesResourceWithStreamingResponse(self._categories.fixed_cost_resources)
+

 class AsyncCategoriesResourceWithStreamingResponse:
     def __init__(self, categories: AsyncCategoriesResource) -> None:

@@ -493,3 +521,7 @@ class AsyncCategoriesResourceWithStreamingResponse:
     @cached_property
     def resources(self) -> AsyncResourcesResourceWithStreamingResponse:
         return AsyncResourcesResourceWithStreamingResponse(self._categories.resources)
+
+    @cached_property
+    def fixed_cost_resources(self) -> AsyncFixedCostResourcesResourceWithStreamingResponse:
+        return AsyncFixedCostResourcesResourceWithStreamingResponse(self._categories.fixed_cost_resources)