payi 0.1.0a83__py3-none-any.whl → 0.1.0a85__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- payi/_version.py +1 -1
- payi/lib/AnthropicInstrumentor.py +111 -70
- payi/lib/BedrockInstrumentor.py +83 -100
- payi/lib/GoogleGenAiInstrumentor.py +26 -111
- payi/lib/VertexInstrumentor.py +132 -98
- payi/lib/instrument.py +52 -15
- payi/resources/categories/__init__.py +14 -0
- payi/resources/categories/categories.py +32 -0
- payi/resources/categories/fixed_cost_resources.py +196 -0
- payi/resources/ingest.py +14 -0
- payi/resources/limits/limits.py +4 -0
- payi/types/categories/__init__.py +1 -0
- payi/types/categories/fixed_cost_resource_create_params.py +21 -0
- payi/types/ingest_event_param.py +13 -1
- payi/types/ingest_units_params.py +11 -1
- payi/types/limit_create_params.py +2 -0
- payi/types/limit_history_response.py +3 -3
- {payi-0.1.0a83.dist-info → payi-0.1.0a85.dist-info}/METADATA +1 -1
- {payi-0.1.0a83.dist-info → payi-0.1.0a85.dist-info}/RECORD +21 -19
- {payi-0.1.0a83.dist-info → payi-0.1.0a85.dist-info}/WHEEL +0 -0
- {payi-0.1.0a83.dist-info → payi-0.1.0a85.dist-info}/licenses/LICENSE +0 -0
payi/lib/GoogleGenAiInstrumentor.py
CHANGED

@@ -1,12 +1,10 @@
 import json
-import math
 from typing import Any, List, Union, Optional, Sequence
 from typing_extensions import override

 from wrapt import wrap_function_wrapper  # type: ignore

 from payi.lib.helpers import PayiCategories
-from payi.types.ingest_units_params import Units

 from .instrument import _ChunkResult, _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor

@@ -115,18 +113,16 @@ async def agenerate_stream_wrapper(
         kwargs,
     )

-def count_chars_skip_spaces(text: str) -> int:
-    return sum(1 for c in text if not c.isspace())
-
 class _GoogleGenAiRequest(_ProviderRequest):
     def __init__(self, instrumentor: _PayiInstrumentor):
         super().__init__(
             instrumentor=instrumentor,
             category=PayiCategories.google_vertex,
             streaming_type=_StreamingType.generator,
+            is_google_vertex_or_genai_client=True,
         )
         self._prompt_character_count = 0
-        self.
+        self._candidates_character_count = 0

     @override
     def process_request(self, instance: Any, extra_headers: 'dict[str, str]', args: Sequence[Any], kwargs: Any) -> bool:
@@ -158,6 +154,8 @@ class _GoogleGenAiRequest(_ProviderRequest):
         if isinstance(value, list):
             items = value  # type: ignore

+            from .VertexInstrumentor import count_chars_skip_spaces
+
             for item in items:  # type: ignore
                 text = ""
                 if isinstance(item, Part):
@@ -249,6 +247,8 @@ class _GoogleGenAiRequest(_ProviderRequest):

     @override
     def process_chunk(self, chunk: Any) -> _ChunkResult:
+        from .VertexInstrumentor import vertex_compute_usage, count_chars_skip_spaces
+
         ingest = False
         response_dict: dict[str, Any] = chunk.to_json_dict()
         if "provider_response_id" not in self._ingest:
@@ -260,22 +260,25 @@ class _GoogleGenAiRequest(_ProviderRequest):

             self._ingest["resource"] = "google." + model

+
         for candidate in response_dict.get("candidates", []):
             parts = candidate.get("content", {}).get("parts", [])
             for part in parts:
-                self.
+                self._candidates_character_count += count_chars_skip_spaces(part.get("text", ""))

         usage = response_dict.get("usage_metadata", {})
         if usage and "prompt_token_count" in usage and "candidates_token_count" in usage:
-
+            vertex_compute_usage(
+                request=self,
+                model=model,
+                response_dict=response_dict,
+                prompt_character_count=self._prompt_character_count,
+                streaming_candidates_characters=self._candidates_character_count
+            )
             ingest = True

         return _ChunkResult(send_chunk_to_caller=True, ingest=ingest)

-    @staticmethod
-    def _is_character_billing_model(model: str) -> bool:
-        return model.startswith("gemini-1.")
-
     @override
     def process_synchronous_response(
         self,
@@ -284,6 +287,8 @@ class _GoogleGenAiRequest(_ProviderRequest):
         kwargs: Any) -> Any:
         response_dict = response.to_json_dict()

+        from .VertexInstrumentor import vertex_compute_usage
+
         id: Optional[str] = response_dict.get("response_id", None)
         if id:
             self._ingest["provider_response_id"] = id
@@ -292,105 +297,15 @@ class _GoogleGenAiRequest(_ProviderRequest):
         if model:
             self._ingest["resource"] = "google." + model

-
-
+        vertex_compute_usage(
+            request=self,
+            model=model,
+            response_dict=response_dict,
+            prompt_character_count=self._prompt_character_count,
+            streaming_candidates_characters=self._candidates_character_count
+        )
+
         if log_prompt_and_response:
             self._ingest["provider_response_json"] = [json.dumps(response_dict)]

-        return None
-
-    def add_units(self, key: str, input: Optional[int] = None, output: Optional[int] = None) -> None:
-        if key not in self._ingest["units"]:
-            self._ingest["units"][key] = {}
-        if input is not None:
-            self._ingest["units"][key]["input"] = input
-        if output is not None:
-            self._ingest["units"][key]["output"] = output
-
-    def _compute_usage(self, response_dict: 'dict[str, Any]', streaming_candidates_characters: Optional[int] = None) -> None:
-        usage = response_dict.get("usage_metadata", {})
-        input = usage.get("prompt_token_count", 0)
-
-        prompt_tokens_details: list[dict[str, Any]] = usage.get("prompt_tokens_details", [])
-        candidates_tokens_details: list[dict[str, Any]] = usage.get("candidates_tokens_details", [])
-
-        model: str = response_dict.get("model_version", "")
-
-        # for character billing only
-        large_context = "" if input < 128000 else "_large_context"
-
-        if self._is_character_billing_model(model):
-            for details in prompt_tokens_details:
-                modality = details.get("modality", "")
-                if not modality:
-                    continue
-
-                modality_token_count = details.get("token_count", 0)
-                if modality == "TEXT":
-                    input = self._prompt_character_count
-                    if input == 0:
-                        # back up calc if nothing was calculated from the prompt
-                        input = response_dict["usage_metadata"]["prompt_token_count"] * 4
-
-                    output = 0
-                    if streaming_candidates_characters is None:
-                        for candidate in response_dict.get("candidates", []):
-                            parts = candidate.get("content", {}).get("parts", [])
-                            for part in parts:
-                                output += count_chars_skip_spaces(part.get("text", ""))
-
-                        if output == 0:
-                            # back up calc if no parts
-                            output = response_dict["usage_metadata"]["candidates_token_count"] * 4
-                    else:
-                        output = streaming_candidates_characters
-
-                    self._ingest["units"]["text"+large_context] = Units(input=input, output=output)
-
-                elif modality == "IMAGE":
-                    num_images = math.ceil(modality_token_count / 258)
-                    self.add_units("vision"+large_context, input=num_images)
-
-                elif modality == "VIDEO":
-                    video_seconds = math.ceil(modality_token_count / 285)
-                    self.add_units("video"+large_context, input=video_seconds)
-
-                elif modality == "AUDIO":
-                    audio_seconds = math.ceil(modality_token_count / 25)
-                    self.add_units("audio"+large_context, input=audio_seconds)
-
-        else:
-            for details in prompt_tokens_details:
-                modality = details.get("modality", "")
-                if not modality:
-                    continue
-
-                modality_token_count = details.get("token_count", 0)
-                if modality == "IMAGE":
-                    self.add_units("vision", input=modality_token_count)
-                elif modality in ("VIDEO", "AUDIO", "TEXT"):
-                    self.add_units(modality.lower(), input=modality_token_count)
-            for details in candidates_tokens_details:
-                modality = details.get("modality", "")
-                if not modality:
-                    continue
-
-                modality_token_count = details.get("token_count", 0)
-                if modality in ("VIDEO", "AUDIO", "TEXT", "IMAGE"):
-                    self.add_units(modality.lower(), output=modality_token_count)
-
-        if not self._ingest["units"]:
-            input = usage.get("prompt_token_count", 0)
-            output = usage.get("candidates_token_count", 0) * 4
-
-            if self._is_character_billing_model(model):
-                if self._prompt_character_count > 0:
-                    input = self._prompt_character_count
-                else:
-                    input *= 4
-
-                # if no units were added, add a default unit and assume 4 characters per token
-                self._ingest["units"]["text"+large_context] = Units(input=input, output=output)
-            else:
-                # if no units were added, add a default unit
-                self._ingest["units"]["text"] = Units(input=input, output=output)
+        return None
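Taken together, the GoogleGenAiInstrumentor changes delete this module's private copies of count_chars_skip_spaces, add_units, and _compute_usage and delegate to shared helpers that now live in VertexInstrumentor (next file). The imports happen inside the methods rather than at module top, which sidesteps a circular import between the two instrumentor modules. A minimal sketch of the resulting shape (not the verbatim modules; bodies elided):

# payi/lib/VertexInstrumentor.py -- the shared helpers now live here
def count_chars_skip_spaces(text: str) -> int:
    # gemini-1.x character billing counts non-whitespace characters only
    return sum(1 for c in text if not c.isspace())

# payi/lib/GoogleGenAiInstrumentor.py -- delegates instead of reimplementing
def process_chunk(self, chunk):
    # imported inside the method to avoid a circular import between
    # the two instrumentor modules
    from .VertexInstrumentor import vertex_compute_usage, count_chars_skip_spaces
    ...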
payi/lib/VertexInstrumentor.py
CHANGED

@@ -94,9 +94,10 @@ class _GoogleVertexRequest(_ProviderRequest):
             instrumentor=instrumentor,
             category=PayiCategories.google_vertex,
             streaming_type=_StreamingType.generator,
+            is_google_vertex_or_genai_client=True,
         )
         self._prompt_character_count = 0
-        self.
+        self._candidates_character_count = 0
         self._model_name: Optional[str] = None

     @override
@@ -227,19 +228,21 @@ class _GoogleVertexRequest(_ProviderRequest):
         for candidate in response_dict.get("candidates", []):
             parts = candidate.get("content", {}).get("parts", [])
             for part in parts:
-                self.
+                self._candidates_character_count += count_chars_skip_spaces(part.get("text", ""))

         usage = response_dict.get("usage_metadata", {})
         if usage and "prompt_token_count" in usage and "candidates_token_count" in usage:
-
+            vertex_compute_usage(
+                request=self,
+                model=self._get_model_name(response_dict),
+                response_dict=response_dict,
+                prompt_character_count=self._prompt_character_count,
+                streaming_candidates_characters=self._candidates_character_count,
+            )
             ingest = True

         return _ChunkResult(send_chunk_to_caller=True, ingest=ingest)

-    @staticmethod
-    def _is_character_billing_model(model: str) -> bool:
-        return model.startswith("gemini-1.")
-
     @override
     def process_synchronous_response(
         self,
@@ -256,108 +259,139 @@ class _GoogleVertexRequest(_ProviderRequest):
         if model:
             self._ingest["resource"] = "google." + model

-
+        vertex_compute_usage(
+            request=self,
+            model=model,
+            response_dict=response_dict,
+            prompt_character_count=self._prompt_character_count,
+            streaming_candidates_characters=self._candidates_character_count
+        )

         if log_prompt_and_response:
             self._ingest["provider_response_json"] = [json.dumps(response_dict)]

         return None

-
-
-
+def vertex_compute_usage(
+    request: _ProviderRequest,
+    model: Optional[str],
+    response_dict: 'dict[str, Any]',
+    prompt_character_count: int = 0,
+    streaming_candidates_characters: Optional[int] = None) -> None:
+
+    def is_character_billing_model(model: str) -> bool:
+        return model.startswith("gemini-1.")
+
+    def is_large_context_token_model(model: str, input_tokens: int) -> bool:
+        return model.startswith("gemini-2.5-pro") and input_tokens > 200_000
+
+    def add_units(request: _ProviderRequest, key: str, input: Optional[int] = None, output: Optional[int] = None) -> None:
+        if key not in request._ingest["units"]:
+            request._ingest["units"][key] = {}
         if input is not None:
-
+            request._ingest["units"][key]["input"] = input
         if output is not None:
-
-
-    def _compute_usage(self, response_dict: 'dict[str, Any]', streaming_candidates_characters: Optional[int] = None) -> None:
-        usage = response_dict.get("usage_metadata", {})
-        input = usage.get("prompt_token_count", 0)
+            request._ingest["units"][key]["output"] = output

-
-
+    usage = response_dict.get("usage_metadata", {})
+    input = usage.get("prompt_token_count", 0)

-
-
-        model = ""
-
-        # for character billing only
-        large_context = "" if input < 128000 else "_large_context"
-
-        if self._is_character_billing_model(model):
-            # gemini 1.0 and 1.5 units are reported in characters, per second, per image, etc...
-            for details in prompt_tokens_details:
-                modality = details.get("modality", "")
-                if not modality:
-                    continue
-
-                modality_token_count = details.get("token_count", 0)
-                if modality == "TEXT":
-                    input = self._prompt_character_count
-                    if input == 0:
-                        # back up calc if nothing was calculated from the prompt
-                        input = response_dict["usage_metadata"]["prompt_token_count"] * 4
-
-                    output = 0
-                    if streaming_candidates_characters is None:
-                        for candidate in response_dict.get("candidates", []):
-                            parts = candidate.get("content", {}).get("parts", [])
-                            for part in parts:
-                                output += count_chars_skip_spaces(part.get("text", ""))
-
-                        if output == 0:
-                            # back up calc if no parts
-                            output = response_dict["usage_metadata"]["candidates_token_count"] * 4
-                    else:
-                        output = streaming_candidates_characters
-
-                    self._ingest["units"]["text"+large_context] = Units(input=input, output=output)
-
-                elif modality == "IMAGE":
-                    num_images = math.ceil(modality_token_count / 258)
-                    self.add_units("vision"+large_context, input=num_images)
-
-                elif modality == "VIDEO":
-                    video_seconds = math.ceil(modality_token_count / 285)
-                    self.add_units("video"+large_context, input=video_seconds)
-
-                elif modality == "AUDIO":
-                    audio_seconds = math.ceil(modality_token_count / 25)
-                    self.add_units("audio"+large_context, input=audio_seconds)
+    prompt_tokens_details: list[dict[str, Any]] = usage.get("prompt_tokens_details", [])
+    candidates_tokens_details: list[dict[str, Any]] = usage.get("candidates_tokens_details", [])

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    if not model:
+        model = ""
+
+    large_context = ""
+
+    if is_character_billing_model(model):
+        if input > 128000:
+            large_context = "_large_context"
+
+        # gemini 1.0 and 1.5 units are reported in characters, per second, per image, etc...
+        for details in prompt_tokens_details:
+            modality = details.get("modality", "")
+            if not modality:
+                continue
+
+            modality_token_count = details.get("token_count", 0)
+            if modality == "TEXT":
+                input = prompt_character_count
+                if input == 0:
+                    # back up calc if nothing was calculated from the prompt
+                    input = response_dict["usage_metadata"]["prompt_token_count"] * 4
+
+                output = 0
+                if streaming_candidates_characters is None:
+                    for candidate in response_dict.get("candidates", []):
+                        parts = candidate.get("content", {}).get("parts", [])
+                        for part in parts:
+                            output += count_chars_skip_spaces(part.get("text", ""))
+
+                    if output == 0:
+                        # back up calc if no parts
+                        output = response_dict["usage_metadata"]["candidates_token_count"] * 4
                 else:
-
+                    output = streaming_candidates_characters

-
-
+                request._ingest["units"]["text"+large_context] = Units(input=input, output=output)
+
+            elif modality == "IMAGE":
+                num_images = math.ceil(modality_token_count / 258)
+                add_units(request, "vision"+large_context, input=num_images)
+
+            elif modality == "VIDEO":
+                video_seconds = math.ceil(modality_token_count / 285)
+                add_units(request, "video"+large_context, input=video_seconds)
+
+            elif modality == "AUDIO":
+                audio_seconds = math.ceil(modality_token_count / 25)
+                add_units(request, "audio"+large_context, input=audio_seconds)
+
+        # No need to gover the candidates_tokens_details as all the character based 1.x models only output TEXT
+        # for details in candidates_tokens_details:
+
+    else:
+        # thinking tokens introduced in 2.5 after the transition to token based billing
+        thinking_token_count = usage.get("thoughts_token_count", 0)
+
+        if is_large_context_token_model(model, input):
+            large_context = "_large_context"
+
+        for details in prompt_tokens_details:
+            modality = details.get("modality", "")
+            if not modality:
+                continue
+
+            modality_token_count = details.get("token_count", 0)
+            if modality == "IMAGE":
+                add_units(request, "vision"+large_context, input=modality_token_count)
+            elif modality in ("VIDEO", "AUDIO", "TEXT"):
+                add_units(request, modality.lower()+large_context, input=modality_token_count)
+        for details in candidates_tokens_details:
+            modality = details.get("modality", "")
+            if not modality:
+                continue
+
+            modality_token_count = details.get("token_count", 0)
+            if modality in ("VIDEO", "AUDIO", "TEXT", "IMAGE"):
+                add_units(request, modality.lower()+large_context, output=modality_token_count)
+
+        if thinking_token_count > 0:
+            add_units(request, "reasoning"+large_context, output=thinking_token_count)
+
+    if not request._ingest["units"]:
+        input = usage.get("prompt_token_count", 0)
+        output = usage.get("candidates_token_count", 0) * 4
+
+        if is_character_billing_model(model):
+            if prompt_character_count > 0:
+                input = prompt_character_count
            else:
-
-
+                input *= 4
+
+            # if no units were added, add a default unit and assume 4 characters per token
+            request._ingest["units"]["text"+large_context] = Units(input=input, output=output)
+        else:
+            # if no units were added, add a default unit
+            request._ingest["units"]["text"] = Units(input=input, output=output)
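The new module-level vertex_compute_usage only reads the response's usage_metadata and writes request._ingest["units"], so its bucketing is easy to exercise in isolation. A minimal sketch, assuming payi 0.1.0a85 is installed and using a fabricated response_dict that mirrors the usage_metadata fields read in the diff above (the stub request is a duck-typed stand-in, not the real _ProviderRequest):

from payi.lib.VertexInstrumentor import vertex_compute_usage

class _StubRequest:
    # vertex_compute_usage only touches request._ingest["units"]
    def __init__(self):
        self._ingest = {"units": {}}

response_dict = {
    "usage_metadata": {
        "prompt_token_count": 250_000,
        "candidates_token_count": 1_200,
        "thoughts_token_count": 300,  # new in 2.5: billed as "reasoning"
        "prompt_tokens_details": [{"modality": "TEXT", "token_count": 250_000}],
        "candidates_tokens_details": [{"modality": "TEXT", "token_count": 1_200}],
    }
}

request = _StubRequest()
vertex_compute_usage(request=request, model="gemini-2.5-pro", response_dict=response_dict)

# gemini-2.5-pro above 200k input tokens lands in the _large_context buckets:
# {'text_large_context': {'input': 250000, 'output': 1200},
#  'reasoning_large_context': {'output': 300}}
print(request._ingest["units"])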
payi/lib/instrument.py
CHANGED

@@ -35,12 +35,20 @@ class _ChunkResult:
     ingest: bool = False

 class _ProviderRequest:
-    def __init__(
+    def __init__(
+        self, instrumentor: '_PayiInstrumentor',
+        category: str,
+        streaming_type: '_StreamingType',
+        is_aws_client: Optional[bool] = None,
+        is_google_vertex_or_genai_client: Optional[bool] = None,
+    ) -> None:
         self._instrumentor: '_PayiInstrumentor' = instrumentor
         self._estimated_prompt_tokens: Optional[int] = None
         self._category: str = category
         self._ingest: IngestUnitsParams = { "category": category, "units": {} }  # type: ignore
         self._streaming_type: '_StreamingType' = streaming_type
+        self._is_aws_client: Optional[bool] = is_aws_client
+        self._is_google_vertex_or_genai_client: Optional[bool] = is_google_vertex_or_genai_client

     def process_chunk(self, _chunk: Any) -> _ChunkResult:
         return _ChunkResult(send_chunk_to_caller=True)
@@ -55,16 +63,25 @@ class _ProviderRequest:
     def process_request_prompt(self, prompt: 'dict[str, Any]', args: Sequence[Any], kwargs: 'dict[str, Any]') -> None:
         ...

-    def
-
+    def process_initial_stream_response(self, response: Any) -> None:
+        pass
+
+    @property
+    def is_aws_client(self) -> bool:
+        return self._is_aws_client if self._is_aws_client is not None else False
+
+    @property
+    def is_google_vertex_or_genai_client(self) -> bool:
+        return self._is_google_vertex_or_genai_client if self._is_google_vertex_or_genai_client is not None else False

-    def is_vertex(self) -> bool:
-        return self._category == PayiCategories.google_vertex
-
     def process_exception(self, exception: Exception, kwargs: Any, ) -> bool:  # noqa: ARG002
         self.exception_to_semantic_failure(exception)
         return True

+    @property
+    def supports_extra_headers(self) -> bool:
+        return not self.is_aws_client and not self.is_google_vertex_or_genai_client
+
     @property
     def streaming_type(self) -> '_StreamingType':
         return self._streaming_type
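This release replaces category-sniffing helpers (is_vertex() here, and judging by the later hunks an is_bedrock() counterpart) with explicit capability flags that each subclass passes at construction time; supports_extra_headers then derives from them, since neither boto3 nor the Google SDK clients accept an extra_headers kwarg. A condensed sketch of the pattern (names taken from the diff, bodies trimmed):

from typing import Any, Optional

class ProviderRequest:
    def __init__(self, is_aws_client: Optional[bool] = None,
                 is_google_vertex_or_genai_client: Optional[bool] = None) -> None:
        # tri-state flags: None (unset) reads as False via the properties
        self._is_aws_client = is_aws_client
        self._is_google_vertex_or_genai_client = is_google_vertex_or_genai_client

    @property
    def is_aws_client(self) -> bool:
        return self._is_aws_client if self._is_aws_client is not None else False

    @property
    def is_google_vertex_or_genai_client(self) -> bool:
        return (self._is_google_vertex_or_genai_client
                if self._is_google_vertex_or_genai_client is not None else False)

    @property
    def supports_extra_headers(self) -> bool:
        # neither boto3 nor the Google SDK clients accept extra_headers
        return not self.is_aws_client and not self.is_google_vertex_or_genai_client

def invoke(request: ProviderRequest, kwargs: 'dict[str, Any]') -> None:
    # one capability check replaces scattered is_bedrock()/is_vertex() tests
    if not request.supports_extra_headers:
        kwargs.pop("extra_headers", None)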
@@ -277,6 +294,22 @@ class _PayiInstrumentor:
         except Exception as e:
             self._logger.error(f"Error instrumenting Google GenAi: {e}")

+    @staticmethod
+    def _create_logged_ingest_units(
+        ingest_units: IngestUnitsParams,
+    ) -> IngestUnitsParams:
+        # remove large and potentially sensitive data from the log
+        log_ingest_units: IngestUnitsParams = ingest_units.copy()
+
+        log_ingest_units.pop('provider_request_json', None)
+        log_ingest_units.pop('provider_response_json', None)
+
+        # Pop system.stack_trace from properties if it exists
+        if 'properties' in log_ingest_units and isinstance(log_ingest_units['properties'], dict):
+            log_ingest_units['properties'].pop('system.stack_trace', None)
+
+        return log_ingest_units
+
     def _process_ingest_units(self, ingest_units: IngestUnitsParams, log_data: 'dict[str, str]') -> bool:
         if int(ingest_units.get("http_status_code") or 0) < 400:
             units = ingest_units.get("units", {})
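One caveat worth noting about the scrubber above: dict.copy() is shallow, so while popping the two top-level *_json keys leaves the original payload intact, popping 'system.stack_trace' from the nested 'properties' dict appears to also remove it from the ingest_units that is subsequently sent. A sketch of a variant that copies the nested dict before scrubbing (hypothetical helper name, not the library's code):

from typing import Any

def create_logged_ingest_units(ingest_units: 'dict[str, Any]') -> 'dict[str, Any]':
    log_ingest_units = ingest_units.copy()

    # large and potentially sensitive blobs stay out of the log
    log_ingest_units.pop('provider_request_json', None)
    log_ingest_units.pop('provider_response_json', None)

    properties = log_ingest_units.get('properties')
    if isinstance(properties, dict):
        # rebuild the nested dict so the caller's payload keeps its stack trace
        log_ingest_units['properties'] = {
            k: v for k, v in properties.items() if k != 'system.stack_trace'
        }
    return log_ingest_units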
@@ -327,6 +360,9 @@ class _PayiInstrumentor:
             return None

         try:
+            if self._logger.isEnabledFor(logging.DEBUG):
+                self._logger.debug(f"_aingest_units: sending ({self._create_logged_ingest_units(ingest_units)})")
+
             if self._apayi:
                 ingest_response = await self._apayi.ingest.units(**ingest_units)
             elif self._payi:
@@ -399,6 +435,9 @@ class _PayiInstrumentor:

         try:
             if self._payi:
+                if self._logger.isEnabledFor(logging.DEBUG):
+                    self._logger.debug(f"_ingest_units: sending ({self._create_logged_ingest_units(ingest_units)})")
+
                 ingest_response = self._payi.ingest.units(**ingest_units)
                 self._logger.debug(f"_ingest_units: success ({ingest_response})")

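Both the sync and async ingest paths now emit the scrubbed payload at DEBUG, guarded by isEnabledFor so the f-string (and the scrubbing copy) is only built when DEBUG is active. To see these records, enable DEBUG on the instrumentor's logger; its name is not visible in this diff, so the sketch below simply raises the root level:

import logging

logging.basicConfig(level=logging.DEBUG)
# expect records like:
#   _ingest_units: sending ({'category': ..., 'units': {...}})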
@@ -806,8 +845,7 @@ class _PayiInstrumentor:
         context = self.get_context()

         if not context:
-            if request.
-                # boto3 doesn't allow extra_headers
+            if not request.supports_extra_headers:
                 kwargs.pop("extra_headers", None)

             self._logger.debug(f"invoke_wrapper: no instrumentation context, exit early")
@@ -822,8 +860,7 @@ class _PayiInstrumentor:
             self._update_extra_headers(context, extra_headers)

         if context.get("proxy", self._proxy_default):
-            if request.
-                # boto3 doesn't allow extra_headers
+            if not request.supports_extra_headers:
                 kwargs.pop("extra_headers", None)
         elif "extra_headers" not in kwargs and extra_headers:
             # assumes anthropic and openai clients
@@ -899,7 +936,7 @@ class _PayiInstrumentor:
             request=request,
         )

-        if request.
+        if request.is_aws_client:
             if "body" in response:
                 response["body"] = stream_result
             else:
@@ -1084,9 +1121,10 @@ class _StreamIteratorWrapper(ObjectProxy):  # type: ignore

         instrumentor._logger.debug(f"StreamIteratorWrapper: instance {instance}, category {request._category}")

+        request.process_initial_stream_response(response)
+
         bedrock_from_stream: bool = False
-        if request.
-            request._ingest["provider_response_id"] = response["ResponseMetadata"]["RequestId"]
+        if request.is_aws_client:
             stream = response.get("stream", None)

             if stream:
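The new process_initial_stream_response hook lets a provider request inspect the raw response before the stream wrapper starts iterating, replacing the inline, Bedrock-only RequestId grab deleted above. A sketch of how a subclass might use it (assumption: the real override lives in the changed BedrockInstrumentor.py, which this diff does not show):

from typing import Any

class BedrockRequest:  # stand-in for the real _BedrockRequest subclass
    def __init__(self) -> None:
        self._ingest: 'dict[str, Any]' = {"units": {}}

    def process_initial_stream_response(self, response: Any) -> None:
        # mirrors the deleted inline line in _StreamIteratorWrapper
        self._ingest["provider_response_id"] = response["ResponseMetadata"]["RequestId"]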
@@ -1108,7 +1146,6 @@ class _StreamIteratorWrapper(ObjectProxy):  # type: ignore
         self._request: _ProviderRequest = request

         self._first_token: bool = True
-        self._is_bedrock: bool = request.is_bedrock()
         self._bedrock_from_stream: bool = bedrock_from_stream
         self._ingested: bool = False
         self._iter_started: bool = False
@@ -1131,7 +1168,7 @@ class _StreamIteratorWrapper(ObjectProxy):  # type: ignore

     def __iter__(self) -> Any:
         self._iter_started = True
-        if self.
+        if self._request.is_aws_client:
             # MUST reside in a separate function so that the yield statement (e.g. the generator) doesn't implicitly return its own iterator and overriding self
             self._instrumentor._logger.debug(f"StreamIteratorWrapper: bedrock __iter__")
             return self._iter_bedrock()
payi/resources/categories/__init__.py
CHANGED

@@ -16,6 +16,14 @@ from .categories import (
     CategoriesResourceWithStreamingResponse,
     AsyncCategoriesResourceWithStreamingResponse,
 )
+from .fixed_cost_resources import (
+    FixedCostResourcesResource,
+    AsyncFixedCostResourcesResource,
+    FixedCostResourcesResourceWithRawResponse,
+    AsyncFixedCostResourcesResourceWithRawResponse,
+    FixedCostResourcesResourceWithStreamingResponse,
+    AsyncFixedCostResourcesResourceWithStreamingResponse,
+)

 __all__ = [
     "ResourcesResource",
@@ -24,6 +32,12 @@ __all__ = [
     "AsyncResourcesResourceWithRawResponse",
     "ResourcesResourceWithStreamingResponse",
     "AsyncResourcesResourceWithStreamingResponse",
+    "FixedCostResourcesResource",
+    "AsyncFixedCostResourcesResource",
+    "FixedCostResourcesResourceWithRawResponse",
+    "AsyncFixedCostResourcesResourceWithRawResponse",
+    "FixedCostResourcesResourceWithStreamingResponse",
+    "AsyncFixedCostResourcesResourceWithStreamingResponse",
     "CategoriesResource",
     "AsyncCategoriesResource",
     "CategoriesResourceWithRawResponse",