payi 0.1.0a82__py3-none-any.whl → 0.1.0a84__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- payi/_version.py +1 -1
- payi/lib/AnthropicInstrumentor.py +92 -62
- payi/lib/BedrockInstrumentor.py +95 -108
- payi/lib/GoogleGenAiInstrumentor.py +31 -115
- payi/lib/OpenAIInstrumentor.py +13 -9
- payi/lib/VertexInstrumentor.py +168 -111
- payi/lib/instrument.py +135 -73
- payi/resources/categories/__init__.py +14 -0
- payi/resources/categories/categories.py +32 -0
- payi/resources/categories/fixed_cost_resources.py +196 -0
- payi/resources/ingest.py +14 -0
- payi/resources/limits/limits.py +4 -0
- payi/types/categories/__init__.py +1 -0
- payi/types/categories/fixed_cost_resource_create_params.py +21 -0
- payi/types/ingest_event_param.py +13 -1
- payi/types/ingest_units_params.py +11 -1
- payi/types/limit_create_params.py +2 -0
- payi/types/limit_history_response.py +3 -3
- {payi-0.1.0a82.dist-info → payi-0.1.0a84.dist-info}/METADATA +1 -1
- {payi-0.1.0a82.dist-info → payi-0.1.0a84.dist-info}/RECORD +22 -20
- {payi-0.1.0a82.dist-info → payi-0.1.0a84.dist-info}/WHEEL +0 -0
- {payi-0.1.0a82.dist-info → payi-0.1.0a84.dist-info}/licenses/LICENSE +0 -0
payi/lib/GoogleGenAiInstrumentor.py
CHANGED
@@ -1,14 +1,12 @@
 import json
-import math
 from typing import Any, List, Union, Optional, Sequence
 from typing_extensions import override
 
 from wrapt import wrap_function_wrapper # type: ignore
 
 from payi.lib.helpers import PayiCategories
-from payi.types.ingest_units_params import Units
 
-from .instrument import _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
+from .instrument import _ChunkResult, _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
 
 
 class GoogleGenAiInstrumentor:
@@ -115,9 +113,6 @@ async def agenerate_stream_wrapper(
         kwargs,
     )
 
-def count_chars_skip_spaces(text: str) -> int:
-    return sum(1 for c in text if not c.isspace())
-
 class _GoogleGenAiRequest(_ProviderRequest):
     def __init__(self, instrumentor: _PayiInstrumentor):
         super().__init__(
@@ -126,7 +121,7 @@ class _GoogleGenAiRequest(_ProviderRequest):
             streaming_type=_StreamingType.generator,
         )
         self._prompt_character_count = 0
-        self.
+        self._candidates_character_count = 0
 
     @override
     def process_request(self, instance: Any, extra_headers: 'dict[str, str]', args: Sequence[Any], kwargs: Any) -> bool:
@@ -158,6 +153,8 @@ class _GoogleGenAiRequest(_ProviderRequest):
         if isinstance(value, list):
             items = value # type: ignore
 
+            from .VertexInstrumentor import count_chars_skip_spaces
+
             for item in items: # type: ignore
                 text = ""
                 if isinstance(item, Part):
@@ -248,7 +245,10 @@ class _GoogleGenAiRequest(_ProviderRequest):
            prompt["tool_config"] = tool_config
 
     @override
-    def process_chunk(self, chunk: Any) ->
+    def process_chunk(self, chunk: Any) -> _ChunkResult:
+        from .VertexInstrumentor import vertex_compute_usage, count_chars_skip_spaces
+
+        ingest = False
         response_dict: dict[str, Any] = chunk.to_json_dict()
         if "provider_response_id" not in self._ingest:
             id = response_dict.get("response_id", None)
@@ -259,20 +259,24 @@ class _GoogleGenAiRequest(_ProviderRequest):
 
                 self._ingest["resource"] = "google." + model
 
+
        for candidate in response_dict.get("candidates", []):
            parts = candidate.get("content", {}).get("parts", [])
            for part in parts:
-                self.
+                self._candidates_character_count += count_chars_skip_spaces(part.get("text", ""))
 
        usage = response_dict.get("usage_metadata", {})
        if usage and "prompt_token_count" in usage and "candidates_token_count" in usage:
-
-
-
-
-
-
-
+            vertex_compute_usage(
+                request=self,
+                model=model,
+                response_dict=response_dict,
+                prompt_character_count=self._prompt_character_count,
+                streaming_candidates_characters=self._candidates_character_count
+            )
+            ingest = True
+
+        return _ChunkResult(send_chunk_to_caller=True, ingest=ingest)
 
     @override
     def process_synchronous_response(
@@ -282,6 +286,8 @@ class _GoogleGenAiRequest(_ProviderRequest):
         kwargs: Any) -> Any:
         response_dict = response.to_json_dict()
 
+        from .VertexInstrumentor import vertex_compute_usage
+
         id: Optional[str] = response_dict.get("response_id", None)
         if id:
             self._ingest["provider_response_id"] = id
@@ -290,105 +296,15 @@ class _GoogleGenAiRequest(_ProviderRequest):
         if model:
             self._ingest["resource"] = "google." + model
 
-
-
+        vertex_compute_usage(
+            request=self,
+            model=model,
+            response_dict=response_dict,
+            prompt_character_count=self._prompt_character_count,
+            streaming_candidates_characters=self._candidates_character_count
+        )
+
         if log_prompt_and_response:
             self._ingest["provider_response_json"] = [json.dumps(response_dict)]
 
-        return None
-
-    def add_units(self, key: str, input: Optional[int] = None, output: Optional[int] = None) -> None:
-        if key not in self._ingest["units"]:
-            self._ingest["units"][key] = {}
-        if input is not None:
-            self._ingest["units"][key]["input"] = input
-        if output is not None:
-            self._ingest["units"][key]["output"] = output
-
-    def _compute_usage(self, response_dict: 'dict[str, Any]', streaming_candidates_characters: Optional[int] = None) -> None:
-        usage = response_dict.get("usage_metadata", {})
-        input = usage.get("prompt_token_count", 0)
-
-        prompt_tokens_details: list[dict[str, Any]] = usage.get("prompt_tokens_details", [])
-        candidates_tokens_details: list[dict[str, Any]] = usage.get("candidates_tokens_details", [])
-
-        model: str = response_dict.get("model_version", "")
-
-        # for character billing only
-        large_context = "" if input < 128000 else "_large_context"
-
-        if self._is_character_billing_model(model):
-            for details in prompt_tokens_details:
-                modality = details.get("modality", "")
-                if not modality:
-                    continue
-
-                modality_token_count = details.get("token_count", 0)
-                if modality == "TEXT":
-                    input = self._prompt_character_count
-                    if input == 0:
-                        # back up calc if nothing was calculated from the prompt
-                        input = response_dict["usage_metadata"]["prompt_token_count"] * 4
-
-                    output = 0
-                    if streaming_candidates_characters is None:
-                        for candidate in response_dict.get("candidates", []):
-                            parts = candidate.get("content", {}).get("parts", [])
-                            for part in parts:
-                                output += count_chars_skip_spaces(part.get("text", ""))
-
-                        if output == 0:
-                            # back up calc if no parts
-                            output = response_dict["usage_metadata"]["candidates_token_count"] * 4
-                    else:
-                        output = streaming_candidates_characters
-
-                    self._ingest["units"]["text"+large_context] = Units(input=input, output=output)
-
-                elif modality == "IMAGE":
-                    num_images = math.ceil(modality_token_count / 258)
-                    self.add_units("vision"+large_context, input=num_images)
-
-                elif modality == "VIDEO":
-                    video_seconds = math.ceil(modality_token_count / 285)
-                    self.add_units("video"+large_context, input=video_seconds)
-
-                elif modality == "AUDIO":
-                    audio_seconds = math.ceil(modality_token_count / 25)
-                    self.add_units("audio"+large_context, input=audio_seconds)
-
-        else:
-            for details in prompt_tokens_details:
-                modality = details.get("modality", "")
-                if not modality:
-                    continue
-
-                modality_token_count = details.get("token_count", 0)
-                if modality == "IMAGE":
-                    self.add_units("vision", input=modality_token_count)
-                elif modality in ("VIDEO", "AUDIO", "TEXT"):
-                    self.add_units(modality.lower(), input=modality_token_count)
-            for details in candidates_tokens_details:
-                modality = details.get("modality", "")
-                if not modality:
-                    continue
-
-                modality_token_count = details.get("token_count", 0)
-                if modality in ("VIDEO", "AUDIO", "TEXT", "IMAGE"):
-                    self.add_units(modality.lower(), output=modality_token_count)
-
-        if not self._ingest["units"]:
-            input = usage.get("prompt_token_count", 0)
-            output = usage.get("candidates_token_count", 0) * 4
-
-            if self._is_character_billing_model(model):
-                if self._prompt_character_count > 0:
-                    input = self._prompt_character_count
-                else:
-                    input *= 4
-
-                # if no units were added, add a default unit and assume 4 characters per token
-                self._ingest["units"]["text"+large_context] = Units(input=input, output=output)
-            else:
-                # if no units were added, add a default unit
-                self._ingest["units"]["text"] = Units(input=input, output=output)
+        return None
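Across all three instrumentors in this release, process_chunk now returns a _ChunkResult instead of a bare value, separating "forward this chunk to the caller" from "enough usage data has arrived to ingest". _ChunkResult itself is defined in payi/lib/instrument.py (+135 -73 above), which this diff does not display; the following dataclass is a hypothetical sketch implied by the call sites, not the package's actual definition:

    from dataclasses import dataclass

    # Hypothetical sketch reconstructed from call sites such as
    # _ChunkResult(send_chunk_to_caller=True, ingest=ingest); the real class
    # in payi/lib/instrument.py may differ or carry additional fields.
    @dataclass
    class _ChunkResult:
        send_chunk_to_caller: bool  # forward this streaming chunk to the application
        ingest: bool                # usage is complete; the request can be ingested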
payi/lib/OpenAIInstrumentor.py
CHANGED
@@ -9,7 +9,7 @@ from wrapt import wrap_function_wrapper # type: ignore
 from payi.lib.helpers import PayiCategories, PayiHeaderNames
 from payi.types.ingest_units_params import Units
 
-from .instrument import _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
+from .instrument import _ChunkResult, _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
 
 
 class OpenAiInstrumentor:
@@ -22,8 +22,6 @@ class OpenAiInstrumentor:
     @staticmethod
     def instrument(instrumentor: _PayiInstrumentor) -> None:
         try:
-            from openai import OpenAI # type: ignore # noqa: F401 I001
-
             wrap_function_wrapper(
                 "openai.resources.chat.completions",
                 "Completions.create",
@@ -47,7 +45,11 @@ class OpenAiInstrumentor:
                 "AsyncEmbeddings.create",
                 aembeddings_wrapper(instrumentor),
             )
+        except Exception as e:
+            instrumentor._logger.debug(f"Error instrumenting openai: {e}")
 
+        # responses separately as they are relatively new and the client may not be using the latest openai module
+        try:
             wrap_function_wrapper(
                 "openai.resources.responses",
                 "Responses.create",
@@ -62,8 +64,6 @@ class OpenAiInstrumentor:
 
         except Exception as e:
             instrumentor._logger.debug(f"Error instrumenting openai: {e}")
-            return
-
 
 @_PayiInstrumentor.payi_wrapper
 def embeddings_wrapper(
@@ -338,7 +338,8 @@ class _OpenAiChatProviderRequest(_OpenAiProviderRequest):
         self._include_usage_added = False
 
     @override
-    def process_chunk(self, chunk: Any) ->
+    def process_chunk(self, chunk: Any) -> _ChunkResult:
+        ingest = False
         model = model_to_dict(chunk)
 
         if "provider_response_id" not in self._ingest:
@@ -356,8 +357,9 @@ class _OpenAiChatProviderRequest(_OpenAiProviderRequest):
             # packet which contains the usage to the client as they are not expecting the data
             if self._include_usage_added:
                 send_chunk_to_client = False
+            ingest = True
 
-        return send_chunk_to_client
+        return _ChunkResult(send_chunk_to_caller=send_chunk_to_client, ingest=ingest)
 
     @override
     def process_request(self, instance: Any, extra_headers: 'dict[str, str]', args: Sequence[Any], kwargs: Any) -> bool:
@@ -420,7 +422,8 @@ class _OpenAiResponsesProviderRequest(_OpenAiProviderRequest):
             input_tokens_details_key=_OpenAiProviderRequest.responses_input_tokens_details_key)
 
     @override
-    def process_chunk(self, chunk: Any) ->
+    def process_chunk(self, chunk: Any) -> _ChunkResult:
+        ingest = False
         model = model_to_dict(chunk)
         response: dict[str, Any] = model.get("response", {})
 
@@ -432,8 +435,9 @@ class _OpenAiResponsesProviderRequest(_OpenAiProviderRequest):
             usage = response.get("usage")
             if usage:
                 self.add_usage_units(usage)
+                ingest = True
 
-        return True
+        return _ChunkResult(send_chunk_to_caller=True, ingest=ingest)
 
     @override
     def process_request(self, instance: Any, extra_headers: 'dict[str, str]', args: Sequence[Any], kwargs: Any) -> bool:
payi/lib/VertexInstrumentor.py
CHANGED
@@ -8,33 +8,37 @@ from wrapt import wrap_function_wrapper # type: ignore
 from payi.lib.helpers import PayiCategories
 from payi.types.ingest_units_params import Units
 
-from .instrument import _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
+from .instrument import _ChunkResult, _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
 
 
 class VertexInstrumentor:
     @staticmethod
     def instrument(instrumentor: _PayiInstrumentor) -> None:
         try:
-            import vertexai # type: ignore # noqa: F401 I001
-
             wrap_function_wrapper(
                 "vertexai.generative_models",
                 "GenerativeModel.generate_content",
                 generate_wrapper(instrumentor),
             )
 
-            wrap_function_wrapper(
-                "vertexai.preview.generative_models",
-                "GenerativeModel.generate_content",
-                generate_wrapper(instrumentor),
-            )
-
             wrap_function_wrapper(
                 "vertexai.generative_models",
                 "GenerativeModel.generate_content_async",
                 agenerate_wrapper(instrumentor),
             )
 
+        except Exception as e:
+            instrumentor._logger.debug(f"Error instrumenting vertex: {e}")
+            return
+
+        # separate instrumenting of preview functionality from released in case it fails
+        try:
+            wrap_function_wrapper(
+                "vertexai.preview.generative_models",
+                "GenerativeModel.generate_content",
+                generate_wrapper(instrumentor),
+            )
+
             wrap_function_wrapper(
                 "vertexai.preview.generative_models",
                 "GenerativeModel.generate_content_async",
@@ -92,12 +96,20 @@ class _GoogleVertexRequest(_ProviderRequest):
             streaming_type=_StreamingType.generator,
         )
         self._prompt_character_count = 0
-        self.
+        self._candidates_character_count = 0
+        self._model_name: Optional[str] = None
 
     @override
     def process_request(self, instance: Any, extra_headers: 'dict[str, str]', args: Sequence[Any], kwargs: Any) -> bool:
         from vertexai.generative_models import Content, Image, Part # type: ignore # noqa: F401 I001
 
+        # Try to extract the model name as a backup if the response does not provide it (older vertexai versions do not)
+        if instance and hasattr(instance, "_model_name"):
+            model = instance._model_name
+            if model and isinstance(model, str):
+                # Extract the model name after the last slash
+                self._model_name = model.split('/')[-1]
+
         if not args:
             return True
 
@@ -191,32 +203,44 @@ class _GoogleVertexRequest(_ProviderRequest):
             # tool_config does not have to_dict or any other serializable object
             prompt["tool_config"] = str(tool_config) # type: ignore
 
+    def _get_model_name(self, response: 'dict[str, Any]') -> Optional[str]:
+        model: Optional[str] = response.get("model_version", None)
+        if model:
+            return model
+
+        return self._model_name
+
     @override
-    def process_chunk(self, chunk: Any) ->
+    def process_chunk(self, chunk: Any) -> _ChunkResult:
+        ingest = False
         response_dict: dict[str, Any] = chunk.to_dict()
         if "provider_response_id" not in self._ingest:
             id = response_dict.get("response_id", None)
             if id:
                 self._ingest["provider_response_id"] = id
 
-
-
-
+        if "resource" not in self._ingest:
+            model: Optional[str] = self._get_model_name(response_dict) # type: ignore[unreachable]
+            if model:
+                self._ingest["resource"] = "google." + model
 
         for candidate in response_dict.get("candidates", []):
             parts = candidate.get("content", {}).get("parts", [])
             for part in parts:
-                self.
+                self._candidates_character_count += count_chars_skip_spaces(part.get("text", ""))
 
         usage = response_dict.get("usage_metadata", {})
         if usage and "prompt_token_count" in usage and "candidates_token_count" in usage:
-
+            vertex_compute_usage(
+                request=self,
+                model=self._get_model_name(response_dict),
+                response_dict=response_dict,
+                prompt_character_count=self._prompt_character_count,
+                streaming_candidates_characters=self._candidates_character_count,
+            )
+            ingest = True
 
-        return True
-
-    @staticmethod
-    def _is_character_billing_model(model: str) -> bool:
-        return model.startswith("gemini-1.")
+        return _ChunkResult(send_chunk_to_caller=True, ingest=ingest)
 
     @override
     def process_synchronous_response(
@@ -230,110 +254,143 @@ class _GoogleVertexRequest(_ProviderRequest):
         if id:
             self._ingest["provider_response_id"] = id
 
-        model: Optional[str] =
+        model: Optional[str] = self._get_model_name(response_dict)
         if model:
             self._ingest["resource"] = "google." + model
 
-
+        vertex_compute_usage(
+            request=self,
+            model=model,
+            response_dict=response_dict,
+            prompt_character_count=self._prompt_character_count,
+            streaming_candidates_characters=self._candidates_character_count
+        )
 
         if log_prompt_and_response:
             self._ingest["provider_response_json"] = [json.dumps(response_dict)]
 
         return None
 
-
-
-
+def vertex_compute_usage(
+    request: _ProviderRequest,
+    model: Optional[str],
+    response_dict: 'dict[str, Any]',
+    prompt_character_count: int = 0,
+    streaming_candidates_characters: Optional[int] = None) -> None:
+
+    def is_character_billing_model(model: str) -> bool:
+        return model.startswith("gemini-1.")
+
+    def is_large_context_token_model(model: str, input_tokens: int) -> bool:
+        return model.startswith("gemini-2.5-pro") and input_tokens > 200_000
+
+    def add_units(request: _ProviderRequest, key: str, input: Optional[int] = None, output: Optional[int] = None) -> None:
+        if key not in request._ingest["units"]:
+            request._ingest["units"][key] = {}
         if input is not None:
-
+            request._ingest["units"][key]["input"] = input
         if output is not None:
-
-
-    def _compute_usage(self, response_dict: 'dict[str, Any]', streaming_candidates_characters: Optional[int] = None) -> None:
-        usage = response_dict.get("usage_metadata", {})
-        input = usage.get("prompt_token_count", 0)
+            request._ingest["units"][key]["output"] = output
 
-
-
+    usage = response_dict.get("usage_metadata", {})
+    input = usage.get("prompt_token_count", 0)
 
-
-
-        # for character billing only
-        large_context = "" if input < 128000 else "_large_context"
-
-        if self._is_character_billing_model(model):
-            # gemini 1.0 and 1.5 units are reported in characters, per second, per image, etc...
-            for details in prompt_tokens_details:
-                modality = details.get("modality", "")
-                if not modality:
-                    continue
-
-                modality_token_count = details.get("token_count", 0)
-                if modality == "TEXT":
-                    input = self._prompt_character_count
-                    if input == 0:
-                        # back up calc if nothing was calculated from the prompt
-                        input = response_dict["usage_metadata"]["prompt_token_count"] * 4
-
-                    output = 0
-                    if streaming_candidates_characters is None:
-                        for candidate in response_dict.get("candidates", []):
-                            parts = candidate.get("content", {}).get("parts", [])
-                            for part in parts:
-                                output += count_chars_skip_spaces(part.get("text", ""))
-
-                        if output == 0:
-                            # back up calc if no parts
-                            output = response_dict["usage_metadata"]["candidates_token_count"] * 4
-                    else:
-                        output = streaming_candidates_characters
-
-                    self._ingest["units"]["text"+large_context] = Units(input=input, output=output)
-
-                elif modality == "IMAGE":
-                    num_images = math.ceil(modality_token_count / 258)
-                    self.add_units("vision"+large_context, input=num_images)
-
-                elif modality == "VIDEO":
-                    video_seconds = math.ceil(modality_token_count / 285)
-                    self.add_units("video"+large_context, input=video_seconds)
-
-                elif modality == "AUDIO":
-                    audio_seconds = math.ceil(modality_token_count / 25)
-                    self.add_units("audio"+large_context, input=audio_seconds)
+    prompt_tokens_details: list[dict[str, Any]] = usage.get("prompt_tokens_details", [])
+    candidates_tokens_details: list[dict[str, Any]] = usage.get("candidates_tokens_details", [])
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    if not model:
+        model = ""
+
+    large_context = ""
+
+    if is_character_billing_model(model):
+        if input > 128000:
+            large_context = "_large_context"
+
+        # gemini 1.0 and 1.5 units are reported in characters, per second, per image, etc...
+        for details in prompt_tokens_details:
+            modality = details.get("modality", "")
+            if not modality:
+                continue
+
+            modality_token_count = details.get("token_count", 0)
+            if modality == "TEXT":
+                input = prompt_character_count
+                if input == 0:
+                    # back up calc if nothing was calculated from the prompt
+                    input = response_dict["usage_metadata"]["prompt_token_count"] * 4
+
+                output = 0
+                if streaming_candidates_characters is None:
+                    for candidate in response_dict.get("candidates", []):
+                        parts = candidate.get("content", {}).get("parts", [])
+                        for part in parts:
+                            output += count_chars_skip_spaces(part.get("text", ""))
+
+                    if output == 0:
+                        # back up calc if no parts
+                        output = response_dict["usage_metadata"]["candidates_token_count"] * 4
                 else:
-
+                    output = streaming_candidates_characters
+
+                request._ingest["units"]["text"+large_context] = Units(input=input, output=output)
+
+            elif modality == "IMAGE":
+                num_images = math.ceil(modality_token_count / 258)
+                add_units(request, "vision"+large_context, input=num_images)
 
-
-
+            elif modality == "VIDEO":
+                video_seconds = math.ceil(modality_token_count / 285)
+                add_units(request, "video"+large_context, input=video_seconds)
+
+            elif modality == "AUDIO":
+                audio_seconds = math.ceil(modality_token_count / 25)
+                add_units(request, "audio"+large_context, input=audio_seconds)
+
+        # No need to go over the candidates_tokens_details as all the character based 1.x models only output TEXT
+        # for details in candidates_tokens_details:
+
+    else:
+        # thinking tokens introduced in 2.5 after the transition to token based billing
+        thinking_token_count = usage.get("thoughts_token_count", 0)
+
+        if is_large_context_token_model(model, input):
+            large_context = "_large_context"
+
+        for details in prompt_tokens_details:
+            modality = details.get("modality", "")
+            if not modality:
+                continue
+
+            modality_token_count = details.get("token_count", 0)
+            if modality == "IMAGE":
+                add_units(request, "vision"+large_context, input=modality_token_count)
+            elif modality in ("VIDEO", "AUDIO", "TEXT"):
+                add_units(request, modality.lower()+large_context, input=modality_token_count)
+        for details in candidates_tokens_details:
+            modality = details.get("modality", "")
+            if not modality:
+                continue
+
+            modality_token_count = details.get("token_count", 0)
+            if modality in ("VIDEO", "AUDIO", "TEXT", "IMAGE"):
+                add_units(request, modality.lower()+large_context, output=modality_token_count)
+
+        if thinking_token_count > 0:
+            add_units(request, "reasoning"+large_context, output=thinking_token_count)
+
+    if not request._ingest["units"]:
+        input = usage.get("prompt_token_count", 0)
+        output = usage.get("candidates_token_count", 0) * 4
+
+        if is_character_billing_model(model):
+            if prompt_character_count > 0:
+                input = prompt_character_count
            else:
-
-
+                input *= 4
+
+            # if no units were added, add a default unit and assume 4 characters per token
+            request._ingest["units"]["text"+large_context] = Units(input=input, output=output)
+        else:
+            # if no units were added, add a default unit
+            request._ingest["units"]["text"] = Units(input=input, output=output)