payi 0.1.0a85__py3-none-any.whl → 0.1.0a87__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of payi might be problematic.
- payi/_version.py +1 -1
- payi/lib/AnthropicInstrumentor.py +56 -3
- payi/lib/BedrockInstrumentor.py +90 -16
- payi/lib/GoogleGenAiInstrumentor.py +11 -62
- payi/lib/OpenAIInstrumentor.py +56 -2
- payi/lib/VertexInstrumentor.py +10 -196
- payi/lib/VertexRequest.py +237 -0
- payi/lib/instrument.py +54 -222
- {payi-0.1.0a85.dist-info → payi-0.1.0a87.dist-info}/METADATA +1 -1
- {payi-0.1.0a85.dist-info → payi-0.1.0a87.dist-info}/RECORD +12 -11
- {payi-0.1.0a85.dist-info → payi-0.1.0a87.dist-info}/WHEEL +0 -0
- {payi-0.1.0a85.dist-info → payi-0.1.0a87.dist-info}/licenses/LICENSE +0 -0
payi/lib/VertexInstrumentor.py
CHANGED
@@ -1,14 +1,10 @@
-import json
-import math
 from typing import Any, List, Union, Optional, Sequence
 from typing_extensions import override
 
 from wrapt import wrap_function_wrapper  # type: ignore
 
-from payi.lib.helpers import PayiCategories
-from payi.types.ingest_units_params import Units
-
-from .instrument import _ChunkResult, _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
+from .instrument import _ChunkResult, _IsStreaming, _PayiInstrumentor
+from .VertexRequest import _VertexRequest
 
 
 class VertexInstrumentor:
@@ -85,16 +81,10 @@ async def agenerate_wrapper(
         kwargs,
     )
 
-
-    return sum(1 for c in text if not c.isspace())
-
-class _GoogleVertexRequest(_ProviderRequest):
+class _GoogleVertexRequest(_VertexRequest):
     def __init__(self, instrumentor: _PayiInstrumentor):
         super().__init__(
             instrumentor=instrumentor,
-            category=PayiCategories.google_vertex,
-            streaming_type=_StreamingType.generator,
-            is_google_vertex_or_genai_client=True,
         )
         self._prompt_character_count = 0
         self._candidates_character_count = 0
@@ -144,7 +134,7 @@ class _GoogleVertexRequest(_ProviderRequest):
                 text = item
 
             if text != "":
-                self._prompt_character_count += count_chars_skip_spaces(text)  # type: ignore
+                self._prompt_character_count += self.count_chars_skip_spaces(text)  # type: ignore
 
         return True
 
@@ -204,44 +194,9 @@ class _GoogleVertexRequest(_ProviderRequest):
             # tool_config does not have to_dict or any other serializable object
             prompt["tool_config"] = str(tool_config)  # type: ignore
 
-    def _get_model_name(self, response: 'dict[str, Any]') -> Optional[str]:
-        model: Optional[str] = response.get("model_version", None)
-        if model:
-            return model
-
-        return self._model_name
-
     @override
     def process_chunk(self, chunk: Any) -> _ChunkResult:
-        ingest = False
-        response_dict: dict[str, Any] = chunk.to_dict()
-        if "provider_response_id" not in self._ingest:
-            id = response_dict.get("response_id", None)
-            if id:
-                self._ingest["provider_response_id"] = id
-
-        if "resource" not in self._ingest:
-            model: Optional[str] = self._get_model_name(response_dict)  # type: ignore[unreachable]
-            if model:
-                self._ingest["resource"] = "google." + model
-
-        for candidate in response_dict.get("candidates", []):
-            parts = candidate.get("content", {}).get("parts", [])
-            for part in parts:
-                self._candidates_character_count += count_chars_skip_spaces(part.get("text", ""))
-
-        usage = response_dict.get("usage_metadata", {})
-        if usage and "prompt_token_count" in usage and "candidates_token_count" in usage:
-            vertex_compute_usage(
-                request=self,
-                model=self._get_model_name(response_dict),
-                response_dict=response_dict,
-                prompt_character_count=self._prompt_character_count,
-                streaming_candidates_characters=self._candidates_character_count,
-            )
-            ingest = True
-
-        return _ChunkResult(send_chunk_to_caller=True, ingest=ingest)
+        return self.process_chunk_dict(response_dict=chunk.to_dict())
 
     @override
     def process_synchronous_response(
@@ -249,149 +204,8 @@ class _GoogleVertexRequest(_ProviderRequest):
         response: Any,
         log_prompt_and_response: bool,
         kwargs: Any) -> Any:
-        response_dict: dict[str, Any] = response.to_dict()
-
-        id: Optional[str] = response_dict.get("response_id", None)
-        if id:
-            self._ingest["provider_response_id"] = id
-
-        model: Optional[str] = self._get_model_name(response_dict)
-        if model:
-            self._ingest["resource"] = "google." + model
-
-        vertex_compute_usage(
-            request=self,
-            model=model,
-            response_dict=response_dict,
-            prompt_character_count=self._prompt_character_count,
-            streaming_candidates_characters=self._candidates_character_count
-        )
-
-        if log_prompt_and_response:
-            self._ingest["provider_response_json"] = [json.dumps(response_dict)]
-
-        return None
-
-def vertex_compute_usage(
-    request: _ProviderRequest,
-    model: Optional[str],
-    response_dict: 'dict[str, Any]',
-    prompt_character_count: int = 0,
-    streaming_candidates_characters: Optional[int] = None) -> None:
-
-    def is_character_billing_model(model: str) -> bool:
-        return model.startswith("gemini-1.")
-
-    def is_large_context_token_model(model: str, input_tokens: int) -> bool:
-        return model.startswith("gemini-2.5-pro") and input_tokens > 200_000
-
-    def add_units(request: _ProviderRequest, key: str, input: Optional[int] = None, output: Optional[int] = None) -> None:
-        if key not in request._ingest["units"]:
-            request._ingest["units"][key] = {}
-        if input is not None:
-            request._ingest["units"][key]["input"] = input
-        if output is not None:
-            request._ingest["units"][key]["output"] = output
-
-    usage = response_dict.get("usage_metadata", {})
-    input = usage.get("prompt_token_count", 0)
-
-    prompt_tokens_details: list[dict[str, Any]] = usage.get("prompt_tokens_details", [])
-    candidates_tokens_details: list[dict[str, Any]] = usage.get("candidates_tokens_details", [])
-
-    if not model:
-        model = ""
-
-    large_context = ""
-
-    if is_character_billing_model(model):
-        if input > 128000:
-            large_context = "_large_context"
-
-        # gemini 1.0 and 1.5 units are reported in characters, per second, per image, etc...
-        for details in prompt_tokens_details:
-            modality = details.get("modality", "")
-            if not modality:
-                continue
-
-            modality_token_count = details.get("token_count", 0)
-            if modality == "TEXT":
-                input = prompt_character_count
-                if input == 0:
-                    # back up calc if nothing was calculated from the prompt
-                    input = response_dict["usage_metadata"]["prompt_token_count"] * 4
-
-                output = 0
-                if streaming_candidates_characters is None:
-                    for candidate in response_dict.get("candidates", []):
-                        parts = candidate.get("content", {}).get("parts", [])
-                        for part in parts:
-                            output += count_chars_skip_spaces(part.get("text", ""))
-
-                    if output == 0:
-                        # back up calc if no parts
-                        output = response_dict["usage_metadata"]["candidates_token_count"] * 4
-                else:
-                    output = streaming_candidates_characters
-
-                request._ingest["units"]["text"+large_context] = Units(input=input, output=output)
-
-            elif modality == "IMAGE":
-                num_images = math.ceil(modality_token_count / 258)
-                add_units(request, "vision"+large_context, input=num_images)
-
-            elif modality == "VIDEO":
-                video_seconds = math.ceil(modality_token_count / 285)
-                add_units(request, "video"+large_context, input=video_seconds)
-
-            elif modality == "AUDIO":
-                audio_seconds = math.ceil(modality_token_count / 25)
-                add_units(request, "audio"+large_context, input=audio_seconds)
-
-        # No need to go over the candidates_tokens_details as all the character based 1.x models only output TEXT
-        # for details in candidates_tokens_details:
-
-    else:
-        # thinking tokens introduced in 2.5 after the transition to token based billing
-        thinking_token_count = usage.get("thoughts_token_count", 0)
-
-        if is_large_context_token_model(model, input):
-            large_context = "_large_context"
-
-        for details in prompt_tokens_details:
-            modality = details.get("modality", "")
-            if not modality:
-                continue
-
-            modality_token_count = details.get("token_count", 0)
-            if modality == "IMAGE":
-                add_units(request, "vision"+large_context, input=modality_token_count)
-            elif modality in ("VIDEO", "AUDIO", "TEXT"):
-                add_units(request, modality.lower()+large_context, input=modality_token_count)
-        for details in candidates_tokens_details:
-            modality = details.get("modality", "")
-            if not modality:
-                continue
-
-            modality_token_count = details.get("token_count", 0)
-            if modality in ("VIDEO", "AUDIO", "TEXT", "IMAGE"):
-                add_units(request, modality.lower()+large_context, output=modality_token_count)
-
-        if thinking_token_count > 0:
-            add_units(request, "reasoning"+large_context, output=thinking_token_count)
-
-    if not request._ingest["units"]:
-        input = usage.get("prompt_token_count", 0)
-        output = usage.get("candidates_token_count", 0) * 4
-
-        if is_character_billing_model(model):
-            if prompt_character_count > 0:
-                input = prompt_character_count
-            else:
-                input *= 4
-
-            # if no units were added, add a default unit and assume 4 characters per token
-            request._ingest["units"]["text"+large_context] = Units(input=input, output=output)
-        else:
-            # if no units were added, add a default unit
-            request._ingest["units"]["text"] = Units(input=input, output=output)
+        return self.vertex_process_synchronous_response(
+            response_dict=response.to_dict(),
+            log_prompt_and_response=log_prompt_and_response,
+        )
+
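The shape of this change: the dict-based response handling that previously lived in _GoogleVertexRequest moves into the new shared _VertexRequest base class, and the subclass shrinks to a thin adapter that converts SDK response objects with to_dict(). A minimal, self-contained sketch of the pattern; the class and method names mirror the diff, but the stub bodies here are illustrative, not the package's code:

from typing import Any


class VertexRequestBase:
    # Stand-in for payi's _VertexRequest: all accounting operates on plain
    # dicts, so any client that can produce a dict can share it.
    def process_chunk_dict(self, response_dict: "dict[str, Any]") -> bool:
        usage = response_dict.get("usage_metadata", {})
        # As in the diff above, ingestion happens only once usage metadata
        # is complete; here we simply report that condition.
        return "prompt_token_count" in usage and "candidates_token_count" in usage


class GoogleVertexRequest(VertexRequestBase):
    # Stand-in for _GoogleVertexRequest: adapts the SDK object and delegates.
    def process_chunk(self, chunk: Any) -> bool:
        return self.process_chunk_dict(response_dict=chunk.to_dict())


class FakeChunk:
    # Hypothetical chunk used only to exercise the sketch.
    def to_dict(self) -> "dict[str, Any]":
        return {"usage_metadata": {"prompt_token_count": 10, "candidates_token_count": 5}}


assert GoogleVertexRequest().process_chunk(FakeChunk()) is True

The contraction of GoogleGenAiInstrumentor.py in the stats above (+11 -62) is consistent with it delegating to the same base class.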
payi/lib/VertexRequest.py
ADDED
@@ -0,0 +1,237 @@
+import json
+import math
+from typing import Any, Optional
+
+from payi.lib.helpers import PayiCategories
+from payi.types.ingest_units_params import Units
+
+from .instrument import _ChunkResult, _StreamingType, _ProviderRequest, _PayiInstrumentor
+
+
+class _VertexRequest(_ProviderRequest):  # type: ignore
+    def __init__(self, instrumentor: _PayiInstrumentor):
+        super().__init__(
+            instrumentor=instrumentor,
+            category=PayiCategories.google_vertex,
+            streaming_type=_StreamingType.generator,
+            is_google_vertex_or_genai_client=True,
+        )
+        self._prompt_character_count = 0
+        self._streaming_candidates_character_count: Optional[int] = None
+        self._model_name: Optional[str] = None
+
+    def _get_model_name(self, response: 'dict[str, Any]') -> Optional[str]:
+        model: Optional[str] = response.get("model_version", None)
+        if model:
+            return model
+
+        return self._model_name
+
+    def process_chunk_dict(self, response_dict: 'dict[str, Any]') -> _ChunkResult:
+        ingest = False
+        if "provider_response_id" not in self._ingest:
+            id = response_dict.get("response_id", None)
+            if id:
+                self._ingest["provider_response_id"] = id
+
+        if "resource" not in self._ingest:
+            model: Optional[str] = self._get_model_name(response_dict)  # type: ignore[unreachable]
+            if model:
+                self._ingest["resource"] = "google." + model
+
+        for candidate in response_dict.get("candidates", []):
+            parts = candidate.get("content", {}).get("parts", [])
+            for part in parts:
+
+                count = self.count_chars_skip_spaces(part.get("text", ""))
+                if count > 0:
+                    if self._streaming_candidates_character_count is None:
+                        self._streaming_candidates_character_count = 0
+                    self._streaming_candidates_character_count += count
+
+                self.process_response_part_for_function_call(part)
+
+        usage = response_dict.get("usage_metadata", {})
+        if usage and "prompt_token_count" in usage and "candidates_token_count" in usage:
+            self.compute_usage(
+                model=self._get_model_name(response_dict),
+                response_dict=response_dict,
+                prompt_character_count=self._prompt_character_count,
+                streaming_candidates_characters=self._streaming_candidates_character_count,
+            )
+            ingest = True
+
+        return _ChunkResult(send_chunk_to_caller=True, ingest=ingest)
+
+    def process_response_part_for_function_call(self, part: 'dict[str, Any]') -> None:
+        function = part.get("function_call", {})
+        if not function:
+            return
+
+        name = function.get("name", "")
+        args = function.get("args", {})
+        arguments: Optional[str] = None
+        if args and isinstance(args, dict):
+            arguments = json.dumps(args)
+
+        if name:
+            self.add_synchronous_function_call(name=name, arguments=arguments)
+
+    @staticmethod
+    def count_chars_skip_spaces(text: str) -> int:
+        return sum(1 for c in text if not c.isspace())
+
+    def vertex_process_synchronous_response(
+        self,
+        response_dict: 'dict[str, Any]',
+        log_prompt_and_response: bool) -> Any:
+
+        id: Optional[str] = response_dict.get("response_id", None)
+        if id:
+            self._ingest["provider_response_id"] = id
+
+        model: Optional[str] = self._get_model_name(response_dict)
+        if model:
+            self._ingest["resource"] = "google." + model
+
+        candidates = response_dict.get("candidates", [])
+        for candidate in candidates:
+            parts = candidate.get("content", {}).get("parts", [])
+            for part in parts:
+                self.process_response_part_for_function_call(part)
+
+        self.compute_usage(
+            model=model,
+            response_dict=response_dict,
+            prompt_character_count=self._prompt_character_count,
+            streaming_candidates_characters=self._streaming_candidates_character_count
+        )
+
+        if log_prompt_and_response:
+            self._ingest["provider_response_json"] = [json.dumps(response_dict)]
+
+        return None
+
+    def compute_usage(
+        self,
+        model: Optional[str],
+        response_dict: 'dict[str, Any]',
+        prompt_character_count: int,
+        streaming_candidates_characters: Optional[int]) -> None:
+
+        def is_character_billing_model(model: str) -> bool:
+            return model.startswith("gemini-1.")
+
+        def is_large_context_token_model(model: str, input_tokens: int) -> bool:
+            return model.startswith("gemini-2.5-pro") and input_tokens > 200_000
+
+        def add_units(request: _ProviderRequest, key: str, input: Optional[int] = None, output: Optional[int] = None) -> None:
+            if key not in request._ingest["units"]:
+                request._ingest["units"][key] = {}
+            if input is not None:
+                request._ingest["units"][key]["input"] = input
+            if output is not None:
+                request._ingest["units"][key]["output"] = output
+
+        usage = response_dict.get("usage_metadata", {})
+        input = usage.get("prompt_token_count", 0)
+
+        prompt_tokens_details: list[dict[str, Any]] = usage.get("prompt_tokens_details", [])
+        candidates_tokens_details: list[dict[str, Any]] = usage.get("candidates_tokens_details", [])
+
+        if not model:
+            model = ""
+
+        large_context = ""
+
+        if is_character_billing_model(model):
+            if input > 128000:
+                large_context = "_large_context"
+
+            # gemini 1.0 and 1.5 units are reported in characters, per second, per image, etc...
+            for details in prompt_tokens_details:
+                modality = details.get("modality", "")
+                if not modality:
+                    continue
+
+                modality_token_count = details.get("token_count", 0)
+                if modality == "TEXT":
+                    input = prompt_character_count
+                    if input == 0:
+                        # back up calc if nothing was calculated from the prompt
+                        input = response_dict["usage_metadata"]["prompt_token_count"] * 4
+
+                    output = 0
+                    if streaming_candidates_characters is None:
+                        for candidate in response_dict.get("candidates", []):
+                            parts = candidate.get("content", {}).get("parts", [])
+                            for part in parts:
+                                output += self.count_chars_skip_spaces(part.get("text", ""))
+
+                        if output == 0:
+                            # back up calc if no parts
+                            output = response_dict["usage_metadata"]["candidates_token_count"] * 4
+                    else:
+                        output = streaming_candidates_characters
+
+                    self._ingest["units"]["text"+large_context] = Units(input=input, output=output)
+
+                elif modality == "IMAGE":
+                    num_images = math.ceil(modality_token_count / 258)
+                    add_units(self, "vision"+large_context, input=num_images)
+
+                elif modality == "VIDEO":
+                    video_seconds = math.ceil(modality_token_count / 285)
+                    add_units(self, "video"+large_context, input=video_seconds)
+
+                elif modality == "AUDIO":
+                    audio_seconds = math.ceil(modality_token_count / 25)
+                    add_units(self, "audio"+large_context, input=audio_seconds)
+
+            # No need to go over the candidates_tokens_details as all the character based 1.x models only output TEXT
+            # for details in candidates_tokens_details:
+
+        else:
+            # thinking tokens introduced in 2.5 after the transition to token based billing
+            thinking_token_count = usage.get("thoughts_token_count", 0)
+
+            if is_large_context_token_model(model, input):
+                large_context = "_large_context"
+
+            for details in prompt_tokens_details:
+                modality = details.get("modality", "")
+                if not modality:
+                    continue
+
+                modality_token_count = details.get("token_count", 0)
+                if modality == "IMAGE":
+                    add_units(self, "vision"+large_context, input=modality_token_count)
+                elif modality in ("VIDEO", "AUDIO", "TEXT"):
+                    add_units(self, modality.lower()+large_context, input=modality_token_count)
+            for details in candidates_tokens_details:
+                modality = details.get("modality", "")
+                if not modality:
+                    continue
+
+                modality_token_count = details.get("token_count", 0)
+                if modality in ("VIDEO", "AUDIO", "TEXT", "IMAGE"):
+                    add_units(self, modality.lower()+large_context, output=modality_token_count)
+
+            if thinking_token_count > 0:
+                add_units(self, "reasoning"+large_context, output=thinking_token_count)
+
+        if not self._ingest["units"]:
+            input = usage.get("prompt_token_count", 0)
+            output = usage.get("candidates_token_count", 0) * 4
+
+            if is_character_billing_model(model):
+                if prompt_character_count > 0:
+                    input = prompt_character_count
+                else:
+                    input *= 4
+
+                # if no units were added, add a default unit and assume 4 characters per token
+                self._ingest["units"]["text"+large_context] = Units(input=input, output=output)
+            else:
+                # if no units were added, add a default unit
+                self._ingest["units"]["text"] = Units(input=input, output=output)
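For orientation, the gemini-1.x branch of compute_usage above bills by characters and derived units rather than tokens: image, video, and audio token counts are converted with fixed divisors (roughly 258 tokens per image, 285 tokens per second of video, 25 per second of audio), and text falls back to about 4 characters per token when no characters were counted directly. A standalone sketch of that arithmetic; the helper name character_billing_units is illustrative, not part of the package:

import math

# Conversion factors taken from compute_usage above.
TOKENS_PER_IMAGE = 258
TOKENS_PER_VIDEO_SECOND = 285
TOKENS_PER_AUDIO_SECOND = 25


def character_billing_units(modality: str, token_count: int) -> "dict[str, int]":
    """Map one prompt modality's token count to the unit gemini-1.x models bill in."""
    if modality == "IMAGE":
        return {"vision": math.ceil(token_count / TOKENS_PER_IMAGE)}
    if modality == "VIDEO":
        return {"video": math.ceil(token_count / TOKENS_PER_VIDEO_SECOND)}
    if modality == "AUDIO":
        return {"audio": math.ceil(token_count / TOKENS_PER_AUDIO_SECOND)}
    # TEXT falls back to ~4 characters per token when no characters were counted.
    return {"text": token_count * 4}


# A 516-token image prompt bills as ceil(516 / 258) = 2 images.
assert character_billing_units("IMAGE", 516) == {"vision": 2}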