payi 0.1.0a82__py3-none-any.whl → 0.1.0a84__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of payi might be problematic. Click here for more details.

@@ -1,14 +1,12 @@
1
1
  import json
2
- import math
3
2
  from typing import Any, List, Union, Optional, Sequence
4
3
  from typing_extensions import override
5
4
 
6
5
  from wrapt import wrap_function_wrapper # type: ignore
7
6
 
8
7
  from payi.lib.helpers import PayiCategories
9
- from payi.types.ingest_units_params import Units
10
8
 
11
- from .instrument import _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
9
+ from .instrument import _ChunkResult, _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
12
10
 
13
11
 
14
12
  class GoogleGenAiInstrumentor:
@@ -115,9 +113,6 @@ async def agenerate_stream_wrapper(
115
113
  kwargs,
116
114
  )
117
115
 
118
- def count_chars_skip_spaces(text: str) -> int:
119
- return sum(1 for c in text if not c.isspace())
120
-
121
116
  class _GoogleGenAiRequest(_ProviderRequest):
122
117
  def __init__(self, instrumentor: _PayiInstrumentor):
123
118
  super().__init__(
@@ -126,7 +121,7 @@ class _GoogleGenAiRequest(_ProviderRequest):
126
121
  streaming_type=_StreamingType.generator,
127
122
  )
128
123
  self._prompt_character_count = 0
129
- self._candiates_character_count = 0
124
+ self._candidates_character_count = 0
130
125
 
131
126
  @override
132
127
  def process_request(self, instance: Any, extra_headers: 'dict[str, str]', args: Sequence[Any], kwargs: Any) -> bool:
@@ -158,6 +153,8 @@ class _GoogleGenAiRequest(_ProviderRequest):
158
153
  if isinstance(value, list):
159
154
  items = value # type: ignore
160
155
 
156
+ from .VertexInstrumentor import count_chars_skip_spaces
157
+
161
158
  for item in items: # type: ignore
162
159
  text = ""
163
160
  if isinstance(item, Part):
@@ -248,7 +245,10 @@ class _GoogleGenAiRequest(_ProviderRequest):
248
245
  prompt["tool_config"] = tool_config
249
246
 
250
247
  @override
251
- def process_chunk(self, chunk: Any) -> bool:
248
+ def process_chunk(self, chunk: Any) -> _ChunkResult:
249
+ from .VertexInstrumentor import vertex_compute_usage, count_chars_skip_spaces
250
+
251
+ ingest = False
252
252
  response_dict: dict[str, Any] = chunk.to_json_dict()
253
253
  if "provider_response_id" not in self._ingest:
254
254
  id = response_dict.get("response_id", None)
@@ -259,20 +259,24 @@ class _GoogleGenAiRequest(_ProviderRequest):
259
259
 
260
260
  self._ingest["resource"] = "google." + model
261
261
 
262
+
262
263
  for candidate in response_dict.get("candidates", []):
263
264
  parts = candidate.get("content", {}).get("parts", [])
264
265
  for part in parts:
265
- self._candiates_character_count += count_chars_skip_spaces(part.get("text", ""))
266
+ self._candidates_character_count += count_chars_skip_spaces(part.get("text", ""))
266
267
 
267
268
  usage = response_dict.get("usage_metadata", {})
268
269
  if usage and "prompt_token_count" in usage and "candidates_token_count" in usage:
269
- self._compute_usage(response_dict, streaming_candidates_characters=self._candiates_character_count)
270
-
271
- return True
272
-
273
- @staticmethod
274
- def _is_character_billing_model(model: str) -> bool:
275
- return model.startswith("gemini-1.")
270
+ vertex_compute_usage(
271
+ request=self,
272
+ model=model,
273
+ response_dict=response_dict,
274
+ prompt_character_count=self._prompt_character_count,
275
+ streaming_candidates_characters=self._candidates_character_count
276
+ )
277
+ ingest = True
278
+
279
+ return _ChunkResult(send_chunk_to_caller=True, ingest=ingest)
276
280
 
277
281
  @override
278
282
  def process_synchronous_response(
@@ -282,6 +286,8 @@ class _GoogleGenAiRequest(_ProviderRequest):
282
286
  kwargs: Any) -> Any:
283
287
  response_dict = response.to_json_dict()
284
288
 
289
+ from .VertexInstrumentor import vertex_compute_usage
290
+
285
291
  id: Optional[str] = response_dict.get("response_id", None)
286
292
  if id:
287
293
  self._ingest["provider_response_id"] = id
@@ -290,105 +296,15 @@ class _GoogleGenAiRequest(_ProviderRequest):
290
296
  if model:
291
297
  self._ingest["resource"] = "google." + model
292
298
 
293
- self._compute_usage(response_dict)
294
-
299
+ vertex_compute_usage(
300
+ request=self,
301
+ model=model,
302
+ response_dict=response_dict,
303
+ prompt_character_count=self._prompt_character_count,
304
+ streaming_candidates_characters=self._candidates_character_count
305
+ )
306
+
295
307
  if log_prompt_and_response:
296
308
  self._ingest["provider_response_json"] = [json.dumps(response_dict)]
297
309
 
298
- return None
299
-
300
- def add_units(self, key: str, input: Optional[int] = None, output: Optional[int] = None) -> None:
301
- if key not in self._ingest["units"]:
302
- self._ingest["units"][key] = {}
303
- if input is not None:
304
- self._ingest["units"][key]["input"] = input
305
- if output is not None:
306
- self._ingest["units"][key]["output"] = output
307
-
308
- def _compute_usage(self, response_dict: 'dict[str, Any]', streaming_candidates_characters: Optional[int] = None) -> None:
309
- usage = response_dict.get("usage_metadata", {})
310
- input = usage.get("prompt_token_count", 0)
311
-
312
- prompt_tokens_details: list[dict[str, Any]] = usage.get("prompt_tokens_details", [])
313
- candidates_tokens_details: list[dict[str, Any]] = usage.get("candidates_tokens_details", [])
314
-
315
- model: str = response_dict.get("model_version", "")
316
-
317
- # for character billing only
318
- large_context = "" if input < 128000 else "_large_context"
319
-
320
- if self._is_character_billing_model(model):
321
- for details in prompt_tokens_details:
322
- modality = details.get("modality", "")
323
- if not modality:
324
- continue
325
-
326
- modality_token_count = details.get("token_count", 0)
327
- if modality == "TEXT":
328
- input = self._prompt_character_count
329
- if input == 0:
330
- # back up calc if nothing was calculated from the prompt
331
- input = response_dict["usage_metadata"]["prompt_token_count"] * 4
332
-
333
- output = 0
334
- if streaming_candidates_characters is None:
335
- for candidate in response_dict.get("candidates", []):
336
- parts = candidate.get("content", {}).get("parts", [])
337
- for part in parts:
338
- output += count_chars_skip_spaces(part.get("text", ""))
339
-
340
- if output == 0:
341
- # back up calc if no parts
342
- output = response_dict["usage_metadata"]["candidates_token_count"] * 4
343
- else:
344
- output = streaming_candidates_characters
345
-
346
- self._ingest["units"]["text"+large_context] = Units(input=input, output=output)
347
-
348
- elif modality == "IMAGE":
349
- num_images = math.ceil(modality_token_count / 258)
350
- self.add_units("vision"+large_context, input=num_images)
351
-
352
- elif modality == "VIDEO":
353
- video_seconds = math.ceil(modality_token_count / 285)
354
- self.add_units("video"+large_context, input=video_seconds)
355
-
356
- elif modality == "AUDIO":
357
- audio_seconds = math.ceil(modality_token_count / 25)
358
- self.add_units("audio"+large_context, input=audio_seconds)
359
-
360
- else:
361
- for details in prompt_tokens_details:
362
- modality = details.get("modality", "")
363
- if not modality:
364
- continue
365
-
366
- modality_token_count = details.get("token_count", 0)
367
- if modality == "IMAGE":
368
- self.add_units("vision", input=modality_token_count)
369
- elif modality in ("VIDEO", "AUDIO", "TEXT"):
370
- self.add_units(modality.lower(), input=modality_token_count)
371
- for details in candidates_tokens_details:
372
- modality = details.get("modality", "")
373
- if not modality:
374
- continue
375
-
376
- modality_token_count = details.get("token_count", 0)
377
- if modality in ("VIDEO", "AUDIO", "TEXT", "IMAGE"):
378
- self.add_units(modality.lower(), output=modality_token_count)
379
-
380
- if not self._ingest["units"]:
381
- input = usage.get("prompt_token_count", 0)
382
- output = usage.get("candidates_token_count", 0) * 4
383
-
384
- if self._is_character_billing_model(model):
385
- if self._prompt_character_count > 0:
386
- input = self._prompt_character_count
387
- else:
388
- input *= 4
389
-
390
- # if no units were added, add a default unit and assume 4 characters per token
391
- self._ingest["units"]["text"+large_context] = Units(input=input, output=output)
392
- else:
393
- # if no units were added, add a default unit
394
- self._ingest["units"]["text"] = Units(input=input, output=output)
310
+ return None
@@ -9,7 +9,7 @@ from wrapt import wrap_function_wrapper # type: ignore
9
9
  from payi.lib.helpers import PayiCategories, PayiHeaderNames
10
10
  from payi.types.ingest_units_params import Units
11
11
 
12
- from .instrument import _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
12
+ from .instrument import _ChunkResult, _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
13
13
 
14
14
 
15
15
  class OpenAiInstrumentor:
@@ -22,8 +22,6 @@ class OpenAiInstrumentor:
22
22
  @staticmethod
23
23
  def instrument(instrumentor: _PayiInstrumentor) -> None:
24
24
  try:
25
- from openai import OpenAI # type: ignore # noqa: F401 I001
26
-
27
25
  wrap_function_wrapper(
28
26
  "openai.resources.chat.completions",
29
27
  "Completions.create",
@@ -47,7 +45,11 @@ class OpenAiInstrumentor:
47
45
  "AsyncEmbeddings.create",
48
46
  aembeddings_wrapper(instrumentor),
49
47
  )
48
+ except Exception as e:
49
+ instrumentor._logger.debug(f"Error instrumenting openai: {e}")
50
50
 
51
+ # responses separately as they are relatively new and the client may not be using the latest openai module
52
+ try:
51
53
  wrap_function_wrapper(
52
54
  "openai.resources.responses",
53
55
  "Responses.create",
@@ -62,8 +64,6 @@ class OpenAiInstrumentor:
62
64
 
63
65
  except Exception as e:
64
66
  instrumentor._logger.debug(f"Error instrumenting openai: {e}")
65
- return
66
-
67
67
 
68
68
  @_PayiInstrumentor.payi_wrapper
69
69
  def embeddings_wrapper(
@@ -338,7 +338,8 @@ class _OpenAiChatProviderRequest(_OpenAiProviderRequest):
338
338
  self._include_usage_added = False
339
339
 
340
340
  @override
341
- def process_chunk(self, chunk: Any) -> bool:
341
+ def process_chunk(self, chunk: Any) -> _ChunkResult:
342
+ ingest = False
342
343
  model = model_to_dict(chunk)
343
344
 
344
345
  if "provider_response_id" not in self._ingest:
@@ -356,8 +357,9 @@ class _OpenAiChatProviderRequest(_OpenAiProviderRequest):
356
357
  # packet which contains the usage to the client as they are not expecting the data
357
358
  if self._include_usage_added:
358
359
  send_chunk_to_client = False
360
+ ingest = True
359
361
 
360
- return send_chunk_to_client
362
+ return _ChunkResult(send_chunk_to_caller=send_chunk_to_client, ingest=ingest)
361
363
 
362
364
  @override
363
365
  def process_request(self, instance: Any, extra_headers: 'dict[str, str]', args: Sequence[Any], kwargs: Any) -> bool:
@@ -420,7 +422,8 @@ class _OpenAiResponsesProviderRequest(_OpenAiProviderRequest):
420
422
  input_tokens_details_key=_OpenAiProviderRequest.responses_input_tokens_details_key)
421
423
 
422
424
  @override
423
- def process_chunk(self, chunk: Any) -> bool:
425
+ def process_chunk(self, chunk: Any) -> _ChunkResult:
426
+ ingest = False
424
427
  model = model_to_dict(chunk)
425
428
  response: dict[str, Any] = model.get("response", {})
426
429
 
@@ -432,8 +435,9 @@ class _OpenAiResponsesProviderRequest(_OpenAiProviderRequest):
432
435
  usage = response.get("usage")
433
436
  if usage:
434
437
  self.add_usage_units(usage)
438
+ ingest = True
435
439
 
436
- return True
440
+ return _ChunkResult(send_chunk_to_caller=True, ingest=ingest)
437
441
 
438
442
  @override
439
443
  def process_request(self, instance: Any, extra_headers: 'dict[str, str]', args: Sequence[Any], kwargs: Any) -> bool:
@@ -8,33 +8,37 @@ from wrapt import wrap_function_wrapper # type: ignore
8
8
  from payi.lib.helpers import PayiCategories
9
9
  from payi.types.ingest_units_params import Units
10
10
 
11
- from .instrument import _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
11
+ from .instrument import _ChunkResult, _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
12
12
 
13
13
 
14
14
  class VertexInstrumentor:
15
15
  @staticmethod
16
16
  def instrument(instrumentor: _PayiInstrumentor) -> None:
17
17
  try:
18
- import vertexai # type: ignore # noqa: F401 I001
19
-
20
18
  wrap_function_wrapper(
21
19
  "vertexai.generative_models",
22
20
  "GenerativeModel.generate_content",
23
21
  generate_wrapper(instrumentor),
24
22
  )
25
23
 
26
- wrap_function_wrapper(
27
- "vertexai.preview.generative_models",
28
- "GenerativeModel.generate_content",
29
- generate_wrapper(instrumentor),
30
- )
31
-
32
24
  wrap_function_wrapper(
33
25
  "vertexai.generative_models",
34
26
  "GenerativeModel.generate_content_async",
35
27
  agenerate_wrapper(instrumentor),
36
28
  )
37
29
 
30
+ except Exception as e:
31
+ instrumentor._logger.debug(f"Error instrumenting vertex: {e}")
32
+ return
33
+
34
+ # separate instrumenting preview functionality from released in case it fails
35
+ try:
36
+ wrap_function_wrapper(
37
+ "vertexai.preview.generative_models",
38
+ "GenerativeModel.generate_content",
39
+ generate_wrapper(instrumentor),
40
+ )
41
+
38
42
  wrap_function_wrapper(
39
43
  "vertexai.preview.generative_models",
40
44
  "GenerativeModel.generate_content_async",
@@ -92,12 +96,20 @@ class _GoogleVertexRequest(_ProviderRequest):
92
96
  streaming_type=_StreamingType.generator,
93
97
  )
94
98
  self._prompt_character_count = 0
95
- self._candiates_character_count = 0
99
+ self._candidates_character_count = 0
100
+ self._model_name: Optional[str] = None
96
101
 
97
102
  @override
98
103
  def process_request(self, instance: Any, extra_headers: 'dict[str, str]', args: Sequence[Any], kwargs: Any) -> bool:
99
104
  from vertexai.generative_models import Content, Image, Part # type: ignore # noqa: F401 I001
100
105
 
106
+ # Try to extract the model name as a backup if the response does not provide it (older vertexai versions do not)
107
+ if instance and hasattr(instance, "_model_name"):
108
+ model = instance._model_name
109
+ if model and isinstance(model, str):
110
+ # Extract the model name after the last slash
111
+ self._model_name = model.split('/')[-1]
112
+
101
113
  if not args:
102
114
  return True
103
115
 
@@ -191,32 +203,44 @@ class _GoogleVertexRequest(_ProviderRequest):
191
203
  # tool_config does not have to_dict or any other serializable object
192
204
  prompt["tool_config"] = str(tool_config) # type: ignore
193
205
 
206
+ def _get_model_name(self, response: 'dict[str, Any]') -> Optional[str]:
207
+ model: Optional[str] = response.get("model_version", None)
208
+ if model:
209
+ return model
210
+
211
+ return self._model_name
212
+
194
213
  @override
195
- def process_chunk(self, chunk: Any) -> bool:
214
+ def process_chunk(self, chunk: Any) -> _ChunkResult:
215
+ ingest = False
196
216
  response_dict: dict[str, Any] = chunk.to_dict()
197
217
  if "provider_response_id" not in self._ingest:
198
218
  id = response_dict.get("response_id", None)
199
219
  if id:
200
220
  self._ingest["provider_response_id"] = id
201
221
 
202
- model: str = response_dict.get("model_version", "")
203
-
204
- self._ingest["resource"] = "google." + model
222
+ if "resource" not in self._ingest:
223
+ model: Optional[str] = self._get_model_name(response_dict) # type: ignore[unreachable]
224
+ if model:
225
+ self._ingest["resource"] = "google." + model
205
226
 
206
227
  for candidate in response_dict.get("candidates", []):
207
228
  parts = candidate.get("content", {}).get("parts", [])
208
229
  for part in parts:
209
- self._candiates_character_count += count_chars_skip_spaces(part.get("text", ""))
230
+ self._candidates_character_count += count_chars_skip_spaces(part.get("text", ""))
210
231
 
211
232
  usage = response_dict.get("usage_metadata", {})
212
233
  if usage and "prompt_token_count" in usage and "candidates_token_count" in usage:
213
- self._compute_usage(response_dict, streaming_candidates_characters=self._candiates_character_count)
234
+ vertex_compute_usage(
235
+ request=self,
236
+ model=self._get_model_name(response_dict),
237
+ response_dict=response_dict,
238
+ prompt_character_count=self._prompt_character_count,
239
+ streaming_candidates_characters=self._candidates_character_count,
240
+ )
241
+ ingest = True
214
242
 
215
- return True
216
-
217
- @staticmethod
218
- def _is_character_billing_model(model: str) -> bool:
219
- return model.startswith("gemini-1.")
243
+ return _ChunkResult(send_chunk_to_caller=True, ingest=ingest)
220
244
 
221
245
  @override
222
246
  def process_synchronous_response(
@@ -230,110 +254,143 @@ class _GoogleVertexRequest(_ProviderRequest):
230
254
  if id:
231
255
  self._ingest["provider_response_id"] = id
232
256
 
233
- model: Optional[str] = response_dict.get("model_version", None)
257
+ model: Optional[str] = self._get_model_name(response_dict)
234
258
  if model:
235
259
  self._ingest["resource"] = "google." + model
236
260
 
237
- self._compute_usage(response_dict)
261
+ vertex_compute_usage(
262
+ request=self,
263
+ model=model,
264
+ response_dict=response_dict,
265
+ prompt_character_count=self._prompt_character_count,
266
+ streaming_candidates_characters=self._candidates_character_count
267
+ )
238
268
 
239
269
  if log_prompt_and_response:
240
270
  self._ingest["provider_response_json"] = [json.dumps(response_dict)]
241
271
 
242
272
  return None
243
273
 
244
- def add_units(self, key: str, input: Optional[int] = None, output: Optional[int] = None) -> None:
245
- if key not in self._ingest["units"]:
246
- self._ingest["units"][key] = {}
274
+ def vertex_compute_usage(
275
+ request: _ProviderRequest,
276
+ model: Optional[str],
277
+ response_dict: 'dict[str, Any]',
278
+ prompt_character_count: int = 0,
279
+ streaming_candidates_characters: Optional[int] = None) -> None:
280
+
281
+ def is_character_billing_model(model: str) -> bool:
282
+ return model.startswith("gemini-1.")
283
+
284
+ def is_large_context_token_model(model: str, input_tokens: int) -> bool:
285
+ return model.startswith("gemini-2.5-pro") and input_tokens > 200_000
286
+
287
+ def add_units(request: _ProviderRequest, key: str, input: Optional[int] = None, output: Optional[int] = None) -> None:
288
+ if key not in request._ingest["units"]:
289
+ request._ingest["units"][key] = {}
247
290
  if input is not None:
248
- self._ingest["units"][key]["input"] = input
291
+ request._ingest["units"][key]["input"] = input
249
292
  if output is not None:
250
- self._ingest["units"][key]["output"] = output
251
-
252
- def _compute_usage(self, response_dict: 'dict[str, Any]', streaming_candidates_characters: Optional[int] = None) -> None:
253
- usage = response_dict.get("usage_metadata", {})
254
- input = usage.get("prompt_token_count", 0)
293
+ request._ingest["units"][key]["output"] = output
255
294
 
256
- prompt_tokens_details: list[dict[str, Any]] = usage.get("prompt_tokens_details", [])
257
- candidates_tokens_details: list[dict[str, Any]] = usage.get("candidates_tokens_details", [])
295
+ usage = response_dict.get("usage_metadata", {})
296
+ input = usage.get("prompt_token_count", 0)
258
297
 
259
- model: str = response_dict.get("model_version", "")
260
-
261
- # for character billing only
262
- large_context = "" if input < 128000 else "_large_context"
263
-
264
- if self._is_character_billing_model(model):
265
- # gemini 1.0 and 1.5 units are reported in characters, per second, per image, etc...
266
- for details in prompt_tokens_details:
267
- modality = details.get("modality", "")
268
- if not modality:
269
- continue
270
-
271
- modality_token_count = details.get("token_count", 0)
272
- if modality == "TEXT":
273
- input = self._prompt_character_count
274
- if input == 0:
275
- # back up calc if nothing was calculated from the prompt
276
- input = response_dict["usage_metadata"]["prompt_token_count"] * 4
277
-
278
- output = 0
279
- if streaming_candidates_characters is None:
280
- for candidate in response_dict.get("candidates", []):
281
- parts = candidate.get("content", {}).get("parts", [])
282
- for part in parts:
283
- output += count_chars_skip_spaces(part.get("text", ""))
284
-
285
- if output == 0:
286
- # back up calc if no parts
287
- output = response_dict["usage_metadata"]["candidates_token_count"] * 4
288
- else:
289
- output = streaming_candidates_characters
290
-
291
- self._ingest["units"]["text"+large_context] = Units(input=input, output=output)
292
-
293
- elif modality == "IMAGE":
294
- num_images = math.ceil(modality_token_count / 258)
295
- self.add_units("vision"+large_context, input=num_images)
296
-
297
- elif modality == "VIDEO":
298
- video_seconds = math.ceil(modality_token_count / 285)
299
- self.add_units("video"+large_context, input=video_seconds)
300
-
301
- elif modality == "AUDIO":
302
- audio_seconds = math.ceil(modality_token_count / 25)
303
- self.add_units("audio"+large_context, input=audio_seconds)
298
+ prompt_tokens_details: list[dict[str, Any]] = usage.get("prompt_tokens_details", [])
299
+ candidates_tokens_details: list[dict[str, Any]] = usage.get("candidates_tokens_details", [])
304
300
 
305
- else:
306
- for details in prompt_tokens_details:
307
- modality = details.get("modality", "")
308
- if not modality:
309
- continue
310
-
311
- modality_token_count = details.get("token_count", 0)
312
- if modality == "IMAGE":
313
- self.add_units("vision", input=modality_token_count)
314
- elif modality in ("VIDEO", "AUDIO", "TEXT"):
315
- self.add_units(modality.lower(), input=modality_token_count)
316
- for details in candidates_tokens_details:
317
- modality = details.get("modality", "")
318
- if not modality:
319
- continue
320
-
321
- modality_token_count = details.get("token_count", 0)
322
- if modality in ("VIDEO", "AUDIO", "TEXT", "IMAGE"):
323
- self.add_units(modality.lower(), output=modality_token_count)
324
-
325
- if not self._ingest["units"]:
326
- input = usage.get("prompt_token_count", 0)
327
- output = usage.get("candidates_token_count", 0) * 4
328
-
329
- if self._is_character_billing_model(model):
330
- if self._prompt_character_count > 0:
331
- input = self._prompt_character_count
301
+ if not model:
302
+ model = ""
303
+
304
+ large_context = ""
305
+
306
+ if is_character_billing_model(model):
307
+ if input > 128000:
308
+ large_context = "_large_context"
309
+
310
+ # gemini 1.0 and 1.5 units are reported in characters, per second, per image, etc...
311
+ for details in prompt_tokens_details:
312
+ modality = details.get("modality", "")
313
+ if not modality:
314
+ continue
315
+
316
+ modality_token_count = details.get("token_count", 0)
317
+ if modality == "TEXT":
318
+ input = prompt_character_count
319
+ if input == 0:
320
+ # back up calc if nothing was calculated from the prompt
321
+ input = response_dict["usage_metadata"]["prompt_token_count"] * 4
322
+
323
+ output = 0
324
+ if streaming_candidates_characters is None:
325
+ for candidate in response_dict.get("candidates", []):
326
+ parts = candidate.get("content", {}).get("parts", [])
327
+ for part in parts:
328
+ output += count_chars_skip_spaces(part.get("text", ""))
329
+
330
+ if output == 0:
331
+ # back up calc if no parts
332
+ output = response_dict["usage_metadata"]["candidates_token_count"] * 4
332
333
  else:
333
- input *= 4
334
+ output = streaming_candidates_characters
335
+
336
+ request._ingest["units"]["text"+large_context] = Units(input=input, output=output)
337
+
338
+ elif modality == "IMAGE":
339
+ num_images = math.ceil(modality_token_count / 258)
340
+ add_units(request, "vision"+large_context, input=num_images)
334
341
 
335
- # if no units were added, add a default unit and assume 4 characters per token
336
- self._ingest["units"]["text"+large_context] = Units(input=input, output=output)
342
+ elif modality == "VIDEO":
343
+ video_seconds = math.ceil(modality_token_count / 285)
344
+ add_units(request, "video"+large_context, input=video_seconds)
345
+
346
+ elif modality == "AUDIO":
347
+ audio_seconds = math.ceil(modality_token_count / 25)
348
+ add_units(request, "audio"+large_context, input=audio_seconds)
349
+
350
+ # No need to go over the candidates_tokens_details as all the character based 1.x models only output TEXT
351
+ # for details in candidates_tokens_details:
352
+
353
+ else:
354
+ # thinking tokens introduced in 2.5 after the transition to token based billing
355
+ thinking_token_count = usage.get("thoughts_token_count", 0)
356
+
357
+ if is_large_context_token_model(model, input):
358
+ large_context = "_large_context"
359
+
360
+ for details in prompt_tokens_details:
361
+ modality = details.get("modality", "")
362
+ if not modality:
363
+ continue
364
+
365
+ modality_token_count = details.get("token_count", 0)
366
+ if modality == "IMAGE":
367
+ add_units(request, "vision"+large_context, input=modality_token_count)
368
+ elif modality in ("VIDEO", "AUDIO", "TEXT"):
369
+ add_units(request, modality.lower()+large_context, input=modality_token_count)
370
+ for details in candidates_tokens_details:
371
+ modality = details.get("modality", "")
372
+ if not modality:
373
+ continue
374
+
375
+ modality_token_count = details.get("token_count", 0)
376
+ if modality in ("VIDEO", "AUDIO", "TEXT", "IMAGE"):
377
+ add_units(request, modality.lower()+large_context, output=modality_token_count)
378
+
379
+ if thinking_token_count > 0:
380
+ add_units(request, "reasoning"+large_context, output=thinking_token_count)
381
+
382
+ if not request._ingest["units"]:
383
+ input = usage.get("prompt_token_count", 0)
384
+ output = usage.get("candidates_token_count", 0) * 4
385
+
386
+ if is_character_billing_model(model):
387
+ if prompt_character_count > 0:
388
+ input = prompt_character_count
337
389
  else:
338
- # if no units were added, add a default unit
339
- self._ingest["units"]["text"] = Units(input=input, output=output)
390
+ input *= 4
391
+
392
+ # if no units were added, add a default unit and assume 4 characters per token
393
+ request._ingest["units"]["text"+large_context] = Units(input=input, output=output)
394
+ else:
395
+ # if no units were added, add a default unit
396
+ request._ingest["units"]["text"] = Units(input=input, output=output)