payi-0.1.0a85-py3-none-any.whl → payi-0.1.0a87-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

@@ -1,14 +1,10 @@
-import json
-import math
 from typing import Any, List, Union, Optional, Sequence
 from typing_extensions import override
 
 from wrapt import wrap_function_wrapper  # type: ignore
 
-from payi.lib.helpers import PayiCategories
-from payi.types.ingest_units_params import Units
-
-from .instrument import _ChunkResult, _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
+from .instrument import _ChunkResult, _IsStreaming, _PayiInstrumentor
+from .VertexRequest import _VertexRequest
 
 
 class VertexInstrumentor:
@@ -85,16 +81,10 @@ async def agenerate_wrapper(
         kwargs,
     )
 
-def count_chars_skip_spaces(text: str) -> int:
-    return sum(1 for c in text if not c.isspace())
-
-class _GoogleVertexRequest(_ProviderRequest):
+class _GoogleVertexRequest(_VertexRequest):
     def __init__(self, instrumentor: _PayiInstrumentor):
         super().__init__(
             instrumentor=instrumentor,
-            category=PayiCategories.google_vertex,
-            streaming_type=_StreamingType.generator,
-            is_google_vertex_or_genai_client=True,
         )
         self._prompt_character_count = 0
         self._candidates_character_count = 0
@@ -144,7 +134,7 @@ class _GoogleVertexRequest(_ProviderRequest):
                 text = item
 
             if text != "":
-                self._prompt_character_count += count_chars_skip_spaces(text)  # type: ignore
+                self._prompt_character_count += self.count_chars_skip_spaces(text)  # type: ignore
 
         return True
 
@@ -204,44 +194,9 @@ class _GoogleVertexRequest(_ProviderRequest):
             # tool_config does not have to_dict or any other serializable object
             prompt["tool_config"] = str(tool_config)  # type: ignore
 
-    def _get_model_name(self, response: 'dict[str, Any]') -> Optional[str]:
-        model: Optional[str] = response.get("model_version", None)
-        if model:
-            return model
-
-        return self._model_name
-
     @override
     def process_chunk(self, chunk: Any) -> _ChunkResult:
-        ingest = False
-        response_dict: dict[str, Any] = chunk.to_dict()
-        if "provider_response_id" not in self._ingest:
-            id = response_dict.get("response_id", None)
-            if id:
-                self._ingest["provider_response_id"] = id
-
-        if "resource" not in self._ingest:
-            model: Optional[str] = self._get_model_name(response_dict)  # type: ignore[unreachable]
-            if model:
-                self._ingest["resource"] = "google." + model
-
-        for candidate in response_dict.get("candidates", []):
-            parts = candidate.get("content", {}).get("parts", [])
-            for part in parts:
-                self._candidates_character_count += count_chars_skip_spaces(part.get("text", ""))
-
-        usage = response_dict.get("usage_metadata", {})
-        if usage and "prompt_token_count" in usage and "candidates_token_count" in usage:
-            vertex_compute_usage(
-                request=self,
-                model=self._get_model_name(response_dict),
-                response_dict=response_dict,
-                prompt_character_count=self._prompt_character_count,
-                streaming_candidates_characters=self._candidates_character_count,
-            )
-            ingest = True
-
-        return _ChunkResult(send_chunk_to_caller=True, ingest=ingest)
+        return self.process_chunk_dict(response_dict=chunk.to_dict())
 
     @override
     def process_synchronous_response(
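
A note on the streaming path changed above: process_chunk now forwards every chunk to the caller and defers ingestion to the shared process_chunk_dict, which only reports usage once a chunk carries complete usage_metadata (Vertex emits both token counts on the final streamed chunk). A minimal sketch of that gate; ingest_gate is a hypothetical helper name and _ChunkResult is stubbed in as a stand-in for the package's class:

from dataclasses import dataclass
from typing import Any


@dataclass
class _ChunkResult:  # stand-in for payi's internal result type
    send_chunk_to_caller: bool
    ingest: bool


def ingest_gate(response_dict: "dict[str, Any]") -> _ChunkResult:
    usage = response_dict.get("usage_metadata", {})
    # Only the final streamed chunk carries both token counts, so only it
    # triggers ingestion; every chunk is still forwarded to the caller.
    done = bool(usage) and "prompt_token_count" in usage and "candidates_token_count" in usage
    return _ChunkResult(send_chunk_to_caller=True, ingest=done)


print(ingest_gate({}))  # ingest=False: intermediate chunk
print(ingest_gate({"usage_metadata": {"prompt_token_count": 10, "candidates_token_count": 3}}))  # ingest=True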
@@ -249,149 +204,8 @@ class _GoogleVertexRequest(_ProviderRequest):
         response: Any,
         log_prompt_and_response: bool,
         kwargs: Any) -> Any:
-        response_dict = response.to_dict()
-
-        id: Optional[str] = response_dict.get("response_id", None)
-        if id:
-            self._ingest["provider_response_id"] = id
-
-        model: Optional[str] = self._get_model_name(response_dict)
-        if model:
-            self._ingest["resource"] = "google." + model
-
-        vertex_compute_usage(
-            request=self,
-            model=model,
-            response_dict=response_dict,
-            prompt_character_count=self._prompt_character_count,
-            streaming_candidates_characters=self._candidates_character_count
-        )
-
-        if log_prompt_and_response:
-            self._ingest["provider_response_json"] = [json.dumps(response_dict)]
-
-        return None
-
-def vertex_compute_usage(
-    request: _ProviderRequest,
-    model: Optional[str],
-    response_dict: 'dict[str, Any]',
-    prompt_character_count: int = 0,
-    streaming_candidates_characters: Optional[int] = None) -> None:
-
-    def is_character_billing_model(model: str) -> bool:
-        return model.startswith("gemini-1.")
-
-    def is_large_context_token_model(model: str, input_tokens: int) -> bool:
-        return model.startswith("gemini-2.5-pro") and input_tokens > 200_000
-
-    def add_units(request: _ProviderRequest, key: str, input: Optional[int] = None, output: Optional[int] = None) -> None:
-        if key not in request._ingest["units"]:
-            request._ingest["units"][key] = {}
-        if input is not None:
-            request._ingest["units"][key]["input"] = input
-        if output is not None:
-            request._ingest["units"][key]["output"] = output
-
-    usage = response_dict.get("usage_metadata", {})
-    input = usage.get("prompt_token_count", 0)
-
-    prompt_tokens_details: list[dict[str, Any]] = usage.get("prompt_tokens_details", [])
-    candidates_tokens_details: list[dict[str, Any]] = usage.get("candidates_tokens_details", [])
-
-    if not model:
-        model = ""
-
-    large_context = ""
-
-    if is_character_billing_model(model):
-        if input > 128000:
-            large_context = "_large_context"
-
-        # gemini 1.0 and 1.5 units are reported in characters, per second, per image, etc...
-        for details in prompt_tokens_details:
-            modality = details.get("modality", "")
-            if not modality:
-                continue
-
-            modality_token_count = details.get("token_count", 0)
-            if modality == "TEXT":
-                input = prompt_character_count
-                if input == 0:
-                    # back up calc if nothing was calculated from the prompt
-                    input = response_dict["usage_metadata"]["prompt_token_count"] * 4
-
-                output = 0
-                if streaming_candidates_characters is None:
-                    for candidate in response_dict.get("candidates", []):
-                        parts = candidate.get("content", {}).get("parts", [])
-                        for part in parts:
-                            output += count_chars_skip_spaces(part.get("text", ""))
-
-                    if output == 0:
-                        # back up calc if no parts
-                        output = response_dict["usage_metadata"]["candidates_token_count"] * 4
-                else:
-                    output = streaming_candidates_characters
-
-                request._ingest["units"]["text"+large_context] = Units(input=input, output=output)
-
-            elif modality == "IMAGE":
-                num_images = math.ceil(modality_token_count / 258)
-                add_units(request, "vision"+large_context, input=num_images)
-
-            elif modality == "VIDEO":
-                video_seconds = math.ceil(modality_token_count / 285)
-                add_units(request, "video"+large_context, input=video_seconds)
-
-            elif modality == "AUDIO":
-                audio_seconds = math.ceil(modality_token_count / 25)
-                add_units(request, "audio"+large_context, input=audio_seconds)
-
-        # No need to go over candidates_tokens_details as all the character-based 1.x models only output TEXT
-        # for details in candidates_tokens_details:
-
-    else:
-        # thinking tokens introduced in 2.5 after the transition to token based billing
-        thinking_token_count = usage.get("thoughts_token_count", 0)
-
-        if is_large_context_token_model(model, input):
-            large_context = "_large_context"
-
-        for details in prompt_tokens_details:
-            modality = details.get("modality", "")
-            if not modality:
-                continue
-
-            modality_token_count = details.get("token_count", 0)
-            if modality == "IMAGE":
-                add_units(request, "vision"+large_context, input=modality_token_count)
-            elif modality in ("VIDEO", "AUDIO", "TEXT"):
-                add_units(request, modality.lower()+large_context, input=modality_token_count)
-        for details in candidates_tokens_details:
-            modality = details.get("modality", "")
-            if not modality:
-                continue
-
-            modality_token_count = details.get("token_count", 0)
-            if modality in ("VIDEO", "AUDIO", "TEXT", "IMAGE"):
-                add_units(request, modality.lower()+large_context, output=modality_token_count)
-
-        if thinking_token_count > 0:
-            add_units(request, "reasoning"+large_context, output=thinking_token_count)
-
-    if not request._ingest["units"]:
-        input = usage.get("prompt_token_count", 0)
-        output = usage.get("candidates_token_count", 0) * 4
-
-        if is_character_billing_model(model):
-            if prompt_character_count > 0:
-                input = prompt_character_count
-            else:
-                input *= 4
-
-            # if no units were added, add a default unit and assume 4 characters per token
-            request._ingest["units"]["text"+large_context] = Units(input=input, output=output)
-        else:
-            # if no units were added, add a default unit
-            request._ingest["units"]["text"] = Units(input=input, output=output)
+        return self.vertex_process_synchronous_response(
+            response_dict=response.to_dict(),
+            log_prompt_and_response=log_prompt_and_response,
+        )
+
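
The hunk below adds the shared base module (VertexRequest.py, a name inferred from the `from .VertexRequest import _VertexRequest` import above, not stated in this diff); the chunk handling and vertex_compute_usage logic removed here reappear there as _VertexRequest methods. A rough sketch of the resulting shape, with illustrative stubs rather than the package's code:

from typing import Any


class _VertexRequest:  # shared base: dict-level chunk handling + usage math
    def process_chunk_dict(self, response_dict: "dict[str, Any]") -> Any:
        ...  # capture response_id/model, count characters, call compute_usage()

    @staticmethod
    def count_chars_skip_spaces(text: str) -> int:
        # whitespace-insensitive character count used for character billing
        return sum(1 for c in text if not c.isspace())


class _GoogleVertexRequest(_VertexRequest):  # SDK-specific adapter
    def process_chunk(self, chunk: Any) -> Any:
        # Vertex SDK response objects expose to_dict(); the base class works on
        # plain dicts, presumably so the google-genai client can share it (the
        # is_google_vertex_or_genai_client flag suggests as much).
        return self.process_chunk_dict(response_dict=chunk.to_dict())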
@@ -0,0 +1,237 @@
+import json
+import math
+from typing import Any, Optional
+
+from payi.lib.helpers import PayiCategories
+from payi.types.ingest_units_params import Units
+
+from .instrument import _ChunkResult, _StreamingType, _ProviderRequest, _PayiInstrumentor
+
+
+class _VertexRequest(_ProviderRequest):  # type: ignore
+    def __init__(self, instrumentor: _PayiInstrumentor):
+        super().__init__(
+            instrumentor=instrumentor,
+            category=PayiCategories.google_vertex,
+            streaming_type=_StreamingType.generator,
+            is_google_vertex_or_genai_client=True,
+        )
+        self._prompt_character_count = 0
+        self._streaming_candidates_character_count: Optional[int] = None
+        self._model_name: Optional[str] = None
+
+    def _get_model_name(self, response: 'dict[str, Any]') -> Optional[str]:
+        model: Optional[str] = response.get("model_version", None)
+        if model:
+            return model
+
+        return self._model_name
+
+    def process_chunk_dict(self, response_dict: 'dict[str, Any]') -> _ChunkResult:
+        ingest = False
+        if "provider_response_id" not in self._ingest:
+            id = response_dict.get("response_id", None)
+            if id:
+                self._ingest["provider_response_id"] = id
+
+        if "resource" not in self._ingest:
+            model: Optional[str] = self._get_model_name(response_dict)  # type: ignore[unreachable]
+            if model:
+                self._ingest["resource"] = "google." + model
+
+        for candidate in response_dict.get("candidates", []):
+            parts = candidate.get("content", {}).get("parts", [])
+            for part in parts:
+
+                count = self.count_chars_skip_spaces(part.get("text", ""))
+                if count > 0:
+                    if self._streaming_candidates_character_count is None:
+                        self._streaming_candidates_character_count = 0
+                    self._streaming_candidates_character_count += count
+
+                self.process_response_part_for_function_call(part)
+
+        usage = response_dict.get("usage_metadata", {})
+        if usage and "prompt_token_count" in usage and "candidates_token_count" in usage:
+            self.compute_usage(
+                model=self._get_model_name(response_dict),
+                response_dict=response_dict,
+                prompt_character_count=self._prompt_character_count,
+                streaming_candidates_characters=self._streaming_candidates_character_count,
+            )
+            ingest = True
+
+        return _ChunkResult(send_chunk_to_caller=True, ingest=ingest)
+
+    def process_response_part_for_function_call(self, part: 'dict[str, Any]') -> None:
+        function = part.get("function_call", {})
+        if not function:
+            return
+
+        name = function.get("name", "")
+        args = function.get("args", {})
+        arguments: Optional[str] = None
+        if args and isinstance(args, dict):
+            arguments = json.dumps(args)
+
+        if name:
+            self.add_synchronous_function_call(name=name, arguments=arguments)
+
+    @staticmethod
+    def count_chars_skip_spaces(text: str) -> int:
+        return sum(1 for c in text if not c.isspace())
+
+    def vertex_process_synchronous_response(
+        self,
+        response_dict: 'dict[str, Any]',
+        log_prompt_and_response: bool) -> Any:
+
+        id: Optional[str] = response_dict.get("response_id", None)
+        if id:
+            self._ingest["provider_response_id"] = id
+
+        model: Optional[str] = self._get_model_name(response_dict)
+        if model:
+            self._ingest["resource"] = "google." + model
+
+        candidates = response_dict.get("candidates", [])
+        for candidate in candidates:
+            parts = candidate.get("content", {}).get("parts", [])
+            for part in parts:
+                self.process_response_part_for_function_call(part)
+
+        self.compute_usage(
+            model=model,
+            response_dict=response_dict,
+            prompt_character_count=self._prompt_character_count,
+            streaming_candidates_characters=self._streaming_candidates_character_count
+        )
+
+        if log_prompt_and_response:
+            self._ingest["provider_response_json"] = [json.dumps(response_dict)]
+
+        return None
+
+    def compute_usage(
+        self,
+        model: Optional[str],
+        response_dict: 'dict[str, Any]',
+        prompt_character_count: int,
+        streaming_candidates_characters: Optional[int]) -> None:
+
+        def is_character_billing_model(model: str) -> bool:
+            return model.startswith("gemini-1.")
+
+        def is_large_context_token_model(model: str, input_tokens: int) -> bool:
+            return model.startswith("gemini-2.5-pro") and input_tokens > 200_000
+
+        def add_units(request: _ProviderRequest, key: str, input: Optional[int] = None, output: Optional[int] = None) -> None:
+            if key not in request._ingest["units"]:
+                request._ingest["units"][key] = {}
+            if input is not None:
+                request._ingest["units"][key]["input"] = input
+            if output is not None:
+                request._ingest["units"][key]["output"] = output
+
+        usage = response_dict.get("usage_metadata", {})
+        input = usage.get("prompt_token_count", 0)
+
+        prompt_tokens_details: list[dict[str, Any]] = usage.get("prompt_tokens_details", [])
+        candidates_tokens_details: list[dict[str, Any]] = usage.get("candidates_tokens_details", [])
+
+        if not model:
+            model = ""
+
+        large_context = ""
+
+        if is_character_billing_model(model):
+            if input > 128000:
+                large_context = "_large_context"
+
+            # gemini 1.0 and 1.5 units are reported in characters, per second, per image, etc...
+            for details in prompt_tokens_details:
+                modality = details.get("modality", "")
+                if not modality:
+                    continue
+
+                modality_token_count = details.get("token_count", 0)
+                if modality == "TEXT":
+                    input = prompt_character_count
+                    if input == 0:
+                        # back up calc if nothing was calculated from the prompt
+                        input = response_dict["usage_metadata"]["prompt_token_count"] * 4
+
+                    output = 0
+                    if streaming_candidates_characters is None:
+                        for candidate in response_dict.get("candidates", []):
+                            parts = candidate.get("content", {}).get("parts", [])
+                            for part in parts:
+                                output += self.count_chars_skip_spaces(part.get("text", ""))
+
+                        if output == 0:
+                            # back up calc if no parts
+                            output = response_dict["usage_metadata"]["candidates_token_count"] * 4
+                    else:
+                        output = streaming_candidates_characters
+
+                    self._ingest["units"]["text"+large_context] = Units(input=input, output=output)
+
+                elif modality == "IMAGE":
+                    num_images = math.ceil(modality_token_count / 258)
+                    add_units(self, "vision"+large_context, input=num_images)
+
+                elif modality == "VIDEO":
+                    video_seconds = math.ceil(modality_token_count / 285)
+                    add_units(self, "video"+large_context, input=video_seconds)
+
+                elif modality == "AUDIO":
+                    audio_seconds = math.ceil(modality_token_count / 25)
+                    add_units(self, "audio"+large_context, input=audio_seconds)
+
+            # No need to go over candidates_tokens_details as all the character-based 1.x models only output TEXT
+            # for details in candidates_tokens_details:
+
+        else:
+            # thinking tokens introduced in 2.5 after the transition to token based billing
+            thinking_token_count = usage.get("thoughts_token_count", 0)
+
+            if is_large_context_token_model(model, input):
+                large_context = "_large_context"
+
+            for details in prompt_tokens_details:
+                modality = details.get("modality", "")
+                if not modality:
+                    continue
+
+                modality_token_count = details.get("token_count", 0)
+                if modality == "IMAGE":
+                    add_units(self, "vision"+large_context, input=modality_token_count)
+                elif modality in ("VIDEO", "AUDIO", "TEXT"):
+                    add_units(self, modality.lower()+large_context, input=modality_token_count)
+            for details in candidates_tokens_details:
+                modality = details.get("modality", "")
+                if not modality:
+                    continue
+
+                modality_token_count = details.get("token_count", 0)
+                if modality in ("VIDEO", "AUDIO", "TEXT", "IMAGE"):
+                    add_units(self, modality.lower()+large_context, output=modality_token_count)
+
+            if thinking_token_count > 0:
+                add_units(self, "reasoning"+large_context, output=thinking_token_count)
+
+        if not self._ingest["units"]:
+            input = usage.get("prompt_token_count", 0)
+            output = usage.get("candidates_token_count", 0) * 4
+
+            if is_character_billing_model(model):
+                if prompt_character_count > 0:
+                    input = prompt_character_count
+                else:
+                    input *= 4
+
+                # if no units were added, add a default unit and assume 4 characters per token
+                self._ingest["units"]["text"+large_context] = Units(input=input, output=output)
+            else:
+                # if no units were added, add a default unit
+                self._ingest["units"]["text"] = Units(input=input, output=output)