payi 0.1.0a107__py3-none-any.whl → 0.1.0a137__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- payi/__init__.py +3 -1
- payi/_base_client.py +12 -12
- payi/_client.py +8 -8
- payi/_compat.py +48 -48
- payi/_models.py +87 -59
- payi/_qs.py +7 -7
- payi/_streaming.py +4 -6
- payi/_types.py +53 -12
- payi/_utils/__init__.py +9 -2
- payi/_utils/_compat.py +45 -0
- payi/_utils/_datetime_parse.py +136 -0
- payi/_utils/_sync.py +3 -31
- payi/_utils/_transform.py +13 -3
- payi/_utils/_typing.py +6 -1
- payi/_utils/_utils.py +5 -6
- payi/_version.py +1 -1
- payi/lib/AnthropicInstrumentor.py +83 -57
- payi/lib/BedrockInstrumentor.py +292 -57
- payi/lib/GoogleGenAiInstrumentor.py +18 -31
- payi/lib/OpenAIInstrumentor.py +56 -72
- payi/lib/ProviderRequest.py +216 -0
- payi/lib/StreamWrappers.py +379 -0
- payi/lib/VertexInstrumentor.py +18 -37
- payi/lib/VertexRequest.py +16 -2
- payi/lib/data/cohere_embed_english_v3.json +30706 -0
- payi/lib/helpers.py +62 -5
- payi/lib/instrument.py +433 -659
- payi/resources/categories/__init__.py +0 -14
- payi/resources/categories/categories.py +25 -53
- payi/resources/categories/resources.py +27 -23
- payi/resources/ingest.py +126 -132
- payi/resources/limits/__init__.py +14 -14
- payi/resources/limits/limits.py +58 -58
- payi/resources/limits/properties.py +171 -0
- payi/resources/requests/request_id/properties.py +8 -8
- payi/resources/requests/request_id/result.py +3 -3
- payi/resources/requests/response_id/properties.py +8 -8
- payi/resources/requests/response_id/result.py +3 -3
- payi/resources/use_cases/definitions/definitions.py +27 -27
- payi/resources/use_cases/definitions/kpis.py +23 -23
- payi/resources/use_cases/definitions/limit_config.py +14 -14
- payi/resources/use_cases/definitions/version.py +3 -3
- payi/resources/use_cases/kpis.py +15 -15
- payi/resources/use_cases/properties.py +6 -6
- payi/resources/use_cases/use_cases.py +7 -7
- payi/types/__init__.py +2 -0
- payi/types/bulk_ingest_response.py +3 -20
- payi/types/categories/__init__.py +0 -1
- payi/types/categories/resource_list_params.py +5 -1
- payi/types/category_list_resources_params.py +5 -1
- payi/types/category_resource_response.py +31 -1
- payi/types/ingest_event_param.py +7 -6
- payi/types/ingest_units_params.py +5 -4
- payi/types/limit_create_params.py +3 -3
- payi/types/limit_list_response.py +1 -3
- payi/types/limit_response.py +1 -3
- payi/types/limits/__init__.py +2 -9
- payi/types/limits/{tag_remove_params.py → property_update_params.py} +4 -5
- payi/types/limits/{tag_delete_response.py → property_update_response.py} +3 -3
- payi/types/requests/request_id/property_update_params.py +2 -2
- payi/types/requests/response_id/property_update_params.py +2 -2
- payi/types/shared/__init__.py +2 -0
- payi/types/shared/api_error.py +18 -0
- payi/types/shared/pay_i_common_models_budget_management_create_limit_base.py +3 -3
- payi/types/shared/properties_request.py +11 -0
- payi/types/shared/xproxy_result.py +2 -0
- payi/types/shared_params/pay_i_common_models_budget_management_create_limit_base.py +3 -3
- payi/types/use_cases/definitions/limit_config_create_params.py +3 -3
- payi/types/use_cases/property_update_params.py +2 -2
- {payi-0.1.0a107.dist-info → payi-0.1.0a137.dist-info}/METADATA +6 -6
- {payi-0.1.0a107.dist-info → payi-0.1.0a137.dist-info}/RECORD +73 -75
- payi/resources/categories/fixed_cost_resources.py +0 -196
- payi/resources/limits/tags.py +0 -507
- payi/types/categories/fixed_cost_resource_create_params.py +0 -21
- payi/types/limits/limit_tags.py +0 -16
- payi/types/limits/tag_create_params.py +0 -13
- payi/types/limits/tag_create_response.py +0 -10
- payi/types/limits/tag_list_response.py +0 -10
- payi/types/limits/tag_remove_response.py +0 -10
- payi/types/limits/tag_update_params.py +0 -13
- payi/types/limits/tag_update_response.py +0 -10
- {payi-0.1.0a107.dist-info → payi-0.1.0a137.dist-info}/WHEEL +0 -0
- {payi-0.1.0a107.dist-info → payi-0.1.0a137.dist-info}/licenses/LICENSE +0 -0
payi/lib/StreamWrappers.py
ADDED

@@ -0,0 +1,379 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Union, Optional
+
+from wrapt import ObjectProxy  # type: ignore
+
+from payi.lib.helpers import _compact_json
+from payi.lib.Stopwatch import Stopwatch
+from payi.types.shared.xproxy_error import XproxyError
+from payi.types.shared.xproxy_result import XproxyResult
+
+from .ProviderRequest import _ChunkResult, _ProviderRequest
+
+if TYPE_CHECKING:
+    from .instrument import _PayiInstrumentor
+
+__all__ = [
+    "_StreamIteratorWrapper",
+    "_StreamManagerWrapper",
+    "_GeneratorWrapper",
+]
+
+class _StreamIteratorWrapper(ObjectProxy):  # type: ignore
+    def __init__(
+        self,
+        response: Any,
+        instance: Any,
+        instrumentor: '_PayiInstrumentor',
+        stopwatch: Stopwatch,
+        request: _ProviderRequest,
+    ) -> None:
+
+        instrumentor._logger.debug(f"StreamIteratorWrapper: instance {instance}, category {request._category}")
+
+        request.process_initial_stream_response(response)
+
+        bedrock_from_stream: bool = False
+        if request.is_aws_client:
+            stream = response.get("stream", None)
+
+            if stream:
+                response = stream
+                bedrock_from_stream = True
+            else:
+                response = response.get("body")
+                bedrock_from_stream = False
+
+        super().__init__(response)  # type: ignore
+
+        self._response = response
+        self._instance = instance
+
+        self._instrumentor = instrumentor
+        self._stopwatch: Stopwatch = stopwatch
+        self._responses: list[str] = []
+
+        self._request: _ProviderRequest = request
+
+        self._first_token: bool = True
+        self._bedrock_from_stream: bool = bedrock_from_stream
+        self._ingested: bool = False
+        self._iter_started: bool = False
+
+    def __enter__(self) -> Any:
+        self._instrumentor._logger.debug(f"StreamIteratorWrapper: __enter__")
+        return self
+
+    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+        self._instrumentor._logger.debug(f"StreamIteratorWrapper: __exit__")
+        self.__wrapped__.__exit__(exc_type, exc_val, exc_tb)  # type: ignore
+
+    async def __aenter__(self) -> Any:
+        self._instrumentor._logger.debug(f"StreamIteratorWrapper: __aenter__")
+        return self
+
+    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+        self._instrumentor._logger.debug(f"StreamIteratorWrapper: __aexit__")
+        await self.__wrapped__.__aexit__(exc_type, exc_val, exc_tb)  # type: ignore
+
+    def __iter__(self) -> Any:
+        self._iter_started = True
+        if self._request.is_aws_client:
+            # MUST reside in a separate function so that the yield statement (e.g. the generator) doesn't implicitly return its own iterator and override self
+            self._instrumentor._logger.debug(f"StreamIteratorWrapper: bedrock __iter__")
+            return self._iter_bedrock()
+
+        self._instrumentor._logger.debug(f"StreamIteratorWrapper: __iter__")
+        return self
+
+    def _iter_bedrock(self) -> Any:
+        # botocore EventStream doesn't have a __next__ method so iterate over the wrapped object in place
+        for event in self.__wrapped__:  # type: ignore
+            result: Optional[_ChunkResult] = None
+
+            if (self._bedrock_from_stream):
+                result = self._evaluate_chunk(event)
+            else:
+                chunk = event.get('chunk')  # type: ignore
+                if chunk:
+                    decode = chunk.get('bytes').decode()  # type: ignore
+                    result = self._evaluate_chunk(decode)
+
+            if result and result.ingest:
+                from .BedrockInstrumentor import BedrockInstrumentor
+
+                xproxy_result = self._stop_iteration()
+
+                # the xproxy_result is not json serializable by default so adding the object is opt in by the client
+                if BedrockInstrumentor._add_streaming_xproxy_result:
+                    self._request.assign_xproxy_result(event, xproxy_result)
+            yield event
+
+        self._instrumentor._logger.debug(f"StreamIteratorWrapper: bedrock iter finished")
+
+        self._stop_iteration()
+
+    def __aiter__(self) -> Any:
+        self._iter_started = True
+        self._instrumentor._logger.debug(f"StreamIteratorWrapper: __aiter__")
+        return self
+
+    def __next__(self) -> object:
+        try:
+            chunk: object = self.__wrapped__.__next__()  # type: ignore
+
+            if self._ingested:
+                self._instrumentor._logger.debug(f"StreamIteratorWrapper: __next__ already ingested, not processing chunk {chunk}")
+                return chunk  # type: ignore
+
+            result = self._evaluate_chunk(chunk)
+
+            if result.ingest:
+                xproxy_result = self._stop_iteration()
+                self._request.assign_xproxy_result(chunk, xproxy_result)
+
+            if result.send_chunk_to_caller:
+                return chunk  # type: ignore
+            else:
+                return self.__next__()
+        except Exception as e:
+            if isinstance(e, StopIteration):
+                self._stop_iteration()
+            else:
+                self._instrumentor._logger.debug(f"StreamIteratorWrapper: __next__ exception {e}")
+            raise e
+
+    async def __anext__(self) -> object:
+        try:
+            chunk: object = await self.__wrapped__.__anext__()  # type: ignore
+
+            if self._ingested:
+                self._instrumentor._logger.debug(f"StreamIteratorWrapper: __anext__ already ingested, not processing chunk {chunk}")
+                return chunk  # type: ignore
+
+            result = self._evaluate_chunk(chunk)
+
+            if result.ingest:
+                xproxy_result = await self._astop_iteration()
+                self._request.assign_xproxy_result(chunk, xproxy_result)
+
+            if result.send_chunk_to_caller:
+                return chunk  # type: ignore
+            else:
+                return await self.__anext__()
+
+        except Exception as e:
+            if isinstance(e, StopAsyncIteration):
+                await self._astop_iteration()
+            else:
+                self._instrumentor._logger.debug(f"StreamIteratorWrapper: __anext__ exception {e}")
+            raise e
+
+    def _evaluate_chunk(self, chunk: Any) -> _ChunkResult:
+        if self._first_token:
+            self._request._ingest["time_to_first_token_ms"] = self._stopwatch.elapsed_ms_int()
+            self._first_token = False
+
+        if self._instrumentor._log_prompt_and_response:
+            self._responses.append(self.chunk_to_json(chunk))
+
+        return self._request.process_chunk(chunk)
+
+    def _process_stop_iteration(self) -> None:
+        self._instrumentor._logger.debug(f"StreamIteratorWrapper: process stop iteration")
+
+        self._stopwatch.stop()
+        self._request._ingest["end_to_end_latency_ms"] = self._stopwatch.elapsed_ms_int()
+        self._request._ingest["http_status_code"] = 200
+
+        if self._instrumentor._log_prompt_and_response:
+            self._request._ingest["provider_response_json"] = self._responses
+
+    async def _astop_iteration(self) -> Optional[Union[XproxyResult, XproxyError]]:
+        if self._ingested:
+            self._instrumentor._logger.debug(f"StreamIteratorWrapper: astop iteration already ingested, skipping")
+            return None
+
+        self._process_stop_iteration()
+        xproxy_result = await self._instrumentor._aingest_units(self._request)
+        self._ingested = True
+
+        return xproxy_result
+
+    def _stop_iteration(self) -> Optional[Union[XproxyResult, XproxyError]]:
+        if self._ingested:
+            self._instrumentor._logger.debug(f"StreamIteratorWrapper: stop iteration already ingested, skipping")
+            return None
+
+        self._process_stop_iteration()
+        xproxy_result = self._instrumentor._ingest_units(self._request)
+        self._ingested = True
+
+        return xproxy_result
+
+    @staticmethod
+    def chunk_to_json(chunk: Any) -> str:
+        if hasattr(chunk, "to_json"):
+            return str(chunk.to_json())
+        elif isinstance(chunk, bytes):
+            return chunk.decode()
+        elif isinstance(chunk, str):
+            return chunk
+        else:
+            # assume dict
+            return _compact_json(chunk)
+
+class _StreamManagerWrapper(ObjectProxy):  # type: ignore
+    def __init__(
+        self,
+        stream_manager: Any,  # type: ignore
+        instance: Any,
+        instrumentor: _PayiInstrumentor,
+        stopwatch: Stopwatch,
+        request: _ProviderRequest,
+    ) -> None:
+        instrumentor._logger.debug(f"StreamManagerWrapper: instance {instance}, category {request._category}")
+
+        super().__init__(stream_manager)  # type: ignore
+
+        self._stream_manager = stream_manager
+        self._instance = instance
+        self._instrumentor = instrumentor
+        self._stopwatch: Stopwatch = stopwatch
+        self._responses: list[str] = []
+        self._request: _ProviderRequest = request
+        self._first_token: bool = True
+
+    def __enter__(self) -> _StreamIteratorWrapper:
+        self._instrumentor._logger.debug(f"_StreamManagerWrapper: __enter__")
+
+        return _StreamIteratorWrapper(
+            response=self.__wrapped__.__enter__(),  # type: ignore
+            instance=self._instance,
+            instrumentor=self._instrumentor,
+            stopwatch=self._stopwatch,
+            request=self._request,
+        )
+
+class _GeneratorWrapper:  # type: ignore
+    def __init__(
+        self,
+        generator: Any,
+        instance: Any,
+        instrumentor: _PayiInstrumentor,
+        stopwatch: Stopwatch,
+        request: _ProviderRequest,
+    ) -> None:
+        instrumentor._logger.debug(f"GeneratorWrapper: instance {instance}, category {request._category}")
+
+        super().__init__()  # type: ignore
+
+        self._generator = generator
+        self._instance = instance
+        self._instrumentor = instrumentor
+        self._stopwatch: Stopwatch = stopwatch
+        self._log_prompt_and_response: bool = instrumentor._log_prompt_and_response
+        self._responses: list[str] = []
+        self._request: _ProviderRequest = request
+        self._first_token: bool = True
+        self._ingested: bool = False
+        self._iter_started: bool = False
+
+    def __iter__(self) -> Any:
+        self._iter_started = True
+        self._instrumentor._logger.debug(f"GeneratorWrapper: __iter__")
+        return self
+
+    def __aiter__(self) -> Any:
+        self._instrumentor._logger.debug(f"GeneratorWrapper: __aiter__")
+        return self
+
+    def _process_chunk(self, chunk: Any) -> _ChunkResult:
+        if self._first_token:
+            self._request._ingest["time_to_first_token_ms"] = self._stopwatch.elapsed_ms_int()
+            self._first_token = False
+
+        if self._log_prompt_and_response:
+            dict = self._chunk_to_dict(chunk)
+            self._responses.append(_compact_json(dict))
+
+        return self._request.process_chunk(chunk)
+
+    def __next__(self) -> Any:
+        try:
+            chunk = next(self._generator)
+            result = self._process_chunk(chunk)
+
+            if result.ingest:
+                xproxy_result = self._stop_iteration()
+                self._request.assign_xproxy_result(chunk, xproxy_result)
+
+            # ignore result.send_chunk_to_caller:
+            return chunk
+
+        except Exception as e:
+            if isinstance(e, StopIteration):
+                self._stop_iteration()
+            else:
+                self._instrumentor._logger.debug(f"GeneratorWrapper: __next__ exception {e}")
+            raise e
+
+    async def __anext__(self) -> Any:
+        try:
+            chunk = await anext(self._generator)  # type: ignore
+            result = self._process_chunk(chunk)
+
+            if result.ingest:
+                xproxy_result = await self._astop_iteration()
+                self._request.assign_xproxy_result(chunk, xproxy_result)
+
+            # ignore result.send_chunk_to_caller:
+            return chunk  # type: ignore
+
+        except Exception as e:
+            if isinstance(e, StopAsyncIteration):
+                await self._astop_iteration()
+            else:
+                self._instrumentor._logger.debug(f"GeneratorWrapper: __anext__ exception {e}")
+            raise e
+
+    @staticmethod
+    def _chunk_to_dict(chunk: Any) -> 'dict[str, object]':
+        if hasattr(chunk, "to_dict"):
+            return chunk.to_dict()  # type: ignore
+        elif hasattr(chunk, "to_json_dict"):
+            return chunk.to_json_dict()  # type: ignore
+        else:
+            return {}
+
+    def _stop_iteration(self) -> Optional[Union[XproxyResult, XproxyError]]:
+        if self._ingested:
+            self._instrumentor._logger.debug(f"GeneratorWrapper: stop iteration already ingested, skipping")
+            return None
+
+        self._process_stop_iteration()
+        xproxy_result = self._instrumentor._ingest_units(self._request)
+        self._ingested = True
+        return xproxy_result
+
+    async def _astop_iteration(self) -> Optional[Union[XproxyResult, XproxyError]]:
+        if self._ingested:
+            self._instrumentor._logger.debug(f"GeneratorWrapper: astop iteration already ingested, skipping")
+            return None
+
+        self._process_stop_iteration()
+        xproxy_result = await self._instrumentor._aingest_units(self._request)
+        self._ingested = True
+        return xproxy_result
+
+    def _process_stop_iteration(self) -> None:
+        self._instrumentor._logger.debug(f"GeneratorWrapper: stop iteration")
+
+        self._stopwatch.stop()
+        self._request._ingest["end_to_end_latency_ms"] = self._stopwatch.elapsed_ms_int()
+        self._request._ingest["http_status_code"] = 200
+
+        if self._log_prompt_and_response:
+            self._request._ingest["provider_response_json"] = self._responses
+
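Both wrappers subclass wrapt's `ObjectProxy`, so the instrumented stream keeps the provider SDK's full surface (context manager, iterator, attributes) while `__next__`/`__anext__` intercept every chunk and `_stop_iteration` ingests usage exactly once when the stream ends. The Bedrock branch lives in a separate `_iter_bedrock` generator because a `yield` inside `__iter__` would turn `__iter__` itself into a generator function and bypass the proxy. Below is a minimal standalone sketch of the same interception pattern; `CountingStream` is hypothetical and not part of the package:

```python
from wrapt import ObjectProxy  # pip install wrapt

class CountingStream(ObjectProxy):  # simplified analogue of _StreamIteratorWrapper
    def __init__(self, wrapped):
        super().__init__(wrapped)
        # wrapt stores attributes prefixed with _self_ on the proxy itself;
        # all other attribute access is delegated to the wrapped object
        self._self_count = 0

    def __iter__(self):
        return self

    def __next__(self):
        try:
            chunk = self.__wrapped__.__next__()
            self._self_count += 1  # per-chunk bookkeeping happens here
            return chunk
        except StopIteration:
            # end of stream: the real wrapper stops its stopwatch and ingests
            # units here, guarded by an _ingested flag so it runs only once
            print(f"stream finished after {self._self_count} chunks")
            raise

for piece in CountingStream(iter(["a", "b", "c"])):
    print(piece)
```

Because the proxy forwards everything it does not override, caller code that checks types or reads SDK-specific attributes on the stream continues to work unchanged.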
payi/lib/VertexInstrumentor.py
CHANGED

@@ -1,11 +1,14 @@
+from __future__ import annotations
+
 from typing import Any, List, Union, Optional, Sequence
 from typing_extensions import override
 
 from wrapt import wrap_function_wrapper  # type: ignore
 
-from .instrument import
+from .instrument import _IsStreaming, _PayiInstrumentor
 from .VertexRequest import _VertexRequest
 from .version_helper import get_version_helper
+from .ProviderRequest import _ChunkResult
 
 
 class VertexInstrumentor:
@@ -14,42 +17,20 @@ class VertexInstrumentor:
 
     @staticmethod
    def instrument(instrumentor: _PayiInstrumentor) -> None:
-
-        try:
-            wrap_function_wrapper(
-                "vertexai.generative_models",
-                "GenerativeModel.generate_content",
-                generate_wrapper(instrumentor),
-            )
-
-            wrap_function_wrapper(
-                "vertexai.generative_models",
-                "GenerativeModel.generate_content_async",
-                agenerate_wrapper(instrumentor),
-            )
-
-
-        except Exception as e:
-            instrumentor._logger.debug(f"Error instrumenting vertex: {e}")
-            return
-
-        # separate instrumetning preview functionality from released in case it fails
-        try:
-            wrap_function_wrapper(
-                "vertexai.preview.generative_models",
-                "GenerativeModel.generate_content",
-                generate_wrapper(instrumentor),
-            )
-
-            wrap_function_wrapper(
-                "vertexai.preview.generative_models",
-                "GenerativeModel.generate_content_async",
-                agenerate_wrapper(instrumentor),
-            )
-
-        except Exception as e:
-            instrumentor._logger.debug(f"Error instrumenting vertex: {e}")
-            return
+        VertexInstrumentor._module_version = get_version_helper(VertexInstrumentor._module_name)
+
+        wrappers = [
+            ("vertexai.generative_models", "GenerativeModel.generate_content", generate_wrapper(instrumentor)),
+            ("vertexai.generative_models", "GenerativeModel.generate_content_async", agenerate_wrapper(instrumentor)),
+            ("vertexai.preview.generative_models", "GenerativeModel.generate_content", generate_wrapper(instrumentor)),
+            ("vertexai.preview.generative_models", "GenerativeModel.generate_content_async", agenerate_wrapper(instrumentor)),
+        ]
+
+        for module, method, wrapper in wrappers:
+            try:
+                wrap_function_wrapper(module, method, wrapper)
+            except Exception as e:
+                instrumentor._logger.debug(f"Error wrapping {module}.{method}: {e}")
 
 @_PayiInstrumentor.payi_wrapper
 def generate_wrapper(
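The rewritten `instrument` collapses two hand-written try blocks into a table of wrap targets, so a failure on any one target (for example, an SDK version without the preview module) is logged and skipped instead of aborting the remaining instrumentation. A sketch of the same table-driven pattern against stand-in targets follows; the `json` entries are placeholders for illustration, not what the instrumentor actually wraps:

```python
import logging
from wrapt import wrap_function_wrapper  # pip install wrapt

logger = logging.getLogger(__name__)

def passthrough_wrapper(wrapped, instance, args, kwargs):
    # wrapt wrapper signature: receives the original callable and its call
    return wrapped(*args, **kwargs)

# one (module, attribute path, wrapper) row per wrap target
targets = [
    ("json", "dumps", passthrough_wrapper),
    ("json", "no_such_function", passthrough_wrapper),  # hypothetical: raises
]

for module, method, wrapper in targets:
    try:
        wrap_function_wrapper(module, method, wrapper)
    except Exception as e:
        # a single bad row is logged; the remaining rows still get wrapped
        logger.debug("Error wrapping %s.%s: %s", module, method, e)
```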
payi/lib/VertexRequest.py
CHANGED

@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import json
 import math
 from typing import Any, Optional
@@ -6,7 +8,8 @@ from typing_extensions import override
 from payi.lib.helpers import PayiCategories
 from payi.types.ingest_units_params import Units
 
-from .instrument import
+from .instrument import _PayiInstrumentor
+from .ProviderRequest import _ChunkResult, _StreamingType, _ProviderRequest
 
 
 class _VertexRequest(_ProviderRequest):  # type: ignore
@@ -44,6 +47,11 @@ class _VertexRequest(_ProviderRequest):  # type: ignore
         if id:
             self._ingest["provider_response_id"] = id
 
+        if "provider_response_headers" not in self._ingest:
+            response_headers = response_dict.get('sdk_http_response', {}).get('headers', {})
+            if response_headers:
+                self.add_response_headers(response_headers)
+
         if "resource" not in self._ingest:
             model: Optional[str] = self._get_model_name(response_dict)  # type: ignore[unreachable]
             if model:
@@ -111,6 +119,10 @@ class _VertexRequest(_ProviderRequest):  # type: ignore
         response_dict: 'dict[str, Any]',
         log_prompt_and_response: bool) -> Any:
 
+        response_headers = response_dict.get('sdk_http_response', {}).get('headers', {})
+        if response_headers:
+            self.add_response_headers(response_headers)
+
         id: Optional[str] = response_dict.get("response_id", None)
         if id:
             self._ingest["provider_response_id"] = id
@@ -148,7 +160,7 @@ class _VertexRequest(_ProviderRequest):  # type: ignore
             return model.startswith("gemini-1.")
 
         def is_large_context_token_model(model: str, input_tokens: int) -> bool:
-            return model.startswith("gemini-2.5-pro") and input_tokens >
+            return model.startswith("gemini-2.5-pro") and input_tokens > 200000
 
         def add_units(request: _ProviderRequest, key: str, input: Optional[int] = None, output: Optional[int] = None) -> None:
             if key not in request._ingest["units"]:
@@ -172,6 +184,7 @@ class _VertexRequest(_ProviderRequest):  # type: ignore
 
         if is_character_billing_model(model):
            if input > 128000:
+                self._is_large_context = True
                large_context = "_large_context"
 
            # gemini 1.0 and 1.5 units are reported in characters, per second, per image, etc...
@@ -222,6 +235,7 @@ class _VertexRequest(_ProviderRequest):  # type: ignore
        thinking_token_count = usage.get("thoughts_token_count", 0)
 
        if is_large_context_token_model(model, input):
+            self._is_large_context = True
            large_context = "_large_context"
 
        cache_details: dict[str, int] = {}