payi 0.1.0a107__py3-none-any.whl → 0.1.0a137__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. payi/__init__.py +3 -1
  2. payi/_base_client.py +12 -12
  3. payi/_client.py +8 -8
  4. payi/_compat.py +48 -48
  5. payi/_models.py +87 -59
  6. payi/_qs.py +7 -7
  7. payi/_streaming.py +4 -6
  8. payi/_types.py +53 -12
  9. payi/_utils/__init__.py +9 -2
  10. payi/_utils/_compat.py +45 -0
  11. payi/_utils/_datetime_parse.py +136 -0
  12. payi/_utils/_sync.py +3 -31
  13. payi/_utils/_transform.py +13 -3
  14. payi/_utils/_typing.py +6 -1
  15. payi/_utils/_utils.py +5 -6
  16. payi/_version.py +1 -1
  17. payi/lib/AnthropicInstrumentor.py +83 -57
  18. payi/lib/BedrockInstrumentor.py +292 -57
  19. payi/lib/GoogleGenAiInstrumentor.py +18 -31
  20. payi/lib/OpenAIInstrumentor.py +56 -72
  21. payi/lib/ProviderRequest.py +216 -0
  22. payi/lib/StreamWrappers.py +379 -0
  23. payi/lib/VertexInstrumentor.py +18 -37
  24. payi/lib/VertexRequest.py +16 -2
  25. payi/lib/data/cohere_embed_english_v3.json +30706 -0
  26. payi/lib/helpers.py +62 -5
  27. payi/lib/instrument.py +433 -659
  28. payi/resources/categories/__init__.py +0 -14
  29. payi/resources/categories/categories.py +25 -53
  30. payi/resources/categories/resources.py +27 -23
  31. payi/resources/ingest.py +126 -132
  32. payi/resources/limits/__init__.py +14 -14
  33. payi/resources/limits/limits.py +58 -58
  34. payi/resources/limits/properties.py +171 -0
  35. payi/resources/requests/request_id/properties.py +8 -8
  36. payi/resources/requests/request_id/result.py +3 -3
  37. payi/resources/requests/response_id/properties.py +8 -8
  38. payi/resources/requests/response_id/result.py +3 -3
  39. payi/resources/use_cases/definitions/definitions.py +27 -27
  40. payi/resources/use_cases/definitions/kpis.py +23 -23
  41. payi/resources/use_cases/definitions/limit_config.py +14 -14
  42. payi/resources/use_cases/definitions/version.py +3 -3
  43. payi/resources/use_cases/kpis.py +15 -15
  44. payi/resources/use_cases/properties.py +6 -6
  45. payi/resources/use_cases/use_cases.py +7 -7
  46. payi/types/__init__.py +2 -0
  47. payi/types/bulk_ingest_response.py +3 -20
  48. payi/types/categories/__init__.py +0 -1
  49. payi/types/categories/resource_list_params.py +5 -1
  50. payi/types/category_list_resources_params.py +5 -1
  51. payi/types/category_resource_response.py +31 -1
  52. payi/types/ingest_event_param.py +7 -6
  53. payi/types/ingest_units_params.py +5 -4
  54. payi/types/limit_create_params.py +3 -3
  55. payi/types/limit_list_response.py +1 -3
  56. payi/types/limit_response.py +1 -3
  57. payi/types/limits/__init__.py +2 -9
  58. payi/types/limits/{tag_remove_params.py → property_update_params.py} +4 -5
  59. payi/types/limits/{tag_delete_response.py → property_update_response.py} +3 -3
  60. payi/types/requests/request_id/property_update_params.py +2 -2
  61. payi/types/requests/response_id/property_update_params.py +2 -2
  62. payi/types/shared/__init__.py +2 -0
  63. payi/types/shared/api_error.py +18 -0
  64. payi/types/shared/pay_i_common_models_budget_management_create_limit_base.py +3 -3
  65. payi/types/shared/properties_request.py +11 -0
  66. payi/types/shared/xproxy_result.py +2 -0
  67. payi/types/shared_params/pay_i_common_models_budget_management_create_limit_base.py +3 -3
  68. payi/types/use_cases/definitions/limit_config_create_params.py +3 -3
  69. payi/types/use_cases/property_update_params.py +2 -2
  70. {payi-0.1.0a107.dist-info → payi-0.1.0a137.dist-info}/METADATA +6 -6
  71. {payi-0.1.0a107.dist-info → payi-0.1.0a137.dist-info}/RECORD +73 -75
  72. payi/resources/categories/fixed_cost_resources.py +0 -196
  73. payi/resources/limits/tags.py +0 -507
  74. payi/types/categories/fixed_cost_resource_create_params.py +0 -21
  75. payi/types/limits/limit_tags.py +0 -16
  76. payi/types/limits/tag_create_params.py +0 -13
  77. payi/types/limits/tag_create_response.py +0 -10
  78. payi/types/limits/tag_list_response.py +0 -10
  79. payi/types/limits/tag_remove_response.py +0 -10
  80. payi/types/limits/tag_update_params.py +0 -13
  81. payi/types/limits/tag_update_response.py +0 -10
  82. {payi-0.1.0a107.dist-info → payi-0.1.0a137.dist-info}/WHEEL +0 -0
  83. {payi-0.1.0a107.dist-info → payi-0.1.0a137.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,379 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any, Union, Optional
4
+
5
+ from wrapt import ObjectProxy # type: ignore
6
+
7
+ from payi.lib.helpers import _compact_json
8
+ from payi.lib.Stopwatch import Stopwatch
9
+ from payi.types.shared.xproxy_error import XproxyError
10
+ from payi.types.shared.xproxy_result import XproxyResult
11
+
12
+ from .ProviderRequest import _ChunkResult, _ProviderRequest
13
+
14
+ if TYPE_CHECKING:
15
+ from .instrument import _PayiInstrumentor
16
+
17
+ __all__ = [
18
+ "_StreamIteratorWrapper",
19
+ "_StreamManagerWrapper",
20
+ "_GeneratorWrapper",
21
+ ]
22
+
23
+ class _StreamIteratorWrapper(ObjectProxy): # type: ignore
24
+ def __init__(
25
+ self,
26
+ response: Any,
27
+ instance: Any,
28
+ instrumentor: '_PayiInstrumentor',
29
+ stopwatch: Stopwatch,
30
+ request: _ProviderRequest,
31
+ ) -> None:
32
+
33
+ instrumentor._logger.debug(f"StreamIteratorWrapper: instance {instance}, category {request._category}")
34
+
35
+ request.process_initial_stream_response(response)
36
+
37
+ bedrock_from_stream: bool = False
38
+ if request.is_aws_client:
39
+ stream = response.get("stream", None)
40
+
41
+ if stream:
42
+ response = stream
43
+ bedrock_from_stream = True
44
+ else:
45
+ response = response.get("body")
46
+ bedrock_from_stream = False
47
+
48
+ super().__init__(response) # type: ignore
49
+
50
+ self._response = response
51
+ self._instance = instance
52
+
53
+ self._instrumentor = instrumentor
54
+ self._stopwatch: Stopwatch = stopwatch
55
+ self._responses: list[str] = []
56
+
57
+ self._request: _ProviderRequest = request
58
+
59
+ self._first_token: bool = True
60
+ self._bedrock_from_stream: bool = bedrock_from_stream
61
+ self._ingested: bool = False
62
+ self._iter_started: bool = False
63
+
64
+ def __enter__(self) -> Any:
65
+ self._instrumentor._logger.debug(f"StreamIteratorWrapper: __enter__")
66
+ return self
67
+
68
+ def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
69
+ self._instrumentor._logger.debug(f"StreamIteratorWrapper: __exit__")
70
+ self.__wrapped__.__exit__(exc_type, exc_val, exc_tb) # type: ignore
71
+
72
+ async def __aenter__(self) -> Any:
73
+ self._instrumentor._logger.debug(f"StreamIteratorWrapper: __aenter__")
74
+ return self
75
+
76
+ async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
77
+ self._instrumentor._logger.debug(f"StreamIteratorWrapper: __aexit__")
78
+ await self.__wrapped__.__aexit__(exc_type, exc_val, exc_tb) # type: ignore
79
+
80
+ def __iter__(self) -> Any:
81
+ self._iter_started = True
82
+ if self._request.is_aws_client:
83
+ # MUST reside in a separate function so that the yield statement (e.g. the generator) doesn't implicitly return its own iterator and overriding self
84
+ self._instrumentor._logger.debug(f"StreamIteratorWrapper: bedrock __iter__")
85
+ return self._iter_bedrock()
86
+
87
+ self._instrumentor._logger.debug(f"StreamIteratorWrapper: __iter__")
88
+ return self
89
+
90
+ def _iter_bedrock(self) -> Any:
91
+ # botocore EventStream doesn't have a __next__ method so iterate over the wrapped object in place
92
+ for event in self.__wrapped__: # type: ignore
93
+ result: Optional[_ChunkResult] = None
94
+
95
+ if (self._bedrock_from_stream):
96
+ result = self._evaluate_chunk(event)
97
+ else:
98
+ chunk = event.get('chunk') # type: ignore
99
+ if chunk:
100
+ decode = chunk.get('bytes').decode() # type: ignore
101
+ result = self._evaluate_chunk(decode)
102
+
103
+ if result and result.ingest:
104
+ from .BedrockInstrumentor import BedrockInstrumentor
105
+
106
+ xproxy_result = self._stop_iteration()
107
+
108
+ # the xproxy_result is not json serializable by default so adding the object is opt in by the client
109
+ if BedrockInstrumentor._add_streaming_xproxy_result:
110
+ self._request.assign_xproxy_result(event, xproxy_result)
111
+ yield event
112
+
113
+ self._instrumentor._logger.debug(f"StreamIteratorWrapper: bedrock iter finished")
114
+
115
+ self._stop_iteration()
116
+
117
+ def __aiter__(self) -> Any:
118
+ self._iter_started = True
119
+ self._instrumentor._logger.debug(f"StreamIteratorWrapper: __aiter__")
120
+ return self
121
+
122
+ def __next__(self) -> object:
123
+ try:
124
+ chunk: object = self.__wrapped__.__next__() # type: ignore
125
+
126
+ if self._ingested:
127
+ self._instrumentor._logger.debug(f"StreamIteratorWrapper: __next__ already ingested, not processing chunk {chunk}")
128
+ return chunk # type: ignore
129
+
130
+ result = self._evaluate_chunk(chunk)
131
+
132
+ if result.ingest:
133
+ xproxy_result = self._stop_iteration()
134
+ self._request.assign_xproxy_result(chunk, xproxy_result)
135
+
136
+ if result.send_chunk_to_caller:
137
+ return chunk # type: ignore
138
+ else:
139
+ return self.__next__()
140
+ except Exception as e:
141
+ if isinstance(e, StopIteration):
142
+ self._stop_iteration()
143
+ else:
144
+ self._instrumentor._logger.debug(f"StreamIteratorWrapper: __next__ exception {e}")
145
+ raise e
146
+
147
+ async def __anext__(self) -> object:
148
+ try:
149
+ chunk: object = await self.__wrapped__.__anext__() # type: ignore
150
+
151
+ if self._ingested:
152
+ self._instrumentor._logger.debug(f"StreamIteratorWrapper: __next__ already ingested, not processing chunk {chunk}")
153
+ return chunk # type: ignore
154
+
155
+ result = self._evaluate_chunk(chunk)
156
+
157
+ if result.ingest:
158
+ xproxy_result = await self._astop_iteration()
159
+ self._request.assign_xproxy_result(chunk, xproxy_result)
160
+
161
+ if result.send_chunk_to_caller:
162
+ return chunk # type: ignore
163
+ else:
164
+ return await self.__anext__()
165
+
166
+ except Exception as e:
167
+ if isinstance(e, StopAsyncIteration):
168
+ await self._astop_iteration()
169
+ else:
170
+ self._instrumentor._logger.debug(f"StreamIteratorWrapper: __anext__ exception {e}")
171
+ raise e
172
+
173
+ def _evaluate_chunk(self, chunk: Any) -> _ChunkResult:
174
+ if self._first_token:
175
+ self._request._ingest["time_to_first_token_ms"] = self._stopwatch.elapsed_ms_int()
176
+ self._first_token = False
177
+
178
+ if self._instrumentor._log_prompt_and_response:
179
+ self._responses.append(self.chunk_to_json(chunk))
180
+
181
+ return self._request.process_chunk(chunk)
182
+
183
+ def _process_stop_iteration(self) -> None:
184
+ self._instrumentor._logger.debug(f"StreamIteratorWrapper: process stop iteration")
185
+
186
+ self._stopwatch.stop()
187
+ self._request._ingest["end_to_end_latency_ms"] = self._stopwatch.elapsed_ms_int()
188
+ self._request._ingest["http_status_code"] = 200
189
+
190
+ if self._instrumentor._log_prompt_and_response:
191
+ self._request._ingest["provider_response_json"] = self._responses
192
+
193
+ async def _astop_iteration(self) -> Optional[Union[XproxyResult, XproxyError]]:
194
+ if self._ingested:
195
+ self._instrumentor._logger.debug(f"StreamIteratorWrapper: astop iteration already ingested, skipping")
196
+ return None
197
+
198
+ self._process_stop_iteration()
199
+ xproxy_result = await self._instrumentor._aingest_units(self._request)
200
+ self._ingested = True
201
+
202
+ return xproxy_result
203
+
204
+ def _stop_iteration(self) -> Optional[Union[XproxyResult, XproxyError]]:
205
+ if self._ingested:
206
+ self._instrumentor._logger.debug(f"StreamIteratorWrapper: stop iteration already ingested, skipping")
207
+ return None
208
+
209
+ self._process_stop_iteration()
210
+ xproxy_result = self._instrumentor._ingest_units(self._request)
211
+ self._ingested = True
212
+
213
+ return xproxy_result
214
+
215
+ @staticmethod
216
+ def chunk_to_json(chunk: Any) -> str:
217
+ if hasattr(chunk, "to_json"):
218
+ return str(chunk.to_json())
219
+ elif isinstance(chunk, bytes):
220
+ return chunk.decode()
221
+ elif isinstance(chunk, str):
222
+ return chunk
223
+ else:
224
+ # assume dict
225
+ return _compact_json(chunk)
226
+
227
class _StreamManagerWrapper(ObjectProxy):  # type: ignore
    """Proxy over a provider stream-manager context manager.

    Entering the context enters the wrapped manager and returns its stream
    wrapped in a ``_StreamIteratorWrapper`` so each chunk is instrumented.
    """

    def __init__(
        self,
        stream_manager: Any,  # type: ignore
        instance: Any,
        instrumentor: _PayiInstrumentor,
        stopwatch: Stopwatch,
        request: _ProviderRequest,
    ) -> None:
        instrumentor._logger.debug(f"StreamManagerWrapper: instance {instance}, category {request._category}")

        super().__init__(stream_manager)  # type: ignore

        # Keep everything the iterator wrapper will need when the context is entered.
        self._stream_manager = stream_manager
        self._instance = instance
        self._instrumentor = instrumentor
        self._stopwatch: Stopwatch = stopwatch
        self._responses: list[str] = []
        self._request: _ProviderRequest = request
        self._first_token: bool = True

    def __enter__(self) -> _StreamIteratorWrapper:
        self._instrumentor._logger.debug("_StreamManagerWrapper: __enter__")

        inner_stream = self.__wrapped__.__enter__()  # type: ignore
        return _StreamIteratorWrapper(
            response=inner_stream,
            instance=self._instance,
            instrumentor=self._instrumentor,
            stopwatch=self._stopwatch,
            request=self._request,
        )
259
+ class _GeneratorWrapper: # type: ignore
260
+ def __init__(
261
+ self,
262
+ generator: Any,
263
+ instance: Any,
264
+ instrumentor: _PayiInstrumentor,
265
+ stopwatch: Stopwatch,
266
+ request: _ProviderRequest,
267
+ ) -> None:
268
+ instrumentor._logger.debug(f"GeneratorWrapper: instance {instance}, category {request._category}")
269
+
270
+ super().__init__() # type: ignore
271
+
272
+ self._generator = generator
273
+ self._instance = instance
274
+ self._instrumentor = instrumentor
275
+ self._stopwatch: Stopwatch = stopwatch
276
+ self._log_prompt_and_response: bool = instrumentor._log_prompt_and_response
277
+ self._responses: list[str] = []
278
+ self._request: _ProviderRequest = request
279
+ self._first_token: bool = True
280
+ self._ingested: bool = False
281
+ self._iter_started: bool = False
282
+
283
+ def __iter__(self) -> Any:
284
+ self._iter_started = True
285
+ self._instrumentor._logger.debug(f"GeneratorWrapper: __iter__")
286
+ return self
287
+
288
+ def __aiter__(self) -> Any:
289
+ self._instrumentor._logger.debug(f"GeneratorWrapper: __aiter__")
290
+ return self
291
+
292
+ def _process_chunk(self, chunk: Any) -> _ChunkResult:
293
+ if self._first_token:
294
+ self._request._ingest["time_to_first_token_ms"] = self._stopwatch.elapsed_ms_int()
295
+ self._first_token = False
296
+
297
+ if self._log_prompt_and_response:
298
+ dict = self._chunk_to_dict(chunk)
299
+ self._responses.append(_compact_json(dict))
300
+
301
+ return self._request.process_chunk(chunk)
302
+
303
+ def __next__(self) -> Any:
304
+ try:
305
+ chunk = next(self._generator)
306
+ result = self._process_chunk(chunk)
307
+
308
+ if result.ingest:
309
+ xproxy_result = self._stop_iteration()
310
+ self._request.assign_xproxy_result(chunk, xproxy_result)
311
+
312
+ # ignore result.send_chunk_to_caller:
313
+ return chunk
314
+
315
+ except Exception as e:
316
+ if isinstance(e, StopIteration):
317
+ self._stop_iteration()
318
+ else:
319
+ self._instrumentor._logger.debug(f"GeneratorWrapper: __next__ exception {e}")
320
+ raise e
321
+
322
+ async def __anext__(self) -> Any:
323
+ try:
324
+ chunk = await anext(self._generator) # type: ignore
325
+ result = self._process_chunk(chunk)
326
+
327
+ if result.ingest:
328
+ xproxy_result = await self._astop_iteration()
329
+ self._request.assign_xproxy_result(chunk, xproxy_result)
330
+
331
+ # ignore result.send_chunk_to_caller:
332
+ return chunk # type: ignore
333
+
334
+ except Exception as e:
335
+ if isinstance(e, StopAsyncIteration):
336
+ await self._astop_iteration()
337
+ else:
338
+ self._instrumentor._logger.debug(f"GeneratorWrapper: __anext__ exception {e}")
339
+ raise e
340
+
341
+ @staticmethod
342
+ def _chunk_to_dict(chunk: Any) -> 'dict[str, object]':
343
+ if hasattr(chunk, "to_dict"):
344
+ return chunk.to_dict() # type: ignore
345
+ elif hasattr(chunk, "to_json_dict"):
346
+ return chunk.to_json_dict() # type: ignore
347
+ else:
348
+ return {}
349
+
350
+ def _stop_iteration(self) -> Optional[Union[XproxyResult, XproxyError]]:
351
+ if self._ingested:
352
+ self._instrumentor._logger.debug(f"GeneratorWrapper: stop iteration already ingested, skipping")
353
+ return None
354
+
355
+ self._process_stop_iteration()
356
+ xproxy_result = self._instrumentor._ingest_units(self._request)
357
+ self._ingested = True
358
+ return xproxy_result
359
+
360
+ async def _astop_iteration(self) -> Optional[Union[XproxyResult, XproxyError]]:
361
+ if self._ingested:
362
+ self._instrumentor._logger.debug(f"GeneratorWrapper: astop iteration already ingested, skipping")
363
+ return None
364
+
365
+ self._process_stop_iteration()
366
+ xproxy_result = await self._instrumentor._aingest_units(self._request)
367
+ self._ingested = True
368
+ return xproxy_result
369
+
370
+ def _process_stop_iteration(self) -> None:
371
+ self._instrumentor._logger.debug(f"GeneratorWrapper: stop iteration")
372
+
373
+ self._stopwatch.stop()
374
+ self._request._ingest["end_to_end_latency_ms"] = self._stopwatch.elapsed_ms_int()
375
+ self._request._ingest["http_status_code"] = 200
376
+
377
+ if self._log_prompt_and_response:
378
+ self._request._ingest["provider_response_json"] = self._responses
379
+
@@ -1,11 +1,14 @@
1
+ from __future__ import annotations
2
+
1
3
  from typing import Any, List, Union, Optional, Sequence
2
4
  from typing_extensions import override
3
5
 
4
6
  from wrapt import wrap_function_wrapper # type: ignore
5
7
 
6
- from .instrument import _ChunkResult, _IsStreaming, _PayiInstrumentor
8
+ from .instrument import _IsStreaming, _PayiInstrumentor
7
9
  from .VertexRequest import _VertexRequest
8
10
  from .version_helper import get_version_helper
11
+ from .ProviderRequest import _ChunkResult
9
12
 
10
13
 
11
14
  class VertexInstrumentor:
@@ -14,42 +17,20 @@ class VertexInstrumentor:
14
17
 
15
18
  @staticmethod
16
19
  def instrument(instrumentor: _PayiInstrumentor) -> None:
17
- try:
18
- VertexInstrumentor._module_version = get_version_helper(VertexInstrumentor._module_name)
19
-
20
- wrap_function_wrapper(
21
- "vertexai.generative_models",
22
- "GenerativeModel.generate_content",
23
- generate_wrapper(instrumentor),
24
- )
25
-
26
- wrap_function_wrapper(
27
- "vertexai.generative_models",
28
- "GenerativeModel.generate_content_async",
29
- agenerate_wrapper(instrumentor),
30
- )
31
-
32
- except Exception as e:
33
- instrumentor._logger.debug(f"Error instrumenting vertex: {e}")
34
- return
35
-
36
- # separate instrumetning preview functionality from released in case it fails
37
- try:
38
- wrap_function_wrapper(
39
- "vertexai.preview.generative_models",
40
- "GenerativeModel.generate_content",
41
- generate_wrapper(instrumentor),
42
- )
43
-
44
- wrap_function_wrapper(
45
- "vertexai.preview.generative_models",
46
- "GenerativeModel.generate_content_async",
47
- agenerate_wrapper(instrumentor),
48
- )
49
-
50
- except Exception as e:
51
- instrumentor._logger.debug(f"Error instrumenting vertex: {e}")
52
- return
20
+ VertexInstrumentor._module_version = get_version_helper(VertexInstrumentor._module_name)
21
+
22
+ wrappers = [
23
+ ("vertexai.generative_models", "GenerativeModel.generate_content", generate_wrapper(instrumentor)),
24
+ ("vertexai.generative_models", "GenerativeModel.generate_content_async", agenerate_wrapper(instrumentor)),
25
+ ("vertexai.preview.generative_models", "GenerativeModel.generate_content", generate_wrapper(instrumentor)),
26
+ ("vertexai.preview.generative_models", "GenerativeModel.generate_content_async", agenerate_wrapper(instrumentor)),
27
+ ]
28
+
29
+ for module, method, wrapper in wrappers:
30
+ try:
31
+ wrap_function_wrapper(module, method, wrapper)
32
+ except Exception as e:
33
+ instrumentor._logger.debug(f"Error wrapping {module}.{method}: {e}")
53
34
 
54
35
  @_PayiInstrumentor.payi_wrapper
55
36
  def generate_wrapper(
payi/lib/VertexRequest.py CHANGED
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  import json
2
4
  import math
3
5
  from typing import Any, Optional
@@ -6,7 +8,8 @@ from typing_extensions import override
6
8
  from payi.lib.helpers import PayiCategories
7
9
  from payi.types.ingest_units_params import Units
8
10
 
9
- from .instrument import _ChunkResult, _StreamingType, _ProviderRequest, _PayiInstrumentor
11
+ from .instrument import _PayiInstrumentor
12
+ from .ProviderRequest import _ChunkResult, _StreamingType, _ProviderRequest
10
13
 
11
14
 
12
15
  class _VertexRequest(_ProviderRequest): # type: ignore
@@ -44,6 +47,11 @@ class _VertexRequest(_ProviderRequest): # type: ignore
44
47
  if id:
45
48
  self._ingest["provider_response_id"] = id
46
49
 
50
+ if "provider_response_headers" not in self._ingest:
51
+ response_headers = response_dict.get('sdk_http_response', {}).get('headers', {})
52
+ if response_headers:
53
+ self.add_response_headers(response_headers)
54
+
47
55
  if "resource" not in self._ingest:
48
56
  model: Optional[str] = self._get_model_name(response_dict) # type: ignore[unreachable]
49
57
  if model:
@@ -111,6 +119,10 @@ class _VertexRequest(_ProviderRequest): # type: ignore
111
119
  response_dict: 'dict[str, Any]',
112
120
  log_prompt_and_response: bool) -> Any:
113
121
 
122
+ response_headers = response_dict.get('sdk_http_response', {}).get('headers', {})
123
+ if response_headers:
124
+ self.add_response_headers(response_headers)
125
+
114
126
  id: Optional[str] = response_dict.get("response_id", None)
115
127
  if id:
116
128
  self._ingest["provider_response_id"] = id
@@ -148,7 +160,7 @@ class _VertexRequest(_ProviderRequest): # type: ignore
148
160
  return model.startswith("gemini-1.")
149
161
 
150
162
  def is_large_context_token_model(model: str, input_tokens: int) -> bool:
151
- return model.startswith("gemini-2.5-pro") and input_tokens > 200_000
163
+ return model.startswith("gemini-2.5-pro") and input_tokens > 200000
152
164
 
153
165
  def add_units(request: _ProviderRequest, key: str, input: Optional[int] = None, output: Optional[int] = None) -> None:
154
166
  if key not in request._ingest["units"]:
@@ -172,6 +184,7 @@ class _VertexRequest(_ProviderRequest): # type: ignore
172
184
 
173
185
  if is_character_billing_model(model):
174
186
  if input > 128000:
187
+ self._is_large_context = True
175
188
  large_context = "_large_context"
176
189
 
177
190
  # gemini 1.0 and 1.5 units are reported in characters, per second, per image, etc...
@@ -222,6 +235,7 @@ class _VertexRequest(_ProviderRequest): # type: ignore
222
235
  thinking_token_count = usage.get("thoughts_token_count", 0)
223
236
 
224
237
  if is_large_context_token_model(model, input):
238
+ self._is_large_context = True
225
239
  large_context = "_large_context"
226
240
 
227
241
  cache_details: dict[str, int] = {}