payi-0.1.0a83-py3-none-any.whl → payi-0.1.0a85-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of payi might be problematic.

@@ -1,12 +1,10 @@
 import json
-import math
 from typing import Any, List, Union, Optional, Sequence
 from typing_extensions import override
 
 from wrapt import wrap_function_wrapper  # type: ignore
 
 from payi.lib.helpers import PayiCategories
-from payi.types.ingest_units_params import Units
 
 from .instrument import _ChunkResult, _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
 
@@ -115,18 +113,16 @@ async def agenerate_stream_wrapper(
         kwargs,
     )
 
-def count_chars_skip_spaces(text: str) -> int:
-    return sum(1 for c in text if not c.isspace())
-
 class _GoogleGenAiRequest(_ProviderRequest):
     def __init__(self, instrumentor: _PayiInstrumentor):
         super().__init__(
             instrumentor=instrumentor,
             category=PayiCategories.google_vertex,
             streaming_type=_StreamingType.generator,
+            is_google_vertex_or_genai_client=True,
         )
         self._prompt_character_count = 0
-        self._candiates_character_count = 0
+        self._candidates_character_count = 0
 
     @override
     def process_request(self, instance: Any, extra_headers: 'dict[str, str]', args: Sequence[Any], kwargs: Any) -> bool:
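The module-level count_chars_skip_spaces helper is removed here; it now lives in VertexInstrumentor, from which both request classes import it lazily (see the following hunks). Its behavior is unchanged from the removed lines — a minimal sketch with a usage check, assuming the relocated copy is identical:

    def count_chars_skip_spaces(text: str) -> int:
        # count every non-whitespace character; Gemini 1.x character
        # billing counts characters with whitespace excluded
        return sum(1 for c in text if not c.isspace())

    assert count_chars_skip_spaces("Hello, world!") == 12  # the space is skipped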
@@ -158,6 +154,8 @@ class _GoogleGenAiRequest(_ProviderRequest):
         if isinstance(value, list):
             items = value  # type: ignore
 
+        from .VertexInstrumentor import count_chars_skip_spaces
+
         for item in items:  # type: ignore
             text = ""
             if isinstance(item, Part):
@@ -249,6 +247,8 @@ class _GoogleGenAiRequest(_ProviderRequest):
 
     @override
     def process_chunk(self, chunk: Any) -> _ChunkResult:
+        from .VertexInstrumentor import vertex_compute_usage, count_chars_skip_spaces
+
         ingest = False
         response_dict: dict[str, Any] = chunk.to_json_dict()
         if "provider_response_id" not in self._ingest:
@@ -260,22 +260,25 @@
 
             self._ingest["resource"] = "google." + model
 
+
         for candidate in response_dict.get("candidates", []):
             parts = candidate.get("content", {}).get("parts", [])
             for part in parts:
-                self._candiates_character_count += count_chars_skip_spaces(part.get("text", ""))
+                self._candidates_character_count += count_chars_skip_spaces(part.get("text", ""))
 
         usage = response_dict.get("usage_metadata", {})
         if usage and "prompt_token_count" in usage and "candidates_token_count" in usage:
-            self._compute_usage(response_dict, streaming_candidates_characters=self._candiates_character_count)
+            vertex_compute_usage(
+                request=self,
+                model=model,
+                response_dict=response_dict,
+                prompt_character_count=self._prompt_character_count,
+                streaming_candidates_characters=self._candidates_character_count
+            )
             ingest = True
 
         return _ChunkResult(send_chunk_to_caller=True, ingest=ingest)
 
-    @staticmethod
-    def _is_character_billing_model(model: str) -> bool:
-        return model.startswith("gemini-1.")
-
     @override
     def process_synchronous_response(
         self,
@@ -284,6 +287,8 @@ class _GoogleGenAiRequest(_ProviderRequest):
         kwargs: Any) -> Any:
         response_dict = response.to_json_dict()
 
+        from .VertexInstrumentor import vertex_compute_usage
+
         id: Optional[str] = response_dict.get("response_id", None)
         if id:
             self._ingest["provider_response_id"] = id
@@ -292,105 +297,15 @@ class _GoogleGenAiRequest(_ProviderRequest):
         if model:
             self._ingest["resource"] = "google." + model
 
-        self._compute_usage(response_dict)
-
+        vertex_compute_usage(
+            request=self,
+            model=model,
+            response_dict=response_dict,
+            prompt_character_count=self._prompt_character_count,
+            streaming_candidates_characters=self._candidates_character_count
+        )
+
         if log_prompt_and_response:
             self._ingest["provider_response_json"] = [json.dumps(response_dict)]
 
-        return None
-
-    def add_units(self, key: str, input: Optional[int] = None, output: Optional[int] = None) -> None:
-        if key not in self._ingest["units"]:
-            self._ingest["units"][key] = {}
-        if input is not None:
-            self._ingest["units"][key]["input"] = input
-        if output is not None:
-            self._ingest["units"][key]["output"] = output
-
-    def _compute_usage(self, response_dict: 'dict[str, Any]', streaming_candidates_characters: Optional[int] = None) -> None:
-        usage = response_dict.get("usage_metadata", {})
-        input = usage.get("prompt_token_count", 0)
-
-        prompt_tokens_details: list[dict[str, Any]] = usage.get("prompt_tokens_details", [])
-        candidates_tokens_details: list[dict[str, Any]] = usage.get("candidates_tokens_details", [])
-
-        model: str = response_dict.get("model_version", "")
-
-        # for character billing only
-        large_context = "" if input < 128000 else "_large_context"
-
-        if self._is_character_billing_model(model):
-            for details in prompt_tokens_details:
-                modality = details.get("modality", "")
-                if not modality:
-                    continue
-
-                modality_token_count = details.get("token_count", 0)
-                if modality == "TEXT":
-                    input = self._prompt_character_count
-                    if input == 0:
-                        # back up calc if nothing was calculated from the prompt
-                        input = response_dict["usage_metadata"]["prompt_token_count"] * 4
-
-                    output = 0
-                    if streaming_candidates_characters is None:
-                        for candidate in response_dict.get("candidates", []):
-                            parts = candidate.get("content", {}).get("parts", [])
-                            for part in parts:
-                                output += count_chars_skip_spaces(part.get("text", ""))
-
-                        if output == 0:
-                            # back up calc if no parts
-                            output = response_dict["usage_metadata"]["candidates_token_count"] * 4
-                    else:
-                        output = streaming_candidates_characters
-
-                    self._ingest["units"]["text"+large_context] = Units(input=input, output=output)
-
-                elif modality == "IMAGE":
-                    num_images = math.ceil(modality_token_count / 258)
-                    self.add_units("vision"+large_context, input=num_images)
-
-                elif modality == "VIDEO":
-                    video_seconds = math.ceil(modality_token_count / 285)
-                    self.add_units("video"+large_context, input=video_seconds)
-
-                elif modality == "AUDIO":
-                    audio_seconds = math.ceil(modality_token_count / 25)
-                    self.add_units("audio"+large_context, input=audio_seconds)
-
-        else:
-            for details in prompt_tokens_details:
-                modality = details.get("modality", "")
-                if not modality:
-                    continue
-
-                modality_token_count = details.get("token_count", 0)
-                if modality == "IMAGE":
-                    self.add_units("vision", input=modality_token_count)
-                elif modality in ("VIDEO", "AUDIO", "TEXT"):
-                    self.add_units(modality.lower(), input=modality_token_count)
-            for details in candidates_tokens_details:
-                modality = details.get("modality", "")
-                if not modality:
-                    continue
-
-                modality_token_count = details.get("token_count", 0)
-                if modality in ("VIDEO", "AUDIO", "TEXT", "IMAGE"):
-                    self.add_units(modality.lower(), output=modality_token_count)
-
-        if not self._ingest["units"]:
-            input = usage.get("prompt_token_count", 0)
-            output = usage.get("candidates_token_count", 0) * 4
-
-            if self._is_character_billing_model(model):
-                if self._prompt_character_count > 0:
-                    input = self._prompt_character_count
-                else:
-                    input *= 4
-
-                # if no units were added, add a default unit and assume 4 characters per token
-                self._ingest["units"]["text"+large_context] = Units(input=input, output=output)
-            else:
-                # if no units were added, add a default unit
-                self._ingest["units"]["text"] = Units(input=input, output=output)
+        return None
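Both request classes previously carried near-identical copies of add_units and _compute_usage; this hunk deletes the _GoogleGenAiRequest copy in favor of the shared vertex_compute_usage defined alongside _GoogleVertexRequest (next file). The fallback arithmetic is unchanged: when no character counts were gathered, token counts are converted at an assumed 4 characters per token. A minimal sketch of that fallback, with invented numbers for illustration:

    # hypothetical usage_metadata from a Gemini 1.x response with no parsed text parts
    usage = {"prompt_token_count": 1000, "candidates_token_count": 250}

    input_chars = usage["prompt_token_count"] * 4       # 4000 characters assumed
    output_chars = usage["candidates_token_count"] * 4  # 1000 characters assumed
    # recorded as the "text" unit, or "text_large_context" past the 128k threshold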
@@ -94,9 +94,10 @@ class _GoogleVertexRequest(_ProviderRequest):
             instrumentor=instrumentor,
             category=PayiCategories.google_vertex,
             streaming_type=_StreamingType.generator,
+            is_google_vertex_or_genai_client=True,
         )
         self._prompt_character_count = 0
-        self._candiates_character_count = 0
+        self._candidates_character_count = 0
         self._model_name: Optional[str] = None
 
     @override
@@ -227,19 +228,21 @@ class _GoogleVertexRequest(_ProviderRequest):
         for candidate in response_dict.get("candidates", []):
             parts = candidate.get("content", {}).get("parts", [])
             for part in parts:
-                self._candiates_character_count += count_chars_skip_spaces(part.get("text", ""))
+                self._candidates_character_count += count_chars_skip_spaces(part.get("text", ""))
 
         usage = response_dict.get("usage_metadata", {})
         if usage and "prompt_token_count" in usage and "candidates_token_count" in usage:
-            self._compute_usage(response_dict, streaming_candidates_characters=self._candiates_character_count)
+            vertex_compute_usage(
+                request=self,
+                model=self._get_model_name(response_dict),
+                response_dict=response_dict,
+                prompt_character_count=self._prompt_character_count,
+                streaming_candidates_characters=self._candidates_character_count,
+            )
            ingest = True
 
         return _ChunkResult(send_chunk_to_caller=True, ingest=ingest)
 
-    @staticmethod
-    def _is_character_billing_model(model: str) -> bool:
-        return model.startswith("gemini-1.")
-
     @override
     def process_synchronous_response(
         self,
@@ -256,108 +259,139 @@ class _GoogleVertexRequest(_ProviderRequest):
         if model:
             self._ingest["resource"] = "google." + model
 
-        self._compute_usage(response_dict)
+        vertex_compute_usage(
+            request=self,
+            model=model,
+            response_dict=response_dict,
+            prompt_character_count=self._prompt_character_count,
+            streaming_candidates_characters=self._candidates_character_count
+        )
 
         if log_prompt_and_response:
             self._ingest["provider_response_json"] = [json.dumps(response_dict)]
 
         return None
 
-    def add_units(self, key: str, input: Optional[int] = None, output: Optional[int] = None) -> None:
-        if key not in self._ingest["units"]:
-            self._ingest["units"][key] = {}
+def vertex_compute_usage(
+    request: _ProviderRequest,
+    model: Optional[str],
+    response_dict: 'dict[str, Any]',
+    prompt_character_count: int = 0,
+    streaming_candidates_characters: Optional[int] = None) -> None:
+
+    def is_character_billing_model(model: str) -> bool:
+        return model.startswith("gemini-1.")
+
+    def is_large_context_token_model(model: str, input_tokens: int) -> bool:
+        return model.startswith("gemini-2.5-pro") and input_tokens > 200_000
+
+    def add_units(request: _ProviderRequest, key: str, input: Optional[int] = None, output: Optional[int] = None) -> None:
+        if key not in request._ingest["units"]:
+            request._ingest["units"][key] = {}
         if input is not None:
-            self._ingest["units"][key]["input"] = input
+            request._ingest["units"][key]["input"] = input
         if output is not None:
-            self._ingest["units"][key]["output"] = output
-
-    def _compute_usage(self, response_dict: 'dict[str, Any]', streaming_candidates_characters: Optional[int] = None) -> None:
-        usage = response_dict.get("usage_metadata", {})
-        input = usage.get("prompt_token_count", 0)
+            request._ingest["units"][key]["output"] = output
 
-        prompt_tokens_details: list[dict[str, Any]] = usage.get("prompt_tokens_details", [])
-        candidates_tokens_details: list[dict[str, Any]] = usage.get("candidates_tokens_details", [])
+    usage = response_dict.get("usage_metadata", {})
+    input = usage.get("prompt_token_count", 0)
 
-        model: Optional[str] = self._get_model_name(response_dict)
-        if not model:
-            model = ""
-
-        # for character billing only
-        large_context = "" if input < 128000 else "_large_context"
-
-        if self._is_character_billing_model(model):
-            # gemini 1.0 and 1.5 units are reported in characters, per second, per image, etc...
-            for details in prompt_tokens_details:
-                modality = details.get("modality", "")
-                if not modality:
-                    continue
-
-                modality_token_count = details.get("token_count", 0)
-                if modality == "TEXT":
-                    input = self._prompt_character_count
-                    if input == 0:
-                        # back up calc if nothing was calculated from the prompt
-                        input = response_dict["usage_metadata"]["prompt_token_count"] * 4
-
-                    output = 0
-                    if streaming_candidates_characters is None:
-                        for candidate in response_dict.get("candidates", []):
-                            parts = candidate.get("content", {}).get("parts", [])
-                            for part in parts:
-                                output += count_chars_skip_spaces(part.get("text", ""))
-
-                        if output == 0:
-                            # back up calc if no parts
-                            output = response_dict["usage_metadata"]["candidates_token_count"] * 4
-                    else:
-                        output = streaming_candidates_characters
-
-                    self._ingest["units"]["text"+large_context] = Units(input=input, output=output)
-
-                elif modality == "IMAGE":
-                    num_images = math.ceil(modality_token_count / 258)
-                    self.add_units("vision"+large_context, input=num_images)
-
-                elif modality == "VIDEO":
-                    video_seconds = math.ceil(modality_token_count / 285)
-                    self.add_units("video"+large_context, input=video_seconds)
-
-                elif modality == "AUDIO":
-                    audio_seconds = math.ceil(modality_token_count / 25)
-                    self.add_units("audio"+large_context, input=audio_seconds)
+    prompt_tokens_details: list[dict[str, Any]] = usage.get("prompt_tokens_details", [])
+    candidates_tokens_details: list[dict[str, Any]] = usage.get("candidates_tokens_details", [])
 
-        else:
-            for details in prompt_tokens_details:
-                modality = details.get("modality", "")
-                if not modality:
-                    continue
-
-                modality_token_count = details.get("token_count", 0)
-                if modality == "IMAGE":
-                    self.add_units("vision", input=modality_token_count)
-                elif modality in ("VIDEO", "AUDIO", "TEXT"):
-                    self.add_units(modality.lower(), input=modality_token_count)
-            for details in candidates_tokens_details:
-                modality = details.get("modality", "")
-                if not modality:
-                    continue
-
-                modality_token_count = details.get("token_count", 0)
-                if modality in ("VIDEO", "AUDIO", "TEXT", "IMAGE"):
-                    self.add_units(modality.lower(), output=modality_token_count)
-
-        if not self._ingest["units"]:
-            input = usage.get("prompt_token_count", 0)
-            output = usage.get("candidates_token_count", 0) * 4
-
-            if self._is_character_billing_model(model):
-                if self._prompt_character_count > 0:
-                    input = self._prompt_character_count
+    if not model:
+        model = ""
+
+    large_context = ""
+
+    if is_character_billing_model(model):
+        if input > 128000:
+            large_context = "_large_context"
+
+        # gemini 1.0 and 1.5 units are reported in characters, per second, per image, etc...
+        for details in prompt_tokens_details:
+            modality = details.get("modality", "")
+            if not modality:
+                continue
+
+            modality_token_count = details.get("token_count", 0)
+            if modality == "TEXT":
+                input = prompt_character_count
+                if input == 0:
+                    # back up calc if nothing was calculated from the prompt
+                    input = response_dict["usage_metadata"]["prompt_token_count"] * 4
+
+                output = 0
+                if streaming_candidates_characters is None:
+                    for candidate in response_dict.get("candidates", []):
+                        parts = candidate.get("content", {}).get("parts", [])
+                        for part in parts:
+                            output += count_chars_skip_spaces(part.get("text", ""))
+
+                    if output == 0:
+                        # back up calc if no parts
+                        output = response_dict["usage_metadata"]["candidates_token_count"] * 4
                 else:
-                    input *= 4
+                    output = streaming_candidates_characters
 
-                # if no units were added, add a default unit and assume 4 characters per token
-                self._ingest["units"]["text"+large_context] = Units(input=input, output=output)
+                request._ingest["units"]["text"+large_context] = Units(input=input, output=output)
+
+            elif modality == "IMAGE":
+                num_images = math.ceil(modality_token_count / 258)
+                add_units(request, "vision"+large_context, input=num_images)
+
+            elif modality == "VIDEO":
+                video_seconds = math.ceil(modality_token_count / 285)
+                add_units(request, "video"+large_context, input=video_seconds)
+
+            elif modality == "AUDIO":
+                audio_seconds = math.ceil(modality_token_count / 25)
+                add_units(request, "audio"+large_context, input=audio_seconds)
+
+        # No need to gover the candidates_tokens_details as all the character based 1.x models only output TEXT
+        # for details in candidates_tokens_details:
+
+    else:
+        # thinking tokens introduced in 2.5 after the transition to token based billing
+        thinking_token_count = usage.get("thoughts_token_count", 0)
+
+        if is_large_context_token_model(model, input):
+            large_context = "_large_context"
+
+        for details in prompt_tokens_details:
+            modality = details.get("modality", "")
+            if not modality:
+                continue
+
+            modality_token_count = details.get("token_count", 0)
+            if modality == "IMAGE":
+                add_units(request, "vision"+large_context, input=modality_token_count)
+            elif modality in ("VIDEO", "AUDIO", "TEXT"):
+                add_units(request, modality.lower()+large_context, input=modality_token_count)
+        for details in candidates_tokens_details:
+            modality = details.get("modality", "")
+            if not modality:
+                continue
+
+            modality_token_count = details.get("token_count", 0)
+            if modality in ("VIDEO", "AUDIO", "TEXT", "IMAGE"):
+                add_units(request, modality.lower()+large_context, output=modality_token_count)
+
+        if thinking_token_count > 0:
+            add_units(request, "reasoning"+large_context, output=thinking_token_count)
+
+    if not request._ingest["units"]:
+        input = usage.get("prompt_token_count", 0)
+        output = usage.get("candidates_token_count", 0) * 4
+
+        if is_character_billing_model(model):
+            if prompt_character_count > 0:
+                input = prompt_character_count
             else:
-                # if no units were added, add a default unit
-                self._ingest["units"]["text"] = Units(input=input, output=output)
+                input *= 4
+
+            # if no units were added, add a default unit and assume 4 characters per token
+            request._ingest["units"]["text"+large_context] = Units(input=input, output=output)
+        else:
+            # if no units were added, add a default unit
+            request._ingest["units"]["text"] = Units(input=input, output=output)
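For character-billed Gemini 1.x models, vertex_compute_usage converts each non-text prompt modality's token count into the unit Google bills for: one image per 258 tokens, one second of video per 285 tokens, and one second of audio per 25 tokens, rounding up. A worked example of the conversion, with invented token counts:

    import math

    # hypothetical prompt_tokens_details token counts per modality
    image_tokens, video_tokens, audio_tokens = 516, 570, 60

    math.ceil(image_tokens / 258)  # 2 images ingested as "vision" units
    math.ceil(video_tokens / 285)  # 2 seconds ingested as "video" units
    math.ceil(audio_tokens / 25)   # 3 seconds ingested as "audio" units

On the token-billed path, the rewrite also breaks out thoughts_token_count into a separate "reasoning" output unit and applies the "_large_context" suffix to gemini-2.5-pro requests whose prompt exceeds 200,000 tokens; previously, token-billed models never received a large-context suffix.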
payi/lib/instrument.py CHANGED
@@ -35,12 +35,20 @@ class _ChunkResult:
     ingest: bool = False
 
 class _ProviderRequest:
-    def __init__(self, instrumentor: '_PayiInstrumentor', category: str, streaming_type: '_StreamingType'):
+    def __init__(
+        self, instrumentor: '_PayiInstrumentor',
+        category: str,
+        streaming_type: '_StreamingType',
+        is_aws_client: Optional[bool] = None,
+        is_google_vertex_or_genai_client: Optional[bool] = None,
+    ) -> None:
         self._instrumentor: '_PayiInstrumentor' = instrumentor
         self._estimated_prompt_tokens: Optional[int] = None
         self._category: str = category
         self._ingest: IngestUnitsParams = { "category": category, "units": {} }  # type: ignore
         self._streaming_type: '_StreamingType' = streaming_type
+        self._is_aws_client: Optional[bool] = is_aws_client
+        self._is_google_vertex_or_genai_client: Optional[bool] = is_google_vertex_or_genai_client
 
     def process_chunk(self, _chunk: Any) -> _ChunkResult:
         return _ChunkResult(send_chunk_to_caller=True)
@@ -55,16 +63,25 @@ class _ProviderRequest:
     def process_request_prompt(self, prompt: 'dict[str, Any]', args: Sequence[Any], kwargs: 'dict[str, Any]') -> None:
         ...
 
-    def is_bedrock(self) -> bool:
-        return self._category == PayiCategories.aws_bedrock
+    def process_initial_stream_response(self, response: Any) -> None:
+        pass
+
+    @property
+    def is_aws_client(self) -> bool:
+        return self._is_aws_client if self._is_aws_client is not None else False
+
+    @property
+    def is_google_vertex_or_genai_client(self) -> bool:
+        return self._is_google_vertex_or_genai_client if self._is_google_vertex_or_genai_client is not None else False
 
-    def is_vertex(self) -> bool:
-        return self._category == PayiCategories.google_vertex
-
     def process_exception(self, exception: Exception, kwargs: Any, ) -> bool:  # noqa: ARG002
         self.exception_to_semantic_failure(exception)
         return True
 
+    @property
+    def supports_extra_headers(self) -> bool:
+        return not self.is_aws_client and not self.is_google_vertex_or_genai_client
+
     @property
     def streaming_type(self) -> '_StreamingType':
         return self._streaming_type
@@ -277,6 +294,22 @@ class _PayiInstrumentor:
         except Exception as e:
             self._logger.error(f"Error instrumenting Google GenAi: {e}")
 
+    @staticmethod
+    def _create_logged_ingest_units(
+        ingest_units: IngestUnitsParams,
+    ) -> IngestUnitsParams:
+        # remove large and potentially sensitive data from the log
+        log_ingest_units: IngestUnitsParams = ingest_units.copy()
+
+        log_ingest_units.pop('provider_request_json', None)
+        log_ingest_units.pop('provider_response_json', None)
+
+        # Pop system.stack_trace from properties if it exists
+        if 'properties' in log_ingest_units and isinstance(log_ingest_units['properties'], dict):
+            log_ingest_units['properties'].pop('system.stack_trace', None)
+
+        return log_ingest_units
+
     def _process_ingest_units(self, ingest_units: IngestUnitsParams, log_data: 'dict[str, str]') -> bool:
         if int(ingest_units.get("http_status_code") or 0) < 400:
             units = ingest_units.get("units", {})
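_create_logged_ingest_units exists so the new debug logging below can record ingest payloads without leaking prompts, responses, or stack traces. A sketch of its effect on a hypothetical payload (field values invented for illustration):

    ingest_units = {
        "category": "google.vertex",
        "units": {"text": {"input": 100, "output": 25}},
        "provider_request_json": "...",     # dropped from the logged copy
        "provider_response_json": ["..."],  # dropped from the logged copy
        "properties": {"app": "demo", "system.stack_trace": "..."},
    }

    logged = _PayiInstrumentor._create_logged_ingest_units(ingest_units)  # type: ignore
    # logged retains category, units, and properties["app"], but not the
    # JSON bodies or properties["system.stack_trace"]

Note that dict.copy() is shallow, so the pop on properties also affects the caller's dict — a side effect worth knowing about if the same payload is ingested afterwards.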
@@ -327,6 +360,9 @@ class _PayiInstrumentor:
             return None
 
         try:
+            if self._logger.isEnabledFor(logging.DEBUG):
+                self._logger.debug(f"_aingest_units: sending ({self._create_logged_ingest_units(ingest_units)})")
+
             if self._apayi:
                 ingest_response = await self._apayi.ingest.units(**ingest_units)
             elif self._payi:
@@ -399,6 +435,9 @@ class _PayiInstrumentor:
 
         try:
             if self._payi:
+                if self._logger.isEnabledFor(logging.DEBUG):
+                    self._logger.debug(f"_ingest_units: sending ({self._create_logged_ingest_units(ingest_units)})")
+
                 ingest_response = self._payi.ingest.units(**ingest_units)
                 self._logger.debug(f"_ingest_units: success ({ingest_response})")
 
@@ -806,8 +845,7 @@ class _PayiInstrumentor:
         context = self.get_context()
 
         if not context:
-            if request.is_bedrock():
-                # boto3 doesn't allow extra_headers
+            if not request.supports_extra_headers:
                 kwargs.pop("extra_headers", None)
 
             self._logger.debug(f"invoke_wrapper: no instrumentation context, exit early")
@@ -822,8 +860,7 @@ class _PayiInstrumentor:
             self._update_extra_headers(context, extra_headers)
 
         if context.get("proxy", self._proxy_default):
-            if request.is_bedrock():
-                # boto3 doesn't allow extra_headers
+            if not request.supports_extra_headers:
                 kwargs.pop("extra_headers", None)
             elif "extra_headers" not in kwargs and extra_headers:
                 # assumes anthropic and openai clients
@@ -899,7 +936,7 @@ class _PayiInstrumentor:
                 request=request,
             )
 
-            if request.is_bedrock():
+            if request.is_aws_client:
                 if "body" in response:
                     response["body"] = stream_result
                 else:
@@ -1084,9 +1121,10 @@ class _StreamIteratorWrapper(ObjectProxy):  # type: ignore
 
         instrumentor._logger.debug(f"StreamIteratorWrapper: instance {instance}, category {request._category}")
 
+        request.process_initial_stream_response(response)
+
         bedrock_from_stream: bool = False
-        if request.is_bedrock():
-            request._ingest["provider_response_id"] = response["ResponseMetadata"]["RequestId"]
+        if request.is_aws_client:
             stream = response.get("stream", None)
 
             if stream:
@@ -1108,7 +1146,6 @@ class _StreamIteratorWrapper(ObjectProxy):  # type: ignore
         self._request: _ProviderRequest = request
 
         self._first_token: bool = True
-        self._is_bedrock: bool = request.is_bedrock()
         self._bedrock_from_stream: bool = bedrock_from_stream
         self._ingested: bool = False
         self._iter_started: bool = False
@@ -1131,7 +1168,7 @@ class _StreamIteratorWrapper(ObjectProxy):  # type: ignore
 
     def __iter__(self) -> Any:
         self._iter_started = True
-        if self._is_bedrock:
+        if self._request.is_aws_client:
             # MUST reside in a separate function so that the yield statement (e.g. the generator) doesn't implicitly return its own iterator and overriding self
             self._instrumentor._logger.debug(f"StreamIteratorWrapper: bedrock __iter__")
             return self._iter_bedrock()
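The scattered is_bedrock()/is_vertex() category checks are replaced by constructor flags surfaced as properties, with supports_extra_headers capturing the behavior call sites actually care about: boto3 and the Google clients do not accept an extra_headers argument, so the wrapper strips it. A condensed sketch of the pattern, reduced from the diff above (class name abbreviated, header value invented):

    from typing import Any, Optional

    class ProviderRequestSketch:
        def __init__(self, is_aws_client: Optional[bool] = None,
                     is_google_vertex_or_genai_client: Optional[bool] = None) -> None:
            self._is_aws_client = is_aws_client
            self._is_google_vertex_or_genai_client = is_google_vertex_or_genai_client

        @property
        def supports_extra_headers(self) -> bool:
            # only clients with OpenAI/Anthropic-style kwargs accept extra_headers
            return not self._is_aws_client and not self._is_google_vertex_or_genai_client

    request = ProviderRequestSketch(is_google_vertex_or_genai_client=True)
    kwargs: dict[str, Any] = {"extra_headers": {"x-example": "1"}}
    if not request.supports_extra_headers:
        kwargs.pop("extra_headers", None)  # mirrors the invoke_wrapper paths above

Adding a new provider now means passing a flag at construction instead of editing every category comparison.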
@@ -16,6 +16,14 @@ from .categories import (
     CategoriesResourceWithStreamingResponse,
     AsyncCategoriesResourceWithStreamingResponse,
 )
+from .fixed_cost_resources import (
+    FixedCostResourcesResource,
+    AsyncFixedCostResourcesResource,
+    FixedCostResourcesResourceWithRawResponse,
+    AsyncFixedCostResourcesResourceWithRawResponse,
+    FixedCostResourcesResourceWithStreamingResponse,
+    AsyncFixedCostResourcesResourceWithStreamingResponse,
+)
 
 __all__ = [
     "ResourcesResource",
@@ -24,6 +32,12 @@ __all__ = [
     "AsyncResourcesResourceWithRawResponse",
     "ResourcesResourceWithStreamingResponse",
     "AsyncResourcesResourceWithStreamingResponse",
+    "FixedCostResourcesResource",
+    "AsyncFixedCostResourcesResource",
+    "FixedCostResourcesResourceWithRawResponse",
+    "AsyncFixedCostResourcesResourceWithRawResponse",
+    "FixedCostResourcesResourceWithStreamingResponse",
+    "AsyncFixedCostResourcesResourceWithStreamingResponse",
     "CategoriesResource",
     "AsyncCategoriesResource",
     "CategoriesResourceWithRawResponse",