payi-0.1.0a109-py3-none-any.whl → payi-0.1.0a111-py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.

payi/_version.py CHANGED
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

 __title__ = "payi"
-__version__ = "0.1.0-alpha.109" # x-release-please-version
+__version__ = "0.1.0-alpha.111" # x-release-please-version
@@ -45,6 +45,18 @@ class AnthropicInstrumentor:
                 stream_messages_wrapper(instrumentor),
             )

+            wrap_function_wrapper(
+                "anthropic.resources.beta.messages",
+                "Messages.create",
+                messages_wrapper(instrumentor),
+            )
+
+            wrap_function_wrapper(
+                "anthropic.resources.beta.messages",
+                "Messages.stream",
+                stream_messages_wrapper(instrumentor),
+            )
+
             wrap_function_wrapper(
                 "anthropic.resources.messages",
                 "AsyncMessages.create",
@@ -57,6 +69,18 @@ class AnthropicInstrumentor:
                 astream_messages_wrapper(instrumentor),
             )

+            wrap_function_wrapper(
+                "anthropic.resources.beta.messages",
+                "AsyncMessages.create",
+                amessages_wrapper(instrumentor),
+            )
+
+            wrap_function_wrapper(
+                "anthropic.resources.beta.messages",
+                "AsyncMessages.stream",
+                astream_messages_wrapper(instrumentor),
+            )
+
         except Exception as e:
             instrumentor._logger.debug(f"Error instrumenting anthropic: {e}")
             return
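
Both hunks register for anthropic.resources.beta.messages the same sync and async wrappers already applied to the stable anthropic.resources.messages resource, so calls made through the beta client surface are metered identically. For readers unfamiliar with wrapt, a minimal runnable sketch of the wrap_function_wrapper pattern follows; patching json.dumps is an illustrative stand-in for the Messages.create targets above, and log_calls is not payi code:

# Minimal sketch of the wrapt patching pattern used by the instrumentor.
# Patching json.dumps stands in for "anthropic.resources.beta.messages" /
# "Messages.create"; log_calls is an illustrative wrapper, not payi code.
import json

from wrapt import wrap_function_wrapper

def log_calls(wrapped, instance, args, kwargs):
    # wrapt hands the wrapper the original callable, the bound instance
    # (None for plain functions), and the call's arguments.
    print(f"calling {wrapped.__name__}")
    result = wrapped(*args, **kwargs)  # delegate to the original
    print(f"{wrapped.__name__} returned")
    return result

# Resolves the module by import path and replaces the attribute in place,
# so every subsequent caller goes through log_calls.
wrap_function_wrapper("json", "dumps", log_calls)
print(json.dumps({"ok": True}))
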
@@ -220,23 +244,52 @@ class _AnthropicProviderRequest(_ProviderRequest):

         return True

-def anthropic_process_synchronous_response(request: _ProviderRequest, response: 'dict[str, Any]', log_prompt_and_response: bool, assign_id: bool) -> Any:
-    usage = response['usage']
+def anthropic_process_compute_input_cost(request: _ProviderRequest, usage: 'dict[str, Any]') -> int:
     input = usage['input_tokens']
-    output = usage['output_tokens']
     units: dict[str, Units] = request._ingest["units"]

     cache_creation_input_tokens = usage.get("cache_creation_input_tokens", 0)
-    if cache_creation_input_tokens > 0:
-        units["text_cache_write"] = Units(input=cache_creation_input_tokens, output=0)
+    cache_read_input_tokens = usage.get("cache_read_input_tokens", 0)
+
+    total_input_tokens = input + cache_creation_input_tokens + cache_read_input_tokens
+
+    request._is_large_context = total_input_tokens > 200000
+    large_context = "_large_context" if request._is_large_context else ""
+
+    cache_creation: dict[str, int] = usage.get("cache_creation", {})
+    ephemeral_5m_input_tokens: Optional[int] = None
+    ephemeral_1h_input_tokens: Optional[int] = None
+    textCacheWriteAdded = False
+
+    if cache_creation:
+        ephemeral_5m_input_tokens = cache_creation.get("ephemeral_5m_input_tokens", 0)
+        if ephemeral_5m_input_tokens > 0:
+            textCacheWriteAdded = True
+            units["text_cache_write"+large_context] = Units(input=ephemeral_5m_input_tokens, output=0)
+
+        ephemeral_1h_input_tokens = cache_creation.get("ephemeral_1h_input_tokens", 0)
+        if ephemeral_1h_input_tokens > 0:
+            textCacheWriteAdded = True
+            units["text_cache_write_1h"+large_context] = Units(input=ephemeral_1h_input_tokens, output=0)
+
+    if textCacheWriteAdded is False and cache_creation_input_tokens > 0:
+        units["text_cache_write"+large_context] = Units(input=cache_creation_input_tokens, output=0)

     cache_read_input_tokens = usage.get("cache_read_input_tokens", 0)
     if cache_read_input_tokens > 0:
-        units["text_cache_read"] = Units(input=cache_read_input_tokens, output=0)
+        units["text_cache_read"+large_context] = Units(input=cache_read_input_tokens, output=0)

-    input = _PayiInstrumentor.update_for_vision(input, units, request._estimated_prompt_tokens)
+    return _PayiInstrumentor.update_for_vision(input, units, request._estimated_prompt_tokens, is_large_context=request._is_large_context)

-    units["text"] = Units(input=input, output=output)
+def anthropic_process_synchronous_response(request: _ProviderRequest, response: 'dict[str, Any]', log_prompt_and_response: bool, assign_id: bool) -> Any:
+    usage = response['usage']
+    units: dict[str, Units] = request._ingest["units"]
+
+    input_tokens = anthropic_process_compute_input_cost(request, usage)
+    output = usage['output_tokens']
+
+    large_context = "_large_context" if request._is_large_context else ""
+    units["text"+large_context] = Units(input=input_tokens, output=output)

     content = response.get('content', [])
     if content:
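
The refactor above pulls the input-side accounting into anthropic_process_compute_input_cost and adds two behaviors: cache writes are split into 5-minute and 1-hour ephemeral buckets when Anthropic reports a cache_creation breakdown, and every unit key gains a _large_context suffix once combined input crosses 200000 tokens. A standalone sketch of that bucketing, with Units modeled as a plain dict and the update_for_vision adjustment omitted:

# Standalone sketch of the cache-aware bucketing above. Units is modeled
# as a plain dict and the update_for_vision adjustment is omitted.
def bucket_input_units(usage: dict) -> dict:
    units: dict[str, dict[str, int]] = {}
    input_tokens = usage["input_tokens"]
    cache_write = usage.get("cache_creation_input_tokens", 0)
    cache_read = usage.get("cache_read_input_tokens", 0)

    # Large-context pricing keys in once the combined input crosses 200000.
    total = input_tokens + cache_write + cache_read
    suffix = "_large_context" if total > 200000 else ""

    # Prefer the per-TTL breakdown when Anthropic reports one ...
    cache_creation = usage.get("cache_creation", {})
    wrote = False
    ephemeral_5m = cache_creation.get("ephemeral_5m_input_tokens", 0)
    if ephemeral_5m > 0:
        wrote = True
        units["text_cache_write" + suffix] = {"input": ephemeral_5m, "output": 0}
    ephemeral_1h = cache_creation.get("ephemeral_1h_input_tokens", 0)
    if ephemeral_1h > 0:
        wrote = True
        units["text_cache_write_1h" + suffix] = {"input": ephemeral_1h, "output": 0}
    # ... otherwise fall back to the aggregate cache-write count.
    if not wrote and cache_write > 0:
        units["text_cache_write" + suffix] = {"input": cache_write, "output": 0}

    if cache_read > 0:
        units["text_cache_read" + suffix] = {"input": cache_read, "output": 0}

    units["text" + suffix] = {"input": input_tokens, "output": 0}
    return units

sample = {
    "input_tokens": 1200,
    "cache_creation_input_tokens": 150000,
    "cache_read_input_tokens": 60000,
    "cache_creation": {"ephemeral_5m_input_tokens": 150000},
}
# 1200 + 150000 + 60000 = 211200 > 200000, so every key gets the suffix:
# text_cache_write_large_context, text_cache_read_large_context, text_large_context
print(bucket_input_units(sample))
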
@@ -277,31 +330,25 @@ def anthropic_process_chunk(request: _ProviderRequest, chunk: 'dict[str, Any]',
         usage = message['usage']
         units = request._ingest["units"]

-        input = _PayiInstrumentor.update_for_vision(usage['input_tokens'], units, request._estimated_prompt_tokens)
-
-        units["text"] = Units(input=input, output=0)
-
-        text_cache_write: int = usage.get("cache_creation_input_tokens", 0)
-        if text_cache_write > 0:
-            units["text_cache_write"] = Units(input=text_cache_write, output=0)
+        input = anthropic_process_compute_input_cost(request, usage)

-        text_cache_read: int = usage.get("cache_read_input_tokens", 0)
-        if text_cache_read > 0:
-            units["text_cache_read"] = Units(input=text_cache_read, output=0)
+        large_context = "_large_context" if request._is_large_context else ""
+        units["text"+large_context] = Units(input=input, output=0)

         request._instrumentor._logger.debug(f"Anthropic streaming captured {input} input tokens, ")

     elif type == "message_delta":
         usage = chunk.get('usage', {})
         ingest = True
+        large_context = "_large_context" if request._is_large_context else ""

         # Web search will return an updated input tokens value at the end of streaming
         input_tokens = usage.get('input_tokens', None)
         if input_tokens is not None:
             request._instrumentor._logger.debug(f"Anthropic streaming finished, updated input tokens: {input_tokens}")
-            request._ingest["units"]["text"]["input"] = input_tokens
+            request._ingest["units"]["text"+large_context]["input"] = input_tokens

-        request._ingest["units"]["text"]["output"] = usage.get('output_tokens', 0)
+        request._ingest["units"]["text"+large_context]["output"] = usage.get('output_tokens', 0)

         request._instrumentor._logger.debug(f"Anthropic streaming finished: output tokens {usage.get('output_tokens', 0)} ")
@@ -5,6 +5,7 @@ from functools import wraps
 from typing_extensions import override

 from wrapt import ObjectProxy, wrap_function_wrapper # type: ignore
+from tokenizers import Tokenizer # type: ignore

 from payi.lib.helpers import PayiCategories, PayiHeaderNames, payi_aws_bedrock_url
 from payi.types.ingest_units_params import Units
@@ -102,6 +103,8 @@ def _redirect_to_payi(request: Any, event_name: str, **_: 'dict[str, Any]') -> None:


 class InvokeResponseWrapper(ObjectProxy): # type: ignore
+    _cohere_embed_english_v3_tokenizer: Optional[Tokenizer] = None
+
     def __init__(
         self,
         response: Any,
@@ -160,6 +163,25 @@ class InvokeResponseWrapper(ObjectProxy): # type: ignore

             bedrock_converse_process_synchronous_function_call(self._request, response)

+        elif self._request._is_amazon_titan_embed_text_v1:
+            input = response.get('inputTextTokenCount', 0)
+            units["text"] = Units(input=input, output=0)
+
+        elif self._request._is_cohere_embed_english_v3:
+            texts: list[str] = response.get("texts", [])
+            if texts and len(texts) > 0:
+                text = " ".join(texts)
+
+                if self._cohere_embed_english_v3_tokenizer is None:
+                    current_dir = os.path.dirname(os.path.abspath(__file__))
+                    tokenizer_path = os.path.join(current_dir, "data", "cohere_embed_english_v3.json")
+                    self._cohere_embed_english_v3_tokenizer = Tokenizer.from_file(tokenizer_path) # type: ignore
+
+                tokens: list = self._cohere_embed_english_v3_tokenizer.encode(text, add_special_tokens=False).tokens # type: ignore
+
+                if tokens and isinstance(tokens, list):
+                    units["text"] = Units(input=len(tokens), output=0) # type: ignore
+
         if self._log_prompt_and_response:
             ingest["provider_response_json"] = data.decode('utf-8') # type: ignore
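The Cohere branch has no token count in the invoke response, so the wrapper tokenizes the echoed texts itself with the Hugging Face tokenizers package, lazily loading a serialized tokenizer expected next to the module at data/cohere_embed_english_v3.json. A minimal sketch of the tokenizers calls involved; loading bert-base-uncased from the Hub is a stand-in for the bundled file:

# Minimal sketch of the tokenizers calls used above. Loading
# "bert-base-uncased" from the Hub stands in for the wrapper's
# Tokenizer.from_file("data/cohere_embed_english_v3.json").
from tokenizers import Tokenizer

tok = Tokenizer.from_pretrained("bert-base-uncased")
enc = tok.encode("hello bedrock embeddings", add_special_tokens=False)
# The wrapper bills the length of the token list as input units.
print(enc.tokens, len(enc.tokens))
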
@@ -287,6 +309,8 @@ class _BedrockInvokeProviderRequest(_BedrockProviderRequest):
         self._is_anthropic: bool = 'anthropic' in model_id
         self._is_nova: bool = 'nova' in model_id
         self._is_meta: bool = 'meta' in model_id
+        self._is_amazon_titan_embed_text_v1: bool = 'amazon.titan-embed-text-v1' == model_id
+        self._is_cohere_embed_english_v3: bool = 'cohere.embed-english-v3' == model_id

     @override
     def process_request(self, instance: Any, extra_headers: 'dict[str, str]', args: Sequence[Any], kwargs: Any) -> bool:
@@ -302,7 +326,17 @@ class _BedrockInvokeProviderRequest(_BedrockProviderRequest):
                 anthropic_has_image_and_get_texts(self, messages)
             except Exception as e:
                 self._instrumentor._logger.debug(f"Bedrock invoke error processing request body: {e}")
-
+        elif self._is_cohere_embed_english_v3:
+            try:
+                body = json.loads(kwargs.get("body", ""))
+                input_type = body.get("input_type", "")
+                if input_type == 'image':
+                    images = body.get("images", [])
+                    if len(images) > 0:
+                        # only supports one image according to docs
+                        self._ingest["units"]["vision"] = Units(input=1, output=0)
+            except Exception as e:
+                self._instrumentor._logger.debug(f"Bedrock invoke error processing request body: {e}")
         return True

     @override
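
On the request side, process_request now parses the invoke body for cohere.embed-english-v3 and bills a single vision unit when input_type is "image". For reference, an invoke that would take the new branch looks roughly like this; the boto3 call is ordinary Bedrock runtime usage and the image payload is a truncated stand-in:

# Illustrative Bedrock invoke that the new elif branch would classify as
# a vision request; the base64 image payload is a truncated stand-in.
import json

import boto3

client = boto3.client("bedrock-runtime")
body = json.dumps({
    "input_type": "image",
    # the branch above records this as Units(input=1, output=0) under "vision"
    "images": ["data:image/png;base64,iVBORw0KGgoAAAANSUhEUg..."],
})
client.invoke_model(modelId="cohere.embed-english-v3", body=body)
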
payi/lib/VertexRequest.py CHANGED
@@ -148,7 +148,7 @@ class _VertexRequest(_ProviderRequest): # type: ignore
         return model.startswith("gemini-1.")

     def is_large_context_token_model(model: str, input_tokens: int) -> bool:
-        return model.startswith("gemini-2.5-pro") and input_tokens > 200_000
+        return model.startswith("gemini-2.5-pro") and input_tokens > 200000

     def add_units(request: _ProviderRequest, key: str, input: Optional[int] = None, output: Optional[int] = None) -> None:
         if key not in request._ingest["units"]:
@@ -172,6 +172,7 @@ class _VertexRequest(_ProviderRequest): # type: ignore

         if is_character_billing_model(model):
             if input > 128000:
+                self._is_large_context = True
                 large_context = "_large_context"

                 # gemini 1.0 and 1.5 units are reported in characters, per second, per image, etc...
@@ -222,6 +223,7 @@ class _VertexRequest(_ProviderRequest): # type: ignore
         thinking_token_count = usage.get("thoughts_token_count", 0)

         if is_large_context_token_model(model, input):
+            self._is_large_context = True
             large_context = "_large_context"

         cache_details: dict[str, int] = {}