payi 0.1.0a110__py3-none-any.whl → 0.1.0a111__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of payi might be problematic.
- payi/_version.py +1 -1
- payi/lib/AnthropicInstrumentor.py +67 -20
- payi/lib/BedrockInstrumentor.py +35 -1
- payi/lib/VertexRequest.py +3 -1
- payi/lib/data/cohere_embed_english_v3.json +30706 -0
- payi/lib/instrument.py +4 -2
- {payi-0.1.0a110.dist-info → payi-0.1.0a111.dist-info}/METADATA +2 -1
- {payi-0.1.0a110.dist-info → payi-0.1.0a111.dist-info}/RECORD +10 -9
- {payi-0.1.0a110.dist-info → payi-0.1.0a111.dist-info}/WHEEL +0 -0
- {payi-0.1.0a110.dist-info → payi-0.1.0a111.dist-info}/licenses/LICENSE +0 -0
payi/_version.py
CHANGED

payi/lib/AnthropicInstrumentor.py
CHANGED

@@ -45,6 +45,18 @@ class AnthropicInstrumentor:
                 stream_messages_wrapper(instrumentor),
             )

+            wrap_function_wrapper(
+                "anthropic.resources.beta.messages",
+                "Messages.create",
+                messages_wrapper(instrumentor),
+            )
+
+            wrap_function_wrapper(
+                "anthropic.resources.beta.messages",
+                "Messages.stream",
+                stream_messages_wrapper(instrumentor),
+            )
+
             wrap_function_wrapper(
                 "anthropic.resources.messages",
                 "AsyncMessages.create",
@@ -57,6 +69,18 @@ class AnthropicInstrumentor:
                 astream_messages_wrapper(instrumentor),
             )

+            wrap_function_wrapper(
+                "anthropic.resources.beta.messages",
+                "AsyncMessages.create",
+                amessages_wrapper(instrumentor),
+            )
+
+            wrap_function_wrapper(
+                "anthropic.resources.beta.messages",
+                "AsyncMessages.stream",
+                astream_messages_wrapper(instrumentor),
+            )
+
         except Exception as e:
             instrumentor._logger.debug(f"Error instrumenting anthropic: {e}")
             return
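Note: the added calls use wrapt's wrap_function_wrapper to patch the new anthropic.resources.beta.messages module the same way the non-beta messages module was already patched. A minimal sketch of the pattern, assuming the anthropic package is installed; the real messages_wrapper factory lives in payi and is only stubbed here:

from wrapt import wrap_function_wrapper

def messages_wrapper(instrumentor):
    # Stub of payi's wrapper factory: the real one captures request metadata
    # before the call and token usage from the response afterwards.
    def wrapper(wrapped, instance, args, kwargs):
        response = wrapped(*args, **kwargs)  # invoke the original Messages.create
        return response
    return wrapper

# After this call, every beta Messages.create invocation is routed through wrapper().
wrap_function_wrapper(
    "anthropic.resources.beta.messages",
    "Messages.create",
    messages_wrapper(instrumentor=None),
)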
@@ -220,23 +244,52 @@ class _AnthropicProviderRequest(_ProviderRequest):

         return True

-def
-    usage = response['usage']
+def anthropic_process_compute_input_cost(request: _ProviderRequest, usage: 'dict[str, Any]') -> int:
     input = usage['input_tokens']
-    output = usage['output_tokens']
     units: dict[str, Units] = request._ingest["units"]

     cache_creation_input_tokens = usage.get("cache_creation_input_tokens", 0)
-
-
+    cache_read_input_tokens = usage.get("cache_read_input_tokens", 0)
+
+    total_input_tokens = input + cache_creation_input_tokens + cache_read_input_tokens
+
+    request._is_large_context = total_input_tokens > 200000
+    large_context = "_large_context" if request._is_large_context else ""
+
+    cache_creation: dict[str, int] = usage.get("cache_creation", {})
+    ephemeral_5m_input_tokens: Optional[int] = None
+    ephemeral_1h_input_tokens: Optional[int] = None
+    textCacheWriteAdded = False
+
+    if cache_creation:
+        ephemeral_5m_input_tokens = cache_creation.get("ephemeral_5m_input_tokens", 0)
+        if ephemeral_5m_input_tokens > 0:
+            textCacheWriteAdded = True
+            units["text_cache_write"+large_context] = Units(input=ephemeral_5m_input_tokens, output=0)
+
+        ephemeral_1h_input_tokens = cache_creation.get("ephemeral_1h_input_tokens", 0)
+        if ephemeral_1h_input_tokens > 0:
+            textCacheWriteAdded = True
+            units["text_cache_write_1h"+large_context] = Units(input=ephemeral_1h_input_tokens, output=0)
+
+    if textCacheWriteAdded is False and cache_creation_input_tokens > 0:
+        units["text_cache_write"+large_context] = Units(input=cache_creation_input_tokens, output=0)

     cache_read_input_tokens = usage.get("cache_read_input_tokens", 0)
     if cache_read_input_tokens > 0:
-        units["text_cache_read"] = Units(input=cache_read_input_tokens, output=0)
+        units["text_cache_read"+large_context] = Units(input=cache_read_input_tokens, output=0)

-
+    return _PayiInstrumentor.update_for_vision(input, units, request._estimated_prompt_tokens, is_large_context=request._is_large_context)

-
+def anthropic_process_synchronous_response(request: _ProviderRequest, response: 'dict[str, Any]', log_prompt_and_response: bool, assign_id: bool) -> Any:
+    usage = response['usage']
+    units: dict[str, Units] = request._ingest["units"]
+
+    input_tokens = anthropic_process_compute_input_cost(request, usage)
+    output = usage['output_tokens']
+
+    large_context = "_large_context" if request._is_large_context else ""
+    units["text"+large_context] = Units(input=input_tokens, output=output)

     content = response.get('content', [])
     if content:
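To make the new bucketing concrete: given a hypothetical Anthropic usage payload (values invented for illustration), anthropic_process_compute_input_cost would populate the following unit keys.

# Hypothetical usage block from an Anthropic response; numbers are made up.
usage = {
    "input_tokens": 1200,
    "cache_creation_input_tokens": 300,
    "cache_read_input_tokens": 500,
    "cache_creation": {
        "ephemeral_5m_input_tokens": 100,
        "ephemeral_1h_input_tokens": 200,
    },
}
# total_input_tokens = 1200 + 300 + 500 = 2000, so _is_large_context stays False
# units["text_cache_write"]    -> Units(input=100, output=0)   (5-minute cache writes)
# units["text_cache_write_1h"] -> Units(input=200, output=0)   (1-hour cache writes)
# units["text_cache_read"]     -> Units(input=500, output=0)
# The function returns the input count (after the update_for_vision adjustment),
# which the caller records under "text", or "text_large_context" above 200,000 tokens.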
@@ -277,31 +330,25 @@ def anthropic_process_chunk(request: _ProviderRequest, chunk: 'dict[str, Any]',
         usage = message['usage']
         units = request._ingest["units"]

-        input =
-
-        units["text"] = Units(input=input, output=0)
-
-        text_cache_write: int = usage.get("cache_creation_input_tokens", 0)
-        if text_cache_write > 0:
-            units["text_cache_write"] = Units(input=text_cache_write, output=0)
+        input = anthropic_process_compute_input_cost(request, usage)

-
-
-            units["text_cache_read"] = Units(input=text_cache_read, output=0)
+        large_context = "_large_context" if request._is_large_context else ""
+        units["text"+large_context] = Units(input=input, output=0)

         request._instrumentor._logger.debug(f"Anthropic streaming captured {input} input tokens, ")

     elif type == "message_delta":
         usage = chunk.get('usage', {})
         ingest = True
+        large_context = "_large_context" if request._is_large_context else ""

         # Web search will return an updated input tokens value at the end of streaming
         input_tokens = usage.get('input_tokens', None)
         if input_tokens is not None:
             request._instrumentor._logger.debug(f"Anthropic streaming finished, updated input tokens: {input_tokens}")
-            request._ingest["units"]["text"]["input"] = input_tokens
+            request._ingest["units"]["text"+large_context]["input"] = input_tokens

-        request._ingest["units"]["text"]["output"] = usage.get('output_tokens', 0)
+        request._ingest["units"]["text"+large_context]["output"] = usage.get('output_tokens', 0)

         request._instrumentor._logger.debug(f"Anthropic streaming finished: output tokens {usage.get('output_tokens', 0)} ")

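For reference, the message_delta branch above now targets the large-context key when it applies; a hypothetical final streaming chunk (field names follow Anthropic's message_delta event) would be handled like this:

# Hypothetical final chunk of a streamed Anthropic response.
chunk = {
    "type": "message_delta",
    "usage": {"input_tokens": 2048, "output_tokens": 512},
}
# If _is_large_context was set while handling message_start, the totals are written
# to units["text_large_context"]; otherwise they go to units["text"] as before.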
payi/lib/BedrockInstrumentor.py
CHANGED

@@ -5,6 +5,7 @@ from functools import wraps
 from typing_extensions import override

 from wrapt import ObjectProxy, wrap_function_wrapper # type: ignore
+from tokenizers import Tokenizer # type: ignore

 from payi.lib.helpers import PayiCategories, PayiHeaderNames, payi_aws_bedrock_url
 from payi.types.ingest_units_params import Units
@@ -102,6 +103,8 @@ def _redirect_to_payi(request: Any, event_name: str, **_: 'dict[str, Any]') -> None:


 class InvokeResponseWrapper(ObjectProxy): # type: ignore
+    _cohere_embed_english_v3_tokenizer: Optional[Tokenizer] = None
+
     def __init__(
         self,
         response: Any,
@@ -160,6 +163,25 @@ class InvokeResponseWrapper(ObjectProxy): # type: ignore

             bedrock_converse_process_synchronous_function_call(self._request, response)

+        elif self._request._is_amazon_titan_embed_text_v1:
+            input = response.get('inputTextTokenCount', 0)
+            units["text"] = Units(input=input, output=0)
+
+        elif self._request._is_cohere_embed_english_v3:
+            texts: list[str] = response.get("texts", [])
+            if texts and len(texts) > 0:
+                text = " ".join(texts)
+
+                if self._cohere_embed_english_v3_tokenizer is None:
+                    current_dir = os.path.dirname(os.path.abspath(__file__))
+                    tokenizer_path = os.path.join(current_dir, "data", "cohere_embed_english_v3.json")
+                    self._cohere_embed_english_v3_tokenizer = Tokenizer.from_file(tokenizer_path) # type: ignore
+
+                tokens: list = self._cohere_embed_english_v3_tokenizer.encode(text, add_special_tokens=False).tokens # type: ignore
+
+                if tokens and isinstance(tokens, list):
+                    units["text"] = Units(input=len(tokens), output=0) # type: ignore
+
         if self._log_prompt_and_response:
             ingest["provider_response_json"] = data.decode('utf-8') # type: ignore

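A standalone sketch of the Cohere token-counting step shown above, using the Hugging Face tokenizers package and the cohere_embed_english_v3.json vocabulary this release bundles under payi/lib/data/ (the sample texts and relative path are invented for the example):

import os
from tokenizers import Tokenizer

# Assumes the bundled vocabulary file is available at this relative path.
tokenizer_path = os.path.join("payi", "lib", "data", "cohere_embed_english_v3.json")
tokenizer = Tokenizer.from_file(tokenizer_path)

texts = ["first passage to embed", "second passage"]  # e.g. the "texts" echoed in the response
encoding = tokenizer.encode(" ".join(texts), add_special_tokens=False)
input_tokens = len(encoding.tokens)  # reported as the "text" unit input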
@@ -287,6 +309,8 @@ class _BedrockInvokeProviderRequest(_BedrockProviderRequest):
         self._is_anthropic: bool = 'anthropic' in model_id
         self._is_nova: bool = 'nova' in model_id
         self._is_meta: bool = 'meta' in model_id
+        self._is_amazon_titan_embed_text_v1: bool = 'amazon.titan-embed-text-v1' == model_id
+        self._is_cohere_embed_english_v3: bool = 'cohere.embed-english-v3' == model_id

     @override
     def process_request(self, instance: Any, extra_headers: 'dict[str, str]', args: Sequence[Any], kwargs: Any) -> bool:
@@ -302,7 +326,17 @@ class _BedrockInvokeProviderRequest(_BedrockProviderRequest):
                 anthropic_has_image_and_get_texts(self, messages)
             except Exception as e:
                 self._instrumentor._logger.debug(f"Bedrock invoke error processing request body: {e}")
-
+        elif self._is_cohere_embed_english_v3:
+            try:
+                body = json.loads( kwargs.get("body", ""))
+                input_type = body.get("input_type", "")
+                if input_type == 'image':
+                    images = body.get("images", [])
+                    if (len(images) > 0):
+                        # only supports one image according to docs
+                        self._ingest["units"]["vision"] = Units(input=1, output=0)
+            except Exception as e:
+                self._instrumentor._logger.debug(f"Bedrock invoke error processing request body: {e}")
         return True

     @override
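Because Cohere's Bedrock embed API accepts a single image per request (per the comment in the diff), the request-side check only needs to record one vision unit. A hypothetical invoke_model body that would trigger it (field names follow the request format parsed above; the data URI is truncated):

import json

# Hypothetical cohere.embed-english-v3 request body.
body = json.dumps({
    "input_type": "image",
    "images": ["data:image/png;base64,...."],
})
# process_request() parses this body and sets units["vision"] = Units(input=1, output=0).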
payi/lib/VertexRequest.py
CHANGED

@@ -148,7 +148,7 @@ class _VertexRequest(_ProviderRequest): # type: ignore
     return model.startswith("gemini-1.")

 def is_large_context_token_model(model: str, input_tokens: int) -> bool:
-    return model.startswith("gemini-2.5-pro") and input_tokens >
+    return model.startswith("gemini-2.5-pro") and input_tokens > 200000

 def add_units(request: _ProviderRequest, key: str, input: Optional[int] = None, output: Optional[int] = None) -> None:
     if key not in request._ingest["units"]:
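With this change, only gemini-2.5-pro calls with more than 200,000 input tokens are treated as large-context; illustrative calls to the function shown above:

is_large_context_token_model("gemini-2.5-pro", 250_000)    # True  -> "_large_context" unit keys
is_large_context_token_model("gemini-2.5-pro", 150_000)    # False
is_large_context_token_model("gemini-2.0-flash", 250_000)  # False (model not eligible)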
@@ -172,6 +172,7 @@ class _VertexRequest(_ProviderRequest): # type: ignore

         if is_character_billing_model(model):
             if input > 128000:
+                self._is_large_context = True
                 large_context = "_large_context"

             # gemini 1.0 and 1.5 units are reported in characters, per second, per image, etc...
@@ -222,6 +223,7 @@ class _VertexRequest(_ProviderRequest): # type: ignore
         thinking_token_count = usage.get("thoughts_token_count", 0)

         if is_large_context_token_model(model, input):
+            self._is_large_context = True
             large_context = "_large_context"

         cache_details: dict[str, int] = {}