payi 0.1.0a64__py3-none-any.whl → 0.1.0a66__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of payi might be problematic.
- payi/_version.py +1 -1
- payi/lib/AnthropicInstrumentor.py +60 -60
- payi/lib/BedrockInstrumentor.py +102 -92
- payi/lib/OpenAIInstrumentor.py +90 -58
- payi/lib/helpers.py +3 -0
- payi/lib/instrument.py +83 -79
- {payi-0.1.0a64.dist-info → payi-0.1.0a66.dist-info}/METADATA +1 -1
- {payi-0.1.0a64.dist-info → payi-0.1.0a66.dist-info}/RECORD +10 -10
- {payi-0.1.0a64.dist-info → payi-0.1.0a66.dist-info}/WHEEL +0 -0
- {payi-0.1.0a64.dist-info → payi-0.1.0a66.dist-info}/licenses/LICENSE +0 -0
payi/_version.py
CHANGED

payi/lib/AnthropicInstrumentor.py
CHANGED

@@ -1,13 +1,13 @@
 import logging
 from typing import Any, Union
+from typing_extensions import override

 import tiktoken
 from wrapt import wrap_function_wrapper  # type: ignore

-from payi.types import IngestUnitsParams
 from payi.types.ingest_units_params import Units

-from .instrument import _IsStreaming, _PayiInstrumentor
+from .instrument import _IsStreaming, _ProviderRequest, _PayiInstrumentor


 class AnthropicIntrumentor:

@@ -55,9 +55,7 @@ def chat_wrapper(
 ) -> Any:
     return instrumentor.chat_wrapper(
         "system.anthropic",
-
-        process_request,
-        process_synchronous_response,
+        _AnthropicProviderRequest(instrumentor),
         _IsStreaming.kwargs,
         wrapped,
         instance,

@@ -75,9 +73,7 @@ async def achat_wrapper(
 ) -> Any:
     return await instrumentor.achat_wrapper(
         "system.anthropic",
-
-        process_request,
-        process_synchronous_response,
+        _AnthropicProviderRequest(instrumentor),
         _IsStreaming.kwargs,
         wrapped,
         instance,

@@ -85,17 +81,39 @@ async def achat_wrapper(
     kwargs,
 )

+class _AnthropicProviderRequest(_ProviderRequest):
+    @override
+    def process_chunk(self, chunk: Any) -> bool:
+        if chunk.type == "message_start":
+            self._ingest["provider_response_id"] = chunk.message.id

-
-
-        ingest["provider_response_id"] = chunk.message.id
+            usage = chunk.message.usage
+            units = self._ingest["units"]

-
-        units = ingest["units"]
+            input = _PayiInstrumentor.update_for_vision(usage.input_tokens, units, self._estimated_prompt_tokens)

-
+            units["text"] = Units(input=input, output=0)

-
+            if hasattr(usage, "cache_creation_input_tokens") and usage.cache_creation_input_tokens > 0:
+                text_cache_write = usage.cache_creation_input_tokens
+                units["text_cache_write"] = Units(input=text_cache_write, output=0)
+
+            if hasattr(usage, "cache_read_input_tokens") and usage.cache_read_input_tokens > 0:
+                text_cache_read = usage.cache_read_input_tokens
+                units["text_cache_read"] = Units(input=text_cache_read, output=0)
+
+        elif chunk.type == "message_delta":
+            usage = chunk.usage
+            self._ingest["units"]["text"]["output"] = usage.output_tokens
+
+        return True
+
+    @override
+    def process_synchronous_response(self, response: Any, log_prompt_and_response: bool, kwargs: Any) -> Any:
+        usage = response.usage
+        input = usage.input_tokens
+        output = usage.output_tokens
+        units: dict[str, Units] = self._ingest["units"]

         if hasattr(usage, "cache_creation_input_tokens") and usage.cache_creation_input_tokens > 0:
             text_cache_write = usage.cache_creation_input_tokens

@@ -105,35 +123,37 @@ def process_chunk(chunk: Any, ingest: IngestUnitsParams) -> None:
             text_cache_read = usage.cache_read_input_tokens
             units["text_cache_read"] = Units(input=text_cache_read, output=0)

-
-        usage = chunk.usage
-        ingest["units"]["text"]["output"] = usage.output_tokens
-
-
-def process_synchronous_response(response: Any, ingest: IngestUnitsParams, log_prompt_and_response: bool, *args: Any, **kwargs: 'dict[str, Any]') -> Any:  # noqa: ARG001
-    usage = response.usage
-    input = usage.input_tokens
-    output = usage.output_tokens
-    units: dict[str, Units] = ingest["units"]
-
-    if hasattr(usage, "cache_creation_input_tokens") and usage.cache_creation_input_tokens > 0:
-        text_cache_write = usage.cache_creation_input_tokens
-        units["text_cache_write"] = Units(input=text_cache_write, output=0)
+        input = _PayiInstrumentor.update_for_vision(input, units, self._estimated_prompt_tokens)

-
-        text_cache_read = usage.cache_read_input_tokens
-        units["text_cache_read"] = Units(input=text_cache_read, output=0)
+        units["text"] = Units(input=input, output=output)

-
+        if log_prompt_and_response:
+            self._ingest["provider_response_json"] = response.to_json()
+
+        self._ingest["provider_response_id"] = response.id
+
+        return None

-
+    @override
+    def process_request(self, kwargs: Any) -> None:
+        messages = kwargs.get("messages")
+        if not messages or len(messages) == 0:
+            return
+
+        estimated_token_count = 0
+        has_image = False

-
-
-
-
-
-
+        enc = tiktoken.get_encoding("cl100k_base")
+
+        for message in messages:
+            msg_has_image, msg_prompt_tokens = has_image_and_get_texts(enc, message.get('content', ''))
+            if msg_has_image:
+                has_image = True
+                estimated_token_count += msg_prompt_tokens
+
+        if not has_image or estimated_token_count == 0:
+            return
+        self._estimated_prompt_tokens = estimated_token_count

 def has_image_and_get_texts(encoding: tiktoken.Encoding, content: Union[str, 'list[Any]']) -> 'tuple[bool, int]':
     if isinstance(content, str):

@@ -146,23 +166,3 @@ def has_image_and_get_texts(encoding: tiktoken.Encoding, content: Union[str, 'li
     token_count = sum(len(encoding.encode(item.get("text", ""))) for item in content if item.get("type") == "text")
     return has_image, token_count

-def process_request(ingest: IngestUnitsParams, *args: Any, **kwargs: Any) -> None:  # noqa: ARG001
-    messages = kwargs.get("messages")
-    if not messages or len(messages) == 0:
-        return
-
-    estimated_token_count = 0
-    has_image = False
-
-    enc = tiktoken.get_encoding("cl100k_base")
-
-    for message in messages:
-        msg_has_image, msg_prompt_tokens = has_image_and_get_texts(enc, message.get('content', ''))
-        if msg_has_image:
-            has_image = True
-            estimated_token_count += msg_prompt_tokens
-
-    if not has_image or estimated_token_count == 0:
-        return
-
-    ingest["units"][_PayiInstrumentor.estimated_prompt_tokens] = Units(input=estimated_token_count, output=0)
payi/lib/BedrockInstrumentor.py
CHANGED

@@ -2,13 +2,14 @@ import json
 import logging
 from typing import Any
 from functools import wraps
+from typing_extensions import override

 from wrapt import ObjectProxy, wrap_function_wrapper  # type: ignore

 from payi.types.ingest_units_params import Units, IngestUnitsParams
 from payi.types.pay_i_common_models_api_router_header_info_param import PayICommonModelsAPIRouterHeaderInfoParam

-from .instrument import _IsStreaming, _PayiInstrumentor
+from .instrument import _IsStreaming, _ProviderRequest, _PayiInstrumentor


 class BedrockInstrumentor:

@@ -103,9 +104,7 @@ def wrap_invoke(instrumentor: _PayiInstrumentor, wrapped: Any) -> Any:
     if modelId.startswith("meta.llama3") or modelId.startswith("anthropic."):
         return instrumentor.chat_wrapper(
             "system.aws.bedrock",
-
-            process_invoke_request,
-            process_synchronous_invoke_response,
+            _BedrockInvokeSynchronousProviderRequest(instrumentor),
             _IsStreaming.false,
             wrapped,
             None,

@@ -119,14 +118,12 @@ def wrap_invoke(instrumentor: _PayiInstrumentor, wrapped: Any) -> Any:
 def wrap_invoke_stream(instrumentor: _PayiInstrumentor, wrapped: Any) -> Any:
     @wraps(wrapped)
     def invoke_wrapper(*args: Any, **kwargs: Any) -> Any:
-
+        model_id: str = kwargs.get("modelId", "")  # type: ignore

-        if modelId.startswith("meta.llama3") or modelId.startswith("anthropic."):
+        if model_id.startswith("meta.llama3") or model_id.startswith("anthropic."):
             return instrumentor.chat_wrapper(
                 "system.aws.bedrock",
-
-                process_invoke_request,
-                None,
+                _BedrockInvokeStreamingProviderRequest(instrumentor, model_id),
                 _IsStreaming.true,
                 wrapped,
                 None,

@@ -145,9 +142,7 @@ def wrap_converse(instrumentor: _PayiInstrumentor, wrapped: Any) -> Any:
     if modelId.startswith("meta.llama3") or modelId.startswith("anthropic."):
         return instrumentor.chat_wrapper(
             "system.aws.bedrock",
-
-            process_converse_request,
-            process_synchronous_converse_response,
+            _BedrockConverseSynchronousProviderRequest(instrumentor),
             _IsStreaming.false,
             wrapped,
             None,

@@ -161,14 +156,12 @@ def wrap_converse(instrumentor: _PayiInstrumentor, wrapped: Any) -> Any:
 def wrap_converse_stream(instrumentor: _PayiInstrumentor, wrapped: Any) -> Any:
     @wraps(wrapped)
     def invoke_wrapper(*args: Any, **kwargs: Any) -> Any:
-
+        model_id: str = kwargs.get("modelId", "")  # type: ignore

-        if modelId.startswith("meta.llama3") or modelId.startswith("anthropic."):
+        if model_id.startswith("meta.llama3") or model_id.startswith("anthropic."):
             return instrumentor.chat_wrapper(
                 "system.aws.bedrock",
-
-                process_converse_request,
-                None,
+                _BedrockConverseStreamingProviderRequest(instrumentor),
                 _IsStreaming.true,
                 wrapped,
                 None,

@@ -179,104 +172,121 @@ def wrap_converse_stream(instrumentor: _PayiInstrumentor, wrapped: Any) -> Any:

     return invoke_wrapper

-
-
-
+class _BedrockInvokeStreamingProviderRequest(_ProviderRequest):
+    def __init__(self, instrumentor: _PayiInstrumentor, model_id: str):
+        super().__init__(instrumentor)
+        self._is_anthropic: bool = model_id.startswith("anthropic.")

-
-
-
+    @override
+    def process_chunk(self, chunk: Any) -> bool:
+        if self._is_anthropic:
+            return self.process_invoke_streaming_anthropic_chunk(chunk)
+        else:
+            return self.process_invoke_streaming_llama_chunk(chunk)

-
+    def process_invoke_streaming_anthropic_chunk(self, chunk: str) -> bool:
+        chunk_dict = json.loads(chunk)
+        type = chunk_dict.get("type", "")

-
+        if type == "message_start":
+            usage = chunk_dict['message']['usage']
+            units = self._ingest["units"]

-
-        if text_cache_write > 0:
-            units["text_cache_write"] = Units(input=text_cache_write, output=0)
+            input = _PayiInstrumentor.update_for_vision(usage['input_tokens'], units, self._estimated_prompt_tokens)

-
-        if text_cache_read > 0:
-            units["text_cache_read"] = Units(input=text_cache_read, output=0)
+            units["text"] = Units(input=input, output=0)

-
-
-
+            text_cache_write: int = usage.get("cache_creation_input_tokens", 0)
+            if text_cache_write > 0:
+                units["text_cache_write"] = Units(input=text_cache_write, output=0)

-
-
-
-
-
-
-
-
-
+            text_cache_read: int = usage.get("cache_read_input_tokens", 0)
+            if text_cache_read > 0:
+                units["text_cache_read"] = Units(input=text_cache_read, output=0)
+
+        elif type == "message_delta":
+            usage = chunk_dict['usage']
+            self._ingest["units"]["text"]["output"] = usage['output_tokens']
+
+        return True
+
+    def process_invoke_streaming_llama_chunk(self, chunk: str) -> bool:
+        chunk_dict = json.loads(chunk)
+        metrics = chunk_dict.get("amazon-bedrock-invocationMetrics", {})
+        if metrics:
+            input = metrics.get("inputTokenCount", 0)
+            output = metrics.get("outputTokenCount", 0)
+            self._ingest["units"]["text"] = Units(input=input, output=output)
+
+        return True
+
+class _BedrockInvokeSynchronousProviderRequest(_ProviderRequest):
+    @override
+    def process_synchronous_response(
+        self,
         response: Any,
-        ingest: IngestUnitsParams,
         log_prompt_and_response: bool,
-
-        **kargs: Any) -> Any:  # noqa: ARG001
+        kwargs: Any) -> Any:

-
+        metadata = response.get("ResponseMetadata", {})

-
-
-
+        request_id = metadata.get("RequestId", "")
+        if request_id:
+            self._ingest["provider_response_id"] = request_id

-
-
-
+        response_headers = metadata.get("HTTPHeaders", {}).copy()
+        if response_headers:
+            self._ingest["provider_response_headers"] = [PayICommonModelsAPIRouterHeaderInfoParam(name=k, value=v) for k, v in response_headers.items()]

-
-
-
-
-
+        response["body"] = InvokeResponseWrapper(
+            response=response["body"],
+            instrumentor=self._instrumentor,
+            ingest=self._ingest,
+            log_prompt_and_response=log_prompt_and_response)

-
+        return response

-
-
-
-
-
+class _BedrockConverseSynchronousProviderRequest(_ProviderRequest):
+    @override
+    def process_synchronous_response(
+        self,
+        response: 'dict[str, Any]',
+        log_prompt_and_response: bool,
+        kwargs: Any) -> Any:

-
-    usage = metadata['usage']
+        usage = response["usage"]
         input = usage["inputTokens"]
         output = usage["outputTokens"]
-
+
+        units: dict[str, Units] = self._ingest["units"]
+        units["text"] = Units(input=input, output=output)

-
-    response: 'dict[str, Any]',
-    ingest: IngestUnitsParams,
-    log_prompt_and_response: bool,
-    **kargs: Any) -> Any:  # noqa: ARG001
+        metadata = response.get("ResponseMetadata", {})

-
-
-
-
-    units: dict[str, Units] = ingest["units"]
-    units["text"] = Units(input=input, output=output)
+        request_id = metadata.get("RequestId", "")
+        if request_id:
+            self._ingest["provider_response_id"] = request_id

-
+        response_headers = metadata.get("HTTPHeaders", {})
+        if response_headers:
+            self._ingest["provider_response_headers"] = [PayICommonModelsAPIRouterHeaderInfoParam(name=k, value=v) for k, v in response_headers.items()]

-
-
-
+        if log_prompt_and_response:
+            response_without_metadata = response.copy()
+            response_without_metadata.pop("ResponseMetadata", None)
+            self._ingest["provider_response_json"] = json.dumps(response_without_metadata)

-
-    if response_headers:
-        ingest["provider_response_headers"] = [PayICommonModelsAPIRouterHeaderInfoParam(name=k, value=v) for k, v in response_headers.items()]
+        return None

-
-
-
-
+class _BedrockConverseStreamingProviderRequest(_ProviderRequest):
+    @override
+    def process_chunk(self, chunk: 'dict[str, Any]') -> bool:
+        metadata = chunk.get("metadata", {})

-
+        if metadata:
+            usage = metadata['usage']
+            input = usage["inputTokens"]
+            output = usage["outputTokens"]
+            self._ingest["units"]["text"] = Units(input=input, output=output)

-
-    return
+        return True
payi/lib/OpenAIInstrumentor.py
CHANGED

@@ -1,6 +1,7 @@
 import json
 import logging
-from typing import Any, Union
+from typing import Any, Union, Optional
+from typing_extensions import override
 from importlib.metadata import version

 import tiktoken  # type: ignore

@@ -9,7 +10,7 @@ from wrapt import wrap_function_wrapper  # type: ignore
 from payi.types import IngestUnitsParams
 from payi.types.ingest_units_params import Units

-from .instrument import _IsStreaming, _PayiInstrumentor
+from .instrument import _IsStreaming, _ProviderRequest, _PayiInstrumentor


 class OpenAiInstrumentor:

@@ -63,9 +64,7 @@ def embeddings_wrapper(
 ) -> Any:
     return instrumentor.chat_wrapper(
         "system.openai",
-
-        None,  # process_chat_request,
-        process_ebmeddings_synchronous_response,
+        _OpenAiEmbeddingsProviderRequest(instrumentor),
         _IsStreaming.false,
         wrapped,
         instance,

@@ -83,9 +82,7 @@ async def aembeddings_wrapper(
 ) -> Any:
     return await instrumentor.achat_wrapper(
         "system.openai",
-
-        None,  # process_chat_request,
-        process_ebmeddings_synchronous_response,
+        _OpenAiEmbeddingsProviderRequest(instrumentor),
         _IsStreaming.false,
         wrapped,
         instance,

@@ -103,9 +100,7 @@ def chat_wrapper(
 ) -> Any:
     return instrumentor.chat_wrapper(
         "system.openai",
-
-        process_chat_request,
-        process_chat_synchronous_response,
+        _OpenAiChatProviderRequest(instrumentor),
         _IsStreaming.kwargs,
         wrapped,
         instance,

@@ -123,9 +118,7 @@ async def achat_wrapper(
 ) -> Any:
     return await instrumentor.achat_wrapper(
         "system.openai",
-
-        process_chat_request,
-        process_chat_synchronous_response,
+        _OpenAiChatProviderRequest(instrumentor),
         _IsStreaming.kwargs,
         wrapped,
         instance,

@@ -133,13 +126,89 @@ async def achat_wrapper(
     kwargs,
 )

-
-
+class _OpenAiEmbeddingsProviderRequest(_ProviderRequest):
+    @override
+    def process_synchronous_response(
+        self,
+        response: Any,
+        log_prompt_and_response: bool,
+        kwargs: Any) -> Any:
+        return process_chat_synchronous_response(response, self._ingest, log_prompt_and_response, self._estimated_prompt_tokens)
+
+class _OpenAiChatProviderRequest(_ProviderRequest):
+    def __init__(self, instrumentor: _PayiInstrumentor):
+        super().__init__(instrumentor)
+        self._include_usage_added = False
+
+    @override
+    def process_chunk(self, chunk: Any) -> bool:
+        model = model_to_dict(chunk)
+
+        if "provider_response_id" not in self._ingest:
+            response_id = model.get("id", None)
+            if response_id:
+                self._ingest["provider_response_id"] = response_id
+
+        send_chunk_to_client = True
+
+        usage = model.get("usage")
+        if usage:
+            add_usage_units(usage, self._ingest["units"], self._estimated_prompt_tokens)
+
+            # If we added "include_usage" in the request on behalf of the client, do not return the extra
+            # packet which contains the usage to the client as they are not expecting the data
+            if self._include_usage_added:
+                send_chunk_to_client = False

-
+        return send_chunk_to_client
+
+    @override
+    def process_request(self, kwargs: Any) -> None:  # noqa: ARG001
+        messages = kwargs.get("messages", None)
+        if not messages or len(messages) == 0:
+            return
+
+        estimated_token_count = 0
+        has_image = False
+
+        try:
+            enc = tiktoken.encoding_for_model(kwargs.get("model"))  # type: ignore
+        except KeyError:
+            enc = tiktoken.get_encoding("o200k_base")  # type: ignore
+
+        for message in messages:
+            msg_has_image, msg_prompt_tokens = has_image_and_get_texts(enc, message.get('content', ''))
+            if msg_has_image:
+                has_image = True
+                estimated_token_count += msg_prompt_tokens
+
+        if has_image and estimated_token_count > 0:
+            self._estimated_prompt_tokens = estimated_token_count
+
+        stream: bool = kwargs.get("stream", False)
+        if stream:
+            add_include_usage = True
+
+            stream_options: dict[str, Any] = kwargs.get("stream_options", None)
+            if stream_options and "include_usage" in stream_options:
+                add_include_usage = stream_options["include_usage"] == False
+
+            if add_include_usage:
+                kwargs['stream_options'] = {"include_usage": True}
+                self._include_usage_added = True
+
+    @override
+    def process_synchronous_response(
+        self,
+        response: Any,
+        log_prompt_and_response: bool,
+        kwargs: Any) -> Any:
+        process_chat_synchronous_response(response, self._ingest, log_prompt_and_response, self._estimated_prompt_tokens)
+
+def process_chat_synchronous_response(response: str, ingest: IngestUnitsParams, log_prompt_and_response: bool, estimated_prompt_tokens: Optional[int]) -> Any:
     response_dict = model_to_dict(response)

-    add_usage_units(response_dict.get("usage", {}), ingest["units"])
+    add_usage_units(response_dict.get("usage", {}), ingest["units"], estimated_prompt_tokens)

     if log_prompt_and_response:
         ingest["provider_response_json"] = [json.dumps(response_dict)]

@@ -149,19 +218,6 @@ def process_chat_synchronous_response(response: str, ingest: IngestUnitsParams,

     return None

-def process_chat_chunk(chunk: Any, ingest: IngestUnitsParams) -> None:
-    model = model_to_dict(chunk)
-
-    if "provider_response_id" not in ingest:
-        response_id = model.get("id", None)
-        if response_id:
-            ingest["provider_response_id"] = response_id
-
-    usage = model.get("usage")
-    if usage:
-        add_usage_units(usage, ingest["units"])
-
-
 def model_to_dict(model: Any) -> Any:
     if version("pydantic") < "2.0.0":
         return model.dict()

@@ -173,7 +229,7 @@ def model_to_dict(model: Any) -> Any:
     return model


-def add_usage_units(usage: "dict[str, Any]", units: "dict[str, Units]") -> None:
+def add_usage_units(usage: "dict[str, Any]", units: "dict[str, Units]", estimated_prompt_tokens: Optional[int]) -> None:
     input = usage["prompt_tokens"] if "prompt_tokens" in usage else 0
     output = usage["completion_tokens"] if "completion_tokens" in usage else 0
     input_cache = 0

@@ -184,7 +240,7 @@ def add_usage_units(usage: "dict[str, Any]", units: "dict[str, Units]") -> None:
     if input_cache != 0:
         units["text_cache_read"] = Units(input=input_cache, output=0)

-    input = _PayiInstrumentor.update_for_vision(input - input_cache, units)
+    input = _PayiInstrumentor.update_for_vision(input - input_cache, units, estimated_prompt_tokens)

     units["text"] = Units(input=input, output=output)

@@ -197,28 +253,4 @@ def has_image_and_get_texts(encoding: tiktoken.Encoding, content: Union[str, 'li
         return has_image, 0

     token_count = sum(len(encoding.encode(item.get("text", ""))) for item in content if item.get("type") == "text")
-    return has_image, token_count
-
-def process_chat_request(ingest: IngestUnitsParams, *args: Any, **kwargs: Any) -> None:  # noqa: ARG001
-    messages = kwargs.get("messages")
-    if not messages or len(messages) == 0:
-        return
-
-    estimated_token_count = 0
-    has_image = False
-
-    try:
-        enc = tiktoken.encoding_for_model(kwargs.get("model"))  # type: ignore
-    except KeyError:
-        enc = tiktoken.get_encoding("o200k_base")  # type: ignore
-
-    for message in messages:
-        msg_has_image, msg_prompt_tokens = has_image_and_get_texts(enc, message.get('content', ''))
-        if msg_has_image:
-            has_image = True
-            estimated_token_count += msg_prompt_tokens
-
-    if not has_image or estimated_token_count == 0:
-        return
-
-    ingest["units"][_PayiInstrumentor.estimated_prompt_tokens] = Units(input=estimated_token_count, output=0)
+    return has_image, token_count
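For streamed OpenAI chats, process_request now injects stream_options={"include_usage": True} when the caller did not ask for usage, and process_chunk later suppresses the synthetic usage packet. A sketch of the injection decision, with a plain dict standing in for the real chat.completions.create keyword arguments and the return value playing the role of the _include_usage_added flag:

# Sketch of the decision only, under the assumptions named above.
from typing import Any

def ensure_include_usage(kwargs: "dict[str, Any]") -> bool:
    if not kwargs.get("stream", False):
        return False  # usage arrives on the synchronous response instead
    stream_options = kwargs.get("stream_options") or {}
    if stream_options.get("include_usage") is True:
        return False  # caller already asked for the usage packet
    # added on the caller's behalf, so the extra packet must be hidden later
    kwargs["stream_options"] = {"include_usage": True}
    return True

kwargs: "dict[str, Any]" = {"model": "gpt-4o-mini", "stream": True}
print(ensure_include_usage(kwargs), kwargs)
# True {'model': 'gpt-4o-mini', 'stream': True, 'stream_options': {'include_usage': True}}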
payi/lib/helpers.py
CHANGED

@@ -50,6 +50,7 @@ def create_headers(
     use_case_name: Union[str, None] = None,
     use_case_version: Union[int, None] = None,
     route_as_resource: Union[str, None] = None,
+    resource_scope: Union[str, None] = None,
 ) -> Dict[str, str]:
     headers: Dict[str, str] = {}

@@ -71,6 +72,8 @@ def create_headers(
         headers.update({ PayiHeaderNames.use_case_version: str(use_case_version)})
     if route_as_resource:
         headers.update({ PayiHeaderNames.route_as_resource: route_as_resource})
+    if resource_scope:
+        headers.update({ PayiHeaderNames.resource_scope: resource_scope })
     return headers

 def _resolve_payi_base_url(payi_base_url: Union[str, None]) -> str:
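An example of calling create_headers with the new parameter; the argument values are illustrative, and the concrete header string that PayiHeaderNames.resource_scope resolves to is not shown in this diff:

# Illustrative call, assuming the public import path payi.lib.helpers.
from payi.lib.helpers import create_headers

headers = create_headers(
    use_case_name="document_summary",
    route_as_resource="gpt-4o-mini",
    resource_scope="global",  # newly supported in this release
)
# headers now carries the PayiHeaderNames.resource_scope entry alongside
# the use-case and routing headers.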
payi/lib/instrument.py
CHANGED

@@ -6,6 +6,7 @@ import logging
 import traceback
 from enum import Enum
 from typing import Any, Set, Union, Callable, Optional, TypedDict
+from datetime import datetime, timezone

 import nest_asyncio  # type: ignore
 from wrapt import ObjectProxy  # type: ignore

@@ -21,9 +22,24 @@ from .helpers import PayiCategories
 from .Stopwatch import Stopwatch


+class _ProviderRequest:
+    def __init__(self, instrumentor: '_PayiInstrumentor'):
+        self._instrumentor: '_PayiInstrumentor' = instrumentor
+        self._estimated_prompt_tokens: Optional[int] = None
+        self._ingest: IngestUnitsParams
+
+    def process_request(self, _kwargs: Any) -> None:
+        return
+
+    def process_chunk(self, _chunk: Any) -> bool:
+        return True
+
+    def process_synchronous_response(self, response: Any, log_prompt_and_response: bool, kwargs: Any) -> Optional[object]:  # noqa: ARG002
+        return None
+
 class PayiInstrumentConfig(TypedDict, total=False):
     proxy: bool
-
+    global_instrumentation: bool
     limit_ids: Optional["list[str]"]
     experience_name: Optional[str]
     experience_id: Optional[str]
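_ProviderRequest is the new extension point: each provider overrides only the hooks it needs, and the instrumentor owns the _ingest lifecycle around the wrapped SDK call. A sketch of a custom subclass; _EchoProviderRequest is hypothetical, and the imported names are internal to this package rather than public API:

# Hypothetical subclass, assuming the internal import path payi.lib.instrument.
from typing import Any

from payi.lib.instrument import _ProviderRequest

class _EchoProviderRequest(_ProviderRequest):
    def process_request(self, kwargs: Any) -> None:
        # runs once before the wrapped SDK call; may rewrite kwargs in place
        print("outgoing model:", kwargs.get("model"))

    def process_chunk(self, chunk: Any) -> bool:
        # runs per streamed chunk; returning False hides the chunk from the caller
        return True

    def process_synchronous_response(self, response: Any, log_prompt_and_response: bool, kwargs: Any) -> Any:
        # a non-None return value short-circuits the default unit ingestion
        return None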
@@ -48,8 +64,6 @@ class _IsStreaming(Enum):
     kwargs = 2

 class _PayiInstrumentor:
-    estimated_prompt_tokens: str = "estimated_prompt_tokens"
-
     def __init__(
         self,
         payi: Optional[Payi],

@@ -76,9 +90,9 @@ class _PayiInstrumentor:
         else:
             self._instrument_specific(instruments)

-
+        global_instrumentation = global_config.pop("global_instrumentation", True) if global_config else True

-        if
+        if global_instrumentation:
             if global_config is None:
                 global_config = {}
             if "proxy" not in global_config:

@@ -434,7 +448,7 @@ class _PayiInstrumentor:
         self,
         ingest: IngestUnitsParams,
         ingest_extra_headers: "dict[str, str]",  # do not conflict potential kwargs["extra_headers"]
-
+        kwargs: Any,
     ) -> None:
         limit_ids = ingest_extra_headers.pop(PayiHeaderNames.limit_ids, None)
         request_tags = ingest_extra_headers.pop(PayiHeaderNames.request_tags, None)

@@ -481,12 +495,12 @@ class _PayiInstrumentor:
         if self._log_prompt_and_response:
             ingest["provider_request_json"] = json.dumps(provider_prompt)

+        ingest["event_timestamp"] = datetime.now(timezone.utc)
+
     async def achat_wrapper(
         self,
         category: str,
-
-        process_request: Optional[Callable[[IngestUnitsParams, Any, Any], None]],
-        process_synchronous_response: Any,
+        provider: _ProviderRequest,
         is_streaming: _IsStreaming,
         wrapped: Any,
         instance: Any,

@@ -511,8 +525,8 @@ class _PayiInstrumentor:

             return await wrapped(*args, **kwargs)

-
-
+        provider._ingest = {"category": category, "units": {}}  # type: ignore
+        provider._ingest["resource"] = kwargs.get("model", "")

         if category == PayiCategories.openai and instance and hasattr(instance, "_client"):
             from .OpenAIInstrumentor import OpenAiInstrumentor  # noqa: I001

@@ -530,21 +544,20 @@ class _PayiInstrumentor:
                     logging.error("Azure OpenAI invalid resource scope, not ingesting")
                     return wrapped(*args, **kwargs)

-
+                provider._ingest["resource_scope"] = resource_scope

                 category = PayiCategories.azure_openai

-
-
+                provider._ingest["category"] = category
+                provider._ingest["resource"] = route_as_resource

         current_frame = inspect.currentframe()
         # f_back excludes the current frame, strip() cleans up whitespace and newlines
         stack = [frame.strip() for frame in traceback.format_stack(current_frame.f_back)]  # type: ignore

-
+        provider._ingest['properties'] = { 'system.stack_trace': json.dumps(stack) }

-
-        process_request(ingest, (), instance)
+        provider.process_request(kwargs)

         sw = Stopwatch()
         stream: bool = False

@@ -557,7 +570,7 @@ class _PayiInstrumentor:
             stream = False

         try:
-            self._prepare_ingest(
+            self._prepare_ingest(provider._ingest, extra_headers, kwargs)
             sw.start()
             response = await wrapped(*args, **kwargs)

@@ -575,9 +588,8 @@ class _PayiInstrumentor:
                 instance=instance,
                 instrumentor=self,
                 log_prompt_and_response=self._log_prompt_and_response,
-                ingest=ingest,
                 stopwatch=sw,
-
+                provider=provider,
                 is_bedrock=False,
             )

@@ -585,28 +597,25 @@ class _PayiInstrumentor:

             sw.stop()
             duration = sw.elapsed_ms_int()
-
-
+            provider._ingest["end_to_end_latency_ms"] = duration
+            provider._ingest["http_status_code"] = 200

-
-
-
-
-
-
-
-                return return_result
+            return_result: Any = provider.process_synchronous_response(
+                response=response,
+                log_prompt_and_response=self._log_prompt_and_response,
+                kwargs=kwargs)
+
+            if return_result:
+                return return_result

-            await self._aingest_units(
+            await self._aingest_units(provider._ingest)

         return response

     def chat_wrapper(
         self,
         category: str,
-
-        process_request: Optional[Callable[[IngestUnitsParams, Any, Any], None]],
-        process_synchronous_response: Any,
+        provider: _ProviderRequest,
         is_streaming: _IsStreaming,
         wrapped: Any,
         instance: Any,

@@ -635,13 +644,13 @@ class _PayiInstrumentor:

             return wrapped(*args, **kwargs)

-
+        provider._ingest = {"category": category, "units": {}}  # type: ignore
         if is_bedrock:
             # boto3 doesn't allow extra_headers
             kwargs.pop("extra_headers", None)
-
+            provider._ingest["resource"] = kwargs.get("modelId", "")
         else:
-
+            provider._ingest["resource"] = kwargs.get("model", "")

         if category == PayiCategories.openai and instance and hasattr(instance, "_client"):
             from .OpenAIInstrumentor import OpenAiInstrumentor  # noqa: I001

@@ -659,21 +668,20 @@ class _PayiInstrumentor:
                     logging.error("Azure OpenAI invalid resource scope, not ingesting")
                     return wrapped(*args, **kwargs)

-
+                provider._ingest["resource_scope"] = resource_scope

                 category = PayiCategories.azure_openai

-
-
+                provider._ingest["category"] = category
+                provider._ingest["resource"] = route_as_resource

         current_frame = inspect.currentframe()
         # f_back excludes the current frame, strip() cleans up whitespace and newlines
         stack = [frame.strip() for frame in traceback.format_stack(current_frame.f_back)]  # type: ignore

-
+        provider._ingest['properties'] = { 'system.stack_trace': json.dumps(stack) }

-
-        process_request(ingest, (), kwargs)
+        provider.process_request(kwargs)

         sw = Stopwatch()
         stream: bool = False

@@ -686,7 +694,7 @@ class _PayiInstrumentor:
             stream = False

         try:
-            self._prepare_ingest(
+            self._prepare_ingest(provider._ingest, extra_headers, kwargs)
             sw.start()
             response = wrapped(*args, **kwargs)

@@ -704,9 +712,8 @@ class _PayiInstrumentor:
                 instance=instance,
                 instrumentor=self,
                 log_prompt_and_response=self._log_prompt_and_response,
-                ingest=ingest,
                 stopwatch=sw,
-
+                provider=provider,
                 is_bedrock=is_bedrock,
             )

@@ -721,19 +728,17 @@ class _PayiInstrumentor:

             sw.stop()
             duration = sw.elapsed_ms_int()
-
-
+            provider._ingest["end_to_end_latency_ms"] = duration
+            provider._ingest["http_status_code"] = 200

-
-
-
-
-
-
-            if return_result:
-                return return_result
+            return_result: Any = provider.process_synchronous_response(
+                response=response,
+                log_prompt_and_response=self._log_prompt_and_response,
+                kwargs=kwargs)
+            if return_result:
+                return return_result

-            self._ingest_units(
+            self._ingest_units(provider._ingest)

         return response

@@ -808,13 +813,12 @@ class _PayiInstrumentor:
             extra_headers[PayiHeaderNames.experience_id] = context_experience_id

     @staticmethod
-    def update_for_vision(input: int, units: 'dict[str, Units]') -> int:
-        if
-
-            vision = input - prompt_token_estimate
+    def update_for_vision(input: int, units: 'dict[str, Units]', estimated_prompt_tokens: Optional[int]) -> int:
+        if estimated_prompt_tokens:
+            vision = input - estimated_prompt_tokens
             if (vision > 0):
                 units["vision"] = Units(input=vision, output=0)
-                input = prompt_token_estimate
+                input = estimated_prompt_tokens

         return input

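update_for_vision now receives the pre-request text-only estimate as an argument instead of reading a sentinel "estimated_prompt_tokens" entry out of units. A worked example of the split, using plain dicts in place of Units:

# Worked example: the provider reports 900 input tokens, the pre-request
# text-only estimate was 700, so the 200-token difference is booked as
# vision input and the text input falls back to the estimate.
units: "dict[str, dict[str, int]]" = {}
reported_input = 900
estimated_prompt_tokens = 700  # computed by process_request via tiktoken

vision = reported_input - estimated_prompt_tokens
if vision > 0:
    units["vision"] = {"input": vision, "output": 0}
    reported_input = estimated_prompt_tokens

units["text"] = {"input": reported_input, "output": 0}
print(units)
# {'vision': {'input': 200, 'output': 0}, 'text': {'input': 700, 'output': 0}}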
@@ -856,16 +860,15 @@ class ChatStreamWrapper(ObjectProxy):  # type: ignore
         response: Any,
         instance: Any,
         instrumentor: _PayiInstrumentor,
-        ingest: IngestUnitsParams,
         stopwatch: Stopwatch,
-
+        provider: _ProviderRequest,
         log_prompt_and_response: bool = True,
         is_bedrock: bool = False,
     ) -> None:

         bedrock_from_stream: bool = False
         if is_bedrock:
-
+            provider._ingest["provider_response_id"] = response["ResponseMetadata"]["RequestId"]
             stream = response.get("stream", None)

             if stream:

@@ -882,11 +885,10 @@ class ChatStreamWrapper(ObjectProxy):  # type: ignore

         self._instrumentor = instrumentor
         self._stopwatch: Stopwatch = stopwatch
-        self._ingest: IngestUnitsParams = ingest
         self._log_prompt_and_response: bool = log_prompt_and_response
         self._responses: list[str] = []

-        self.
+        self._provider: _ProviderRequest = provider

         self._first_token: bool = True
         self._is_bedrock: bool = is_bedrock

@@ -906,7 +908,7 @@ class ChatStreamWrapper(ObjectProxy):  # type: ignore

     def __iter__(self) -> Any:
         if self._is_bedrock:
-            # MUST
+            # MUST reside in a separate function so that the yield statement (e.g. the generator) doesn't implicitly return its own iterator and overriding self
             return self._iter_bedrock()
         return self

@@ -935,7 +937,9 @@ class ChatStreamWrapper(ObjectProxy):  # type: ignore
             self._stop_iteration()
             raise e
         else:
-            self._evaluate_chunk(chunk)
+            if self._evaluate_chunk(chunk) == False:
+                return self.__next__()
+
             return chunk

     async def __anext__(self) -> Any:

@@ -946,35 +950,35 @@ class ChatStreamWrapper(ObjectProxy):  # type: ignore
             await self._astop_iteration()
             raise e
         else:
-            self._evaluate_chunk(chunk)
+            if self._evaluate_chunk(chunk) == False:
+                return await self.__anext__()
             return chunk

-    def _evaluate_chunk(self, chunk: Any) ->
+    def _evaluate_chunk(self, chunk: Any) -> bool:
         if self._first_token:
-            self._ingest["time_to_first_token_ms"] = self._stopwatch.elapsed_ms_int()
+            self._provider._ingest["time_to_first_token_ms"] = self._stopwatch.elapsed_ms_int()
             self._first_token = False

         if self._log_prompt_and_response:
             self._responses.append(self.chunk_to_json(chunk))

-
-        self._process_chunk(chunk, self._ingest)
+        return self._provider.process_chunk(chunk)

     def _process_stop_iteration(self) -> None:
         self._stopwatch.stop()
-        self._ingest["end_to_end_latency_ms"] = self._stopwatch.elapsed_ms_int()
-        self._ingest["http_status_code"] = 200
+        self._provider._ingest["end_to_end_latency_ms"] = self._stopwatch.elapsed_ms_int()
+        self._provider._ingest["http_status_code"] = 200

         if self._log_prompt_and_response:
-            self._ingest["provider_response_json"] = self._responses
+            self._provider._ingest["provider_response_json"] = self._responses

     async def _astop_iteration(self) -> None:
         self._process_stop_iteration()
-        await self._instrumentor._aingest_units(self._ingest)
+        await self._instrumentor._aingest_units(self._provider._ingest)

     def _stop_iteration(self) -> None:
         self._process_stop_iteration()
-        self._instrumentor._ingest_units(self._ingest)
+        self._instrumentor._ingest_units(self._provider._ingest)

     @staticmethod
     def chunk_to_json(chunk: Any) -> str:
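ChatStreamWrapper.__next__ and __anext__ now consult process_chunk's boolean result: a False swallows the chunk (e.g. the synthetic include_usage packet) by recursing for the next one, so the caller never sees it. A standalone sketch of that pattern:

# Standalone sketch; FilteringIterator plays the role of ChatStreamWrapper
# and keep() the role of _provider.process_chunk.
from typing import Any, Callable, Iterator

class FilteringIterator:
    def __init__(self, inner: "Iterator[Any]", keep: "Callable[[Any], bool]"):
        self._inner = inner
        self._keep = keep

    def __iter__(self) -> "FilteringIterator":
        return self

    def __next__(self) -> Any:
        chunk = next(self._inner)   # StopIteration propagates to the caller
        if not self._keep(chunk):
            return self.__next__()  # swallow the chunk, fetch the next one
        return chunk

chunks = iter(["data-1", "data-2", "usage-packet"])
print(list(FilteringIterator(chunks, lambda c: not c.startswith("usage"))))
# ['data-1', 'data-2']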
{payi-0.1.0a64.dist-info → payi-0.1.0a66.dist-info}/RECORD
RENAMED

@@ -11,7 +11,7 @@ payi/_resource.py,sha256=j2jIkTr8OIC8sU6-05nxSaCyj4MaFlbZrwlyg4_xJos,1088
 payi/_response.py,sha256=CfrNS_3wbL8o9dRyRVfZQ5E1GUlA4CUIUEK8olmfGqE,28777
 payi/_streaming.py,sha256=Z_wIyo206T6Jqh2rolFg2VXZgX24PahLmpURp0-NssU,10092
 payi/_types.py,sha256=2mbMK86K3W1aMTW7sOGQ-VND6-A2IuXKm8p4sYFztBU,6141
-payi/_version.py,sha256=
+payi/_version.py,sha256=77vTya8M6fJti_3rBW5AsXnf12UvkIi7HHixVB3nD24,165
 payi/pagination.py,sha256=k2356QGPOUSjRF2vHpwLBdF6P-2vnQzFfRIJQAHGQ7A,1258
 payi/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 payi/_utils/__init__.py,sha256=PNZ_QJuzZEgyYXqkO1HVhGkj5IU9bglVUcw7H-Knjzw,2062

@@ -24,12 +24,12 @@ payi/_utils/_transform.py,sha256=xfcRTFidCyPhQ7hXeivxpAS0x-NhTyr20iXm1cKcJYk,148
 payi/_utils/_typing.py,sha256=nTJz0jcrQbEgxwy4TtAkNxuU0QHHlmc6mQtA6vIR8tg,4501
 payi/_utils/_utils.py,sha256=8UmbPOy_AAr2uUjjFui-VZSrVBHRj6bfNEKRp5YZP2A,12004
 payi/lib/.keep,sha256=wuNrz-5SXo3jJaJOJgz4vFHM41YH_g20F5cRQo0vLes,224
-payi/lib/AnthropicInstrumentor.py,sha256=
-payi/lib/BedrockInstrumentor.py,sha256=
-payi/lib/OpenAIInstrumentor.py,sha256=
+payi/lib/AnthropicInstrumentor.py,sha256=OivTBeTnwVYDaaoGDj2V77JmjEb2aKQ6Se295VAm_gg,5615
+payi/lib/BedrockInstrumentor.py,sha256=CDfZ_IDEuQ2PrLcmtKGOX69sPhiKqKNIoZ2c0W0mIR8,10595
+payi/lib/OpenAIInstrumentor.py,sha256=_LV_IqWBAqvBlzbYQHolVmGioTn60-zUkNgp179rIFk,8305
 payi/lib/Stopwatch.py,sha256=7OJlxvr2Jyb6Zr1LYCYKczRB7rDVKkIR7gc4YoleNdE,764
-payi/lib/helpers.py,sha256=
-payi/lib/instrument.py,sha256=
+payi/lib/helpers.py,sha256=ar9so8_sJaEV_Ned-XvCl-JTe5cRoYlL9JQJwp4ZvyQ,3973
+payi/lib/instrument.py,sha256=879NVtiGSbaumaLFAWBurKMR81NPMHKUMBflB7dEEzY,43153
 payi/resources/__init__.py,sha256=1rtrPLWbNt8oJGOp6nwPumKLJ-ftez0B6qwLFyfcoP4,2972
 payi/resources/ingest.py,sha256=ifKMKylIkfCF-uGFPttr_VG3vWxsqntOOBrrU4_g1zk,21627
 payi/resources/categories/__init__.py,sha256=w5gMiPdBSzJA_qfoVtFBElaoe8wGf_O63R7R1Spr6Gk,1093

@@ -135,7 +135,7 @@ payi/types/use_cases/definitions/kpi_retrieve_response.py,sha256=uQXliSvS3k-yDYw
 payi/types/use_cases/definitions/kpi_update_params.py,sha256=jbawdWAdMnsTWVH0qfQGb8W7_TXe3lq4zjSRu44d8p8,373
 payi/types/use_cases/definitions/kpi_update_response.py,sha256=zLyEoT0S8d7XHsnXZYT8tM7yDw0Aze0Mk-_Z6QeMtc8,459
 payi/types/use_cases/definitions/limit_config_create_params.py,sha256=pzQza_16N3z8cFNEKr6gPbFvuGFrwNuGxAYb--Kbo2M,449
-payi-0.1.
-payi-0.1.
-payi-0.1.
-payi-0.1.
+payi-0.1.0a66.dist-info/METADATA,sha256=TdtcpW-1Tv0gvxyq4EBdXLCKZ2A_lSolRCvabKP9F3Q,15290
+payi-0.1.0a66.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
+payi-0.1.0a66.dist-info/licenses/LICENSE,sha256=CQt03aM-P4a3Yg5qBg3JSLVoQS3smMyvx7tYg_6V7Gk,11334
+payi-0.1.0a66.dist-info/RECORD,,

{payi-0.1.0a64.dist-info → payi-0.1.0a66.dist-info}/WHEEL
File without changes

{payi-0.1.0a64.dist-info → payi-0.1.0a66.dist-info}/licenses/LICENSE
File without changes