payi 0.1.0a64__py3-none-any.whl → 0.1.0a65__py3-none-any.whl
- payi/_version.py +1 -1
- payi/lib/AnthropicInstrumentor.py +60 -60
- payi/lib/BedrockInstrumentor.py +102 -92
- payi/lib/OpenAIInstrumentor.py +90 -58
- payi/lib/instrument.py +76 -75
- {payi-0.1.0a64.dist-info → payi-0.1.0a65.dist-info}/METADATA +1 -1
- {payi-0.1.0a64.dist-info → payi-0.1.0a65.dist-info}/RECORD +9 -9
- {payi-0.1.0a64.dist-info → payi-0.1.0a65.dist-info}/WHEEL +0 -0
- {payi-0.1.0a64.dist-info → payi-0.1.0a65.dist-info}/licenses/LICENSE +0 -0
payi/lib/AnthropicInstrumentor.py
CHANGED
@@ -1,13 +1,13 @@
 import logging
 from typing import Any, Union
+from typing_extensions import override

 import tiktoken
 from wrapt import wrap_function_wrapper  # type: ignore

-from payi.types import IngestUnitsParams
 from payi.types.ingest_units_params import Units

-from .instrument import _IsStreaming, _PayiInstrumentor
+from .instrument import _IsStreaming, _ProviderRequest, _PayiInstrumentor


 class AnthropicIntrumentor:
@@ -55,9 +55,7 @@ def chat_wrapper(
     ) -> Any:
         return instrumentor.chat_wrapper(
             "system.anthropic",
-
-            process_request,
-            process_synchronous_response,
+            _AnthropicProviderRequest(instrumentor),
             _IsStreaming.kwargs,
             wrapped,
             instance,
@@ -75,9 +73,7 @@ async def achat_wrapper(
     ) -> Any:
         return await instrumentor.achat_wrapper(
             "system.anthropic",
-
-            process_request,
-            process_synchronous_response,
+            _AnthropicProviderRequest(instrumentor),
             _IsStreaming.kwargs,
             wrapped,
             instance,
@@ -85,17 +81,39 @@ async def achat_wrapper(
             kwargs,
         )

+class _AnthropicProviderRequest(_ProviderRequest):
+    @override
+    def process_chunk(self, chunk: Any) -> bool:
+        if chunk.type == "message_start":
+            self._ingest["provider_response_id"] = chunk.message.id

-
-
-            ingest["provider_response_id"] = chunk.message.id
+            usage = chunk.message.usage
+            units = self._ingest["units"]

-
-            units = ingest["units"]
+            input = _PayiInstrumentor.update_for_vision(usage.input_tokens, units, self._estimated_prompt_tokens)

-
+            units["text"] = Units(input=input, output=0)

-
+            if hasattr(usage, "cache_creation_input_tokens") and usage.cache_creation_input_tokens > 0:
+                text_cache_write = usage.cache_creation_input_tokens
+                units["text_cache_write"] = Units(input=text_cache_write, output=0)
+
+            if hasattr(usage, "cache_read_input_tokens") and usage.cache_read_input_tokens > 0:
+                text_cache_read = usage.cache_read_input_tokens
+                units["text_cache_read"] = Units(input=text_cache_read, output=0)
+
+        elif chunk.type == "message_delta":
+            usage = chunk.usage
+            self._ingest["units"]["text"]["output"] = usage.output_tokens
+
+        return True
+
+    @override
+    def process_synchronous_response(self, response: Any, log_prompt_and_response: bool, kwargs: Any) -> Any:
+        usage = response.usage
+        input = usage.input_tokens
+        output = usage.output_tokens
+        units: dict[str, Units] = self._ingest["units"]

         if hasattr(usage, "cache_creation_input_tokens") and usage.cache_creation_input_tokens > 0:
             text_cache_write = usage.cache_creation_input_tokens
@@ -105,35 +123,37 @@ def process_chunk(chunk: Any, ingest: IngestUnitsParams) -> None:
             text_cache_read = usage.cache_read_input_tokens
             units["text_cache_read"] = Units(input=text_cache_read, output=0)

-
-        usage = chunk.usage
-        ingest["units"]["text"]["output"] = usage.output_tokens
-
-
-def process_synchronous_response(response: Any, ingest: IngestUnitsParams, log_prompt_and_response: bool, *args: Any, **kwargs: 'dict[str, Any]') -> Any:  # noqa: ARG001
-    usage = response.usage
-    input = usage.input_tokens
-    output = usage.output_tokens
-    units: dict[str, Units] = ingest["units"]
-
-    if hasattr(usage, "cache_creation_input_tokens") and usage.cache_creation_input_tokens > 0:
-        text_cache_write = usage.cache_creation_input_tokens
-        units["text_cache_write"] = Units(input=text_cache_write, output=0)
+        input = _PayiInstrumentor.update_for_vision(input, units, self._estimated_prompt_tokens)

-
-        text_cache_read = usage.cache_read_input_tokens
-        units["text_cache_read"] = Units(input=text_cache_read, output=0)
+        units["text"] = Units(input=input, output=output)

-
+        if log_prompt_and_response:
+            self._ingest["provider_response_json"] = response.to_json()
+
+        self._ingest["provider_response_id"] = response.id
+
+        return None

-
+    @override
+    def process_request(self, kwargs: Any) -> None:
+        messages = kwargs.get("messages")
+        if not messages or len(messages) == 0:
+            return
+
+        estimated_token_count = 0
+        has_image = False

-
-
-
-
-
-
+        enc = tiktoken.get_encoding("cl100k_base")
+
+        for message in messages:
+            msg_has_image, msg_prompt_tokens = has_image_and_get_texts(enc, message.get('content', ''))
+            if msg_has_image:
+                has_image = True
+                estimated_token_count += msg_prompt_tokens
+
+        if not has_image or estimated_token_count == 0:
+            return
+        self._estimated_prompt_tokens = estimated_token_count

 def has_image_and_get_texts(encoding: tiktoken.Encoding, content: Union[str, 'list[Any]']) -> 'tuple[bool, int]':
     if isinstance(content, str):
@@ -146,23 +166,3 @@ def has_image_and_get_texts(encoding: tiktoken.Encoding, content: Union[str, 'li
     token_count = sum(len(encoding.encode(item.get("text", ""))) for item in content if item.get("type") == "text")
     return has_image, token_count

-def process_request(ingest: IngestUnitsParams, *args: Any, **kwargs: Any) -> None:  # noqa: ARG001
-    messages = kwargs.get("messages")
-    if not messages or len(messages) == 0:
-        return
-
-    estimated_token_count = 0
-    has_image = False
-
-    enc = tiktoken.get_encoding("cl100k_base")
-
-    for message in messages:
-        msg_has_image, msg_prompt_tokens = has_image_and_get_texts(enc, message.get('content', ''))
-        if msg_has_image:
-            has_image = True
-            estimated_token_count += msg_prompt_tokens
-
-    if not has_image or estimated_token_count == 0:
-        return
-
-    ingest["units"][_PayiInstrumentor.estimated_prompt_tokens] = Units(input=estimated_token_count, output=0)
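The net effect in this file: the free functions process_request, process_chunk, and process_synchronous_response become methods of _AnthropicProviderRequest, so per-request state (_ingest, _estimated_prompt_tokens) lives on the object instead of being threaded through arguments. A minimal sketch of the streaming bookkeeping, using plain dicts and SimpleNamespace stand-ins for Anthropic's stream events; the ids and token counts are invented, and the vision adjustment via update_for_vision is omitted:

# Illustrative only: how process_chunk accumulates usage across a stream.
from types import SimpleNamespace

ingest = {"units": {}}

# "message_start" carries the provider response id and the prompt-side usage.
start = SimpleNamespace(
    type="message_start",
    message=SimpleNamespace(
        id="msg_123",  # made-up response id
        usage=SimpleNamespace(input_tokens=42, cache_read_input_tokens=10),
    ),
)
ingest["provider_response_id"] = start.message.id
ingest["units"]["text"] = {"input": start.message.usage.input_tokens, "output": 0}
if getattr(start.message.usage, "cache_read_input_tokens", 0) > 0:
    ingest["units"]["text_cache_read"] = {"input": start.message.usage.cache_read_input_tokens, "output": 0}

# "message_delta" closes the stream with the completion-side token count.
delta = SimpleNamespace(type="message_delta", usage=SimpleNamespace(output_tokens=7))
ingest["units"]["text"]["output"] = delta.usage.output_tokens

print(ingest)
# {'units': {'text': {'input': 42, 'output': 7}, 'text_cache_read': {'input': 10, 'output': 0}}, 'provider_response_id': 'msg_123'}

Cache-write tokens are recorded the same way when cache_creation_input_tokens is present on the usage object.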
payi/lib/BedrockInstrumentor.py
CHANGED
@@ -2,13 +2,14 @@ import json
 import logging
 from typing import Any
 from functools import wraps
+from typing_extensions import override

 from wrapt import ObjectProxy, wrap_function_wrapper  # type: ignore

 from payi.types.ingest_units_params import Units, IngestUnitsParams
 from payi.types.pay_i_common_models_api_router_header_info_param import PayICommonModelsAPIRouterHeaderInfoParam

-from .instrument import _IsStreaming, _PayiInstrumentor
+from .instrument import _IsStreaming, _ProviderRequest, _PayiInstrumentor


 class BedrockInstrumentor:
@@ -103,9 +104,7 @@ def wrap_invoke(instrumentor: _PayiInstrumentor, wrapped: Any) -> Any:
         if modelId.startswith("meta.llama3") or modelId.startswith("anthropic."):
             return instrumentor.chat_wrapper(
                 "system.aws.bedrock",
-
-                process_invoke_request,
-                process_synchronous_invoke_response,
+                _BedrockInvokeSynchronousProviderRequest(instrumentor),
                 _IsStreaming.false,
                 wrapped,
                 None,
@@ -119,14 +118,12 @@ def wrap_invoke(instrumentor: _PayiInstrumentor, wrapped: Any) -> Any:
 def wrap_invoke_stream(instrumentor: _PayiInstrumentor, wrapped: Any) -> Any:
     @wraps(wrapped)
     def invoke_wrapper(*args: Any, **kwargs: Any) -> Any:
-
+        model_id: str = kwargs.get("modelId", "")  # type: ignore

-        if
+        if model_id.startswith("meta.llama3") or model_id.startswith("anthropic."):
             return instrumentor.chat_wrapper(
                 "system.aws.bedrock",
-
-                process_invoke_request,
-                None,
+                _BedrockInvokeStreamingProviderRequest(instrumentor, model_id),
                 _IsStreaming.true,
                 wrapped,
                 None,
@@ -145,9 +142,7 @@ def wrap_converse(instrumentor: _PayiInstrumentor, wrapped: Any) -> Any:
         if modelId.startswith("meta.llama3") or modelId.startswith("anthropic."):
             return instrumentor.chat_wrapper(
                 "system.aws.bedrock",
-
-                process_converse_request,
-                process_synchronous_converse_response,
+                _BedrockConverseSynchronousProviderRequest(instrumentor),
                 _IsStreaming.false,
                 wrapped,
                 None,
@@ -161,14 +156,12 @@ def wrap_converse(instrumentor: _PayiInstrumentor, wrapped: Any) -> Any:
 def wrap_converse_stream(instrumentor: _PayiInstrumentor, wrapped: Any) -> Any:
     @wraps(wrapped)
     def invoke_wrapper(*args: Any, **kwargs: Any) -> Any:
-
+        model_id: str = kwargs.get("modelId", "")  # type: ignore

-        if
+        if model_id.startswith("meta.llama3") or model_id.startswith("anthropic."):
             return instrumentor.chat_wrapper(
                 "system.aws.bedrock",
-
-                process_converse_request,
-                None,
+                _BedrockConverseStreamingProviderRequest(instrumentor),
                 _IsStreaming.true,
                 wrapped,
                 None,
@@ -179,104 +172,121 @@ def wrap_converse_stream(instrumentor: _PayiInstrumentor, wrapped: Any) -> Any:

     return invoke_wrapper

-
-
-
+class _BedrockInvokeStreamingProviderRequest(_ProviderRequest):
+    def __init__(self, instrumentor: _PayiInstrumentor, model_id: str):
+        super().__init__(instrumentor)
+        self._is_anthropic: bool = model_id.startswith("anthropic.")

-
-
-
+    @override
+    def process_chunk(self, chunk: Any) -> bool:
+        if self._is_anthropic:
+            return self.process_invoke_streaming_anthropic_chunk(chunk)
+        else:
+            return self.process_invoke_streaming_llama_chunk(chunk)

-
+    def process_invoke_streaming_anthropic_chunk(self, chunk: str) -> bool:
+        chunk_dict = json.loads(chunk)
+        type = chunk_dict.get("type", "")

-
+        if type == "message_start":
+            usage = chunk_dict['message']['usage']
+            units = self._ingest["units"]

-
-            if text_cache_write > 0:
-                units["text_cache_write"] = Units(input=text_cache_write, output=0)
+            input = _PayiInstrumentor.update_for_vision(usage['input_tokens'], units, self._estimated_prompt_tokens)

-
-            if text_cache_read > 0:
-                units["text_cache_read"] = Units(input=text_cache_read, output=0)
+            units["text"] = Units(input=input, output=0)

-
-
-
+            text_cache_write: int = usage.get("cache_creation_input_tokens", 0)
+            if text_cache_write > 0:
+                units["text_cache_write"] = Units(input=text_cache_write, output=0)

-
-
-
-
-
-
-
-
-
+            text_cache_read: int = usage.get("cache_read_input_tokens", 0)
+            if text_cache_read > 0:
+                units["text_cache_read"] = Units(input=text_cache_read, output=0)
+
+        elif type == "message_delta":
+            usage = chunk_dict['usage']
+            self._ingest["units"]["text"]["output"] = usage['output_tokens']
+
+        return True
+
+    def process_invoke_streaming_llama_chunk(self, chunk: str) -> bool:
+        chunk_dict = json.loads(chunk)
+        metrics = chunk_dict.get("amazon-bedrock-invocationMetrics", {})
+        if metrics:
+            input = metrics.get("inputTokenCount", 0)
+            output = metrics.get("outputTokenCount", 0)
+            self._ingest["units"]["text"] = Units(input=input, output=output)
+
+        return True
+
+class _BedrockInvokeSynchronousProviderRequest(_ProviderRequest):
+    @override
+    def process_synchronous_response(
+        self,
         response: Any,
-        ingest: IngestUnitsParams,
         log_prompt_and_response: bool,
-
-        **kargs: Any) -> Any:  # noqa: ARG001
+        kwargs: Any) -> Any:

-
+        metadata = response.get("ResponseMetadata", {})

-
-
-
+        request_id = metadata.get("RequestId", "")
+        if request_id:
+            self._ingest["provider_response_id"] = request_id

-
-
-
+        response_headers = metadata.get("HTTPHeaders", {}).copy()
+        if response_headers:
+            self._ingest["provider_response_headers"] = [PayICommonModelsAPIRouterHeaderInfoParam(name=k, value=v) for k, v in response_headers.items()]

-
-
-
-
-
+        response["body"] = InvokeResponseWrapper(
+            response=response["body"],
+            instrumentor=self._instrumentor,
+            ingest=self._ingest,
+            log_prompt_and_response=log_prompt_and_response)

-
+        return response

-
-
-
-
-
+class _BedrockConverseSynchronousProviderRequest(_ProviderRequest):
+    @override
+    def process_synchronous_response(
+        self,
+        response: 'dict[str, Any]',
+        log_prompt_and_response: bool,
+        kwargs: Any) -> Any:

-
-        usage = metadata['usage']
+        usage = response["usage"]
         input = usage["inputTokens"]
         output = usage["outputTokens"]
-
+
+        units: dict[str, Units] = self._ingest["units"]
+        units["text"] = Units(input=input, output=output)

-
-        response: 'dict[str, Any]',
-        ingest: IngestUnitsParams,
-        log_prompt_and_response: bool,
-        **kargs: Any) -> Any:  # noqa: ARG001
+        metadata = response.get("ResponseMetadata", {})

-
-
-
-
-        units: dict[str, Units] = ingest["units"]
-        units["text"] = Units(input=input, output=output)
+        request_id = metadata.get("RequestId", "")
+        if request_id:
+            self._ingest["provider_response_id"] = request_id

-
+        response_headers = metadata.get("HTTPHeaders", {})
+        if response_headers:
+            self._ingest["provider_response_headers"] = [PayICommonModelsAPIRouterHeaderInfoParam(name=k, value=v) for k, v in response_headers.items()]

-
-
-
+        if log_prompt_and_response:
+            response_without_metadata = response.copy()
+            response_without_metadata.pop("ResponseMetadata", None)
+            self._ingest["provider_response_json"] = json.dumps(response_without_metadata)

-
-        if response_headers:
-            ingest["provider_response_headers"] = [PayICommonModelsAPIRouterHeaderInfoParam(name=k, value=v) for k, v in response_headers.items()]
+        return None

-
-
-
-
+class _BedrockConverseStreamingProviderRequest(_ProviderRequest):
+    @override
+    def process_chunk(self, chunk: 'dict[str, Any]') -> bool:
+        metadata = chunk.get("metadata", {})

-
+        if metadata:
+            usage = metadata['usage']
+            input = usage["inputTokens"]
+            output = usage["outputTokens"]
+            self._ingest["units"]["text"] = Units(input=input, output=output)

-
-        return
+        return True
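The Bedrock changes split streaming-invoke handling by model family: _BedrockInvokeStreamingProviderRequest parses Anthropic-style "message_start"/"message_delta" events, while Llama 3 responses report their totals in a trailing "amazon-bedrock-invocationMetrics" block. A minimal sketch of the two chunk shapes; the sample payloads are invented, but the field names follow the code above:

# Illustrative only: extracting token counts from the two invoke-stream chunk formats.
import json

def tokens_from_invoke_chunk(chunk: str, is_anthropic: bool) -> "tuple[int, int]":
    """Return (input_tokens, output_tokens) reported by one streamed chunk; zeros if absent."""
    d = json.loads(chunk)
    if is_anthropic:
        if d.get("type") == "message_start":
            return d["message"]["usage"]["input_tokens"], 0
        if d.get("type") == "message_delta":
            return 0, d["usage"]["output_tokens"]
        return 0, 0
    metrics = d.get("amazon-bedrock-invocationMetrics", {})
    return metrics.get("inputTokenCount", 0), metrics.get("outputTokenCount", 0)

print(tokens_from_invoke_chunk('{"type": "message_start", "message": {"usage": {"input_tokens": 42}}}', True))   # (42, 0)
print(tokens_from_invoke_chunk('{"amazon-bedrock-invocationMetrics": {"inputTokenCount": 42, "outputTokenCount": 7}}', False))  # (42, 7)

The converse-stream path is simpler: usage arrives in a single "metadata" chunk, which _BedrockConverseStreamingProviderRequest.process_chunk reads directly.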
payi/lib/OpenAIInstrumentor.py
CHANGED
@@ -1,6 +1,7 @@
 import json
 import logging
-from typing import Any, Union
+from typing import Any, Union, Optional
+from typing_extensions import override
 from importlib.metadata import version

 import tiktoken  # type: ignore
@@ -9,7 +10,7 @@ from wrapt import wrap_function_wrapper  # type: ignore
 from payi.types import IngestUnitsParams
 from payi.types.ingest_units_params import Units

-from .instrument import _IsStreaming, _PayiInstrumentor
+from .instrument import _IsStreaming, _ProviderRequest, _PayiInstrumentor


 class OpenAiInstrumentor:
@@ -63,9 +64,7 @@ def embeddings_wrapper(
     ) -> Any:
         return instrumentor.chat_wrapper(
             "system.openai",
-
-            None,  # process_chat_request,
-            process_ebmeddings_synchronous_response,
+            _OpenAiEmbeddingsProviderRequest(instrumentor),
             _IsStreaming.false,
             wrapped,
             instance,
@@ -83,9 +82,7 @@ async def aembeddings_wrapper(
     ) -> Any:
         return await instrumentor.achat_wrapper(
             "system.openai",
-
-            None,  # process_chat_request,
-            process_ebmeddings_synchronous_response,
+            _OpenAiEmbeddingsProviderRequest(instrumentor),
             _IsStreaming.false,
             wrapped,
             instance,
@@ -103,9 +100,7 @@ def chat_wrapper(
     ) -> Any:
         return instrumentor.chat_wrapper(
             "system.openai",
-
-            process_chat_request,
-            process_chat_synchronous_response,
+            _OpenAiChatProviderRequest(instrumentor),
             _IsStreaming.kwargs,
             wrapped,
             instance,
@@ -123,9 +118,7 @@ async def achat_wrapper(
     ) -> Any:
         return await instrumentor.achat_wrapper(
             "system.openai",
-
-            process_chat_request,
-            process_chat_synchronous_response,
+            _OpenAiChatProviderRequest(instrumentor),
             _IsStreaming.kwargs,
             wrapped,
             instance,
@@ -133,13 +126,89 @@ async def achat_wrapper(
             kwargs,
         )

-
-
+class _OpenAiEmbeddingsProviderRequest(_ProviderRequest):
+    @override
+    def process_synchronous_response(
+        self,
+        response: Any,
+        log_prompt_and_response: bool,
+        kwargs: Any) -> Any:
+        return process_chat_synchronous_response(response, self._ingest, log_prompt_and_response, self._estimated_prompt_tokens)
+
+class _OpenAiChatProviderRequest(_ProviderRequest):
+    def __init__(self, instrumentor: _PayiInstrumentor):
+        super().__init__(instrumentor)
+        self._include_usage_added = False
+
+    @override
+    def process_chunk(self, chunk: Any) -> bool:
+        model = model_to_dict(chunk)
+
+        if "provider_response_id" not in self._ingest:
+            response_id = model.get("id", None)
+            if response_id:
+                self._ingest["provider_response_id"] = response_id
+
+        send_chunk_to_client = True
+
+        usage = model.get("usage")
+        if usage:
+            add_usage_units(usage, self._ingest["units"], self._estimated_prompt_tokens)
+
+            # If we aded "include_usage" in the request on behalf of the client, do not return the extra
+            # packet which contains the usage to the client as they are not expecting the data
+            if self._include_usage_added:
+                send_chunk_to_client = False

-
+        return send_chunk_to_client
+
+    @override
+    def process_request(self, kwargs: Any) -> None:  # noqa: ARG001
+        messages = kwargs.get("messages", None)
+        if not messages or len(messages) == 0:
+            return
+
+        estimated_token_count = 0
+        has_image = False
+
+        try:
+            enc = tiktoken.encoding_for_model(kwargs.get("model"))  # type: ignore
+        except KeyError:
+            enc = tiktoken.get_encoding("o200k_base")  # type: ignore
+
+        for message in messages:
+            msg_has_image, msg_prompt_tokens = has_image_and_get_texts(enc, message.get('content', ''))
+            if msg_has_image:
+                has_image = True
+                estimated_token_count += msg_prompt_tokens
+
+        if has_image and estimated_token_count > 0:
+            self._estimated_prompt_tokens = estimated_token_count
+
+        stream: bool = kwargs.get("stream", False)
+        if stream:
+            add_include_usage = True
+
+            stream_options: dict[str, Any] = kwargs.get("stream_options", None)
+            if stream_options and "include_usage" in stream_options:
+                add_include_usage = stream_options["include_usage"] == False
+
+            if add_include_usage:
+                kwargs['stream_options'] = {"include_usage": True}
+                self._include_usage_added = True
+
+    @override
+    def process_synchronous_response(
+        self,
+        response: Any,
+        log_prompt_and_response: bool,
+        kwargs: Any) -> Any:
+        process_chat_synchronous_response(response, self._ingest, log_prompt_and_response, self._estimated_prompt_tokens)
+
+def process_chat_synchronous_response(response: str, ingest: IngestUnitsParams, log_prompt_and_response: bool, estimated_prompt_tokens: Optional[int]) -> Any:
     response_dict = model_to_dict(response)

-    add_usage_units(response_dict.get("usage", {}), ingest["units"])
+    add_usage_units(response_dict.get("usage", {}), ingest["units"], estimated_prompt_tokens)

     if log_prompt_and_response:
         ingest["provider_response_json"] = [json.dumps(response_dict)]
@@ -149,19 +218,6 @@ def process_chat_synchronous_response(response: str, ingest: IngestUnitsParams,

     return None

-def process_chat_chunk(chunk: Any, ingest: IngestUnitsParams) -> None:
-    model = model_to_dict(chunk)
-
-    if "provider_response_id" not in ingest:
-        response_id = model.get("id", None)
-        if response_id:
-            ingest["provider_response_id"] = response_id
-
-    usage = model.get("usage")
-    if usage:
-        add_usage_units(usage, ingest["units"])
-
-
 def model_to_dict(model: Any) -> Any:
     if version("pydantic") < "2.0.0":
         return model.dict()
@@ -173,7 +229,7 @@ def model_to_dict(model: Any) -> Any:
     return model


-def add_usage_units(usage: "dict[str, Any]", units: "dict[str, Units]") -> None:
+def add_usage_units(usage: "dict[str, Any]", units: "dict[str, Units]", estimated_prompt_tokens: Optional[int]) -> None:
     input = usage["prompt_tokens"] if "prompt_tokens" in usage else 0
     output = usage["completion_tokens"] if "completion_tokens" in usage else 0
     input_cache = 0
@@ -184,7 +240,7 @@ def add_usage_units(usage: "dict[str, Any]", units: "dict[str, Units]") -> None:
     if input_cache != 0:
         units["text_cache_read"] = Units(input=input_cache, output=0)

-    input = _PayiInstrumentor.update_for_vision(input - input_cache, units)
+    input = _PayiInstrumentor.update_for_vision(input - input_cache, units, estimated_prompt_tokens)

     units["text"] = Units(input=input, output=output)

@@ -197,28 +253,4 @@ def has_image_and_get_texts(encoding: tiktoken.Encoding, content: Union[str, 'li
         return has_image, 0

     token_count = sum(len(encoding.encode(item.get("text", ""))) for item in content if item.get("type") == "text")
-    return has_image, token_count
-
-def process_chat_request(ingest: IngestUnitsParams, *args: Any, **kwargs: Any) -> None:  # noqa: ARG001
-    messages = kwargs.get("messages")
-    if not messages or len(messages) == 0:
-        return
-
-    estimated_token_count = 0
-    has_image = False
-
-    try:
-        enc = tiktoken.encoding_for_model(kwargs.get("model"))  # type: ignore
-    except KeyError:
-        enc = tiktoken.get_encoding("o200k_base")  # type: ignore
-
-    for message in messages:
-        msg_has_image, msg_prompt_tokens = has_image_and_get_texts(enc, message.get('content', ''))
-        if msg_has_image:
-            has_image = True
-            estimated_token_count += msg_prompt_tokens
-
-    if not has_image or estimated_token_count == 0:
-        return
-
-    ingest["units"][_PayiInstrumentor.estimated_prompt_tokens] = Units(input=estimated_token_count, output=0)
+    return has_image, token_count
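Besides the class refactor, _OpenAiChatProviderRequest.process_request now opts streamed requests into usage reporting: when the caller did not set stream_options={"include_usage": True} themselves, it is injected, and process_chunk later returns False for the resulting usage-only chunk so the stream wrapper swallows it rather than surfacing a packet the caller never asked for. A sketch of that decision restated as a free function; the request dict is hypothetical and edge cases in the original (an explicit include_usage=False is also overridden) are simplified:

# Illustrative only: the stream_options decision, extracted from process_request.
from typing import Any

def ensure_include_usage(kwargs: "dict[str, Any]") -> bool:
    """Return True when include_usage was injected on the caller's behalf,
    meaning the trailing usage-only chunk must be hidden from the caller."""
    if not kwargs.get("stream", False):
        return False
    stream_options = kwargs.get("stream_options") or {}
    if stream_options.get("include_usage") is True:
        return False  # the caller asked for usage; pass the extra chunk through
    kwargs["stream_options"] = {"include_usage": True}
    return True

request = {"model": "gpt-4o", "stream": True, "messages": []}  # hypothetical request
print(ensure_include_usage(request), request["stream_options"])  # True {'include_usage': True}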
payi/lib/instrument.py
CHANGED
@@ -21,6 +21,21 @@ from .helpers import PayiCategories
 from .Stopwatch import Stopwatch


+class _ProviderRequest:
+    def __init__(self, instrumentor: '_PayiInstrumentor'):
+        self._instrumentor: '_PayiInstrumentor' = instrumentor
+        self._estimated_prompt_tokens: Optional[int] = None
+        self._ingest: IngestUnitsParams
+
+    def process_request(self, _kwargs: Any) -> None:
+        return
+
+    def process_chunk(self, _chunk: Any) -> bool:
+        return True
+
+    def process_synchronous_response(self, response: Any, log_prompt_and_response: bool, kwargs: Any) -> Optional[object]:  # noqa: ARG002
+        return None
+
 class PayiInstrumentConfig(TypedDict, total=False):
     proxy: bool
     global_instrumentation_enabled: bool
@@ -48,8 +63,6 @@ class _IsStreaming(Enum):
     kwargs = 2

 class _PayiInstrumentor:
-    estimated_prompt_tokens: str = "estimated_prompt_tokens"
-
     def __init__(
         self,
         payi: Optional[Payi],
@@ -484,9 +497,7 @@ class _PayiInstrumentor:
     async def achat_wrapper(
         self,
         category: str,
-
-        process_request: Optional[Callable[[IngestUnitsParams, Any, Any], None]],
-        process_synchronous_response: Any,
+        provider: _ProviderRequest,
         is_streaming: _IsStreaming,
         wrapped: Any,
         instance: Any,
@@ -511,8 +522,8 @@ class _PayiInstrumentor:

             return await wrapped(*args, **kwargs)

-
-
+        provider._ingest = {"category": category, "units": {}}  # type: ignore
+        provider._ingest["resource"] = kwargs.get("model", "")

         if category == PayiCategories.openai and instance and hasattr(instance, "_client"):
             from .OpenAIInstrumentor import OpenAiInstrumentor  # noqa: I001
@@ -530,21 +541,20 @@ class _PayiInstrumentor:
                 logging.error("Azure OpenAI invalid resource scope, not ingesting")
                 return wrapped(*args, **kwargs)

-
+            provider._ingest["resource_scope"] = resource_scope

             category = PayiCategories.azure_openai

-
-
+        provider._ingest["category"] = category
+        provider._ingest["resource"] = route_as_resource

         current_frame = inspect.currentframe()
         # f_back excludes the current frame, strip() cleans up whitespace and newlines
         stack = [frame.strip() for frame in traceback.format_stack(current_frame.f_back)]  # type: ignore

-
+        provider._ingest['properties'] = { 'system.stack_trace': json.dumps(stack) }

-
-        process_request(ingest, (), instance)
+        provider.process_request(kwargs)

         sw = Stopwatch()
         stream: bool = False
@@ -557,7 +567,7 @@ class _PayiInstrumentor:
             stream = False

         try:
-            self._prepare_ingest(
+            self._prepare_ingest(provider._ingest, extra_headers, **kwargs)
             sw.start()
             response = await wrapped(*args, **kwargs)
@@ -575,9 +585,8 @@ class _PayiInstrumentor:
                     instance=instance,
                     instrumentor=self,
                     log_prompt_and_response=self._log_prompt_and_response,
-                    ingest=ingest,
                     stopwatch=sw,
-
+                    provider=provider,
                     is_bedrock=False,
                 )
@@ -585,28 +594,25 @@ class _PayiInstrumentor:

         sw.stop()
         duration = sw.elapsed_ms_int()
-
-
+        provider._ingest["end_to_end_latency_ms"] = duration
+        provider._ingest["http_status_code"] = 200

-
-
-
-
-            log_prompt_and_response=self._log_prompt_and_response,
-            instrumentor=self)
-        if return_result:
-            return return_result
+        return_result: Any = provider.process_synchronous_response(
+            response=response,
+            log_prompt_and_response=self._log_prompt_and_response,
+            kwargs=kwargs)

-
+        if return_result:
+            return return_result
+
+        await self._aingest_units(provider._ingest)

         return response

     def chat_wrapper(
         self,
         category: str,
-
-        process_request: Optional[Callable[[IngestUnitsParams, Any, Any], None]],
-        process_synchronous_response: Any,
+        provider: _ProviderRequest,
         is_streaming: _IsStreaming,
         wrapped: Any,
         instance: Any,
@@ -635,13 +641,13 @@ class _PayiInstrumentor:

             return wrapped(*args, **kwargs)

-
+        provider._ingest = {"category": category, "units": {}}  # type: ignore
         if is_bedrock:
             # boto3 doesn't allow extra_headers
             kwargs.pop("extra_headers", None)
-
+            provider._ingest["resource"] = kwargs.get("modelId", "")
         else:
-
+            provider._ingest["resource"] = kwargs.get("model", "")

         if category == PayiCategories.openai and instance and hasattr(instance, "_client"):
             from .OpenAIInstrumentor import OpenAiInstrumentor  # noqa: I001
@@ -659,21 +665,20 @@ class _PayiInstrumentor:
                 logging.error("Azure OpenAI invalid resource scope, not ingesting")
                 return wrapped(*args, **kwargs)

-
+            provider._ingest["resource_scope"] = resource_scope

             category = PayiCategories.azure_openai

-
-
+        provider._ingest["category"] = category
+        provider._ingest["resource"] = route_as_resource

         current_frame = inspect.currentframe()
         # f_back excludes the current frame, strip() cleans up whitespace and newlines
         stack = [frame.strip() for frame in traceback.format_stack(current_frame.f_back)]  # type: ignore

-
+        provider._ingest['properties'] = { 'system.stack_trace': json.dumps(stack) }

-
-        process_request(ingest, (), kwargs)
+        provider.process_request(kwargs)

         sw = Stopwatch()
         stream: bool = False
@@ -686,7 +691,7 @@ class _PayiInstrumentor:
             stream = False

         try:
-            self._prepare_ingest(
+            self._prepare_ingest(provider._ingest, extra_headers, **kwargs)
             sw.start()
             response = wrapped(*args, **kwargs)
@@ -704,9 +709,8 @@ class _PayiInstrumentor:
                 instance=instance,
                 instrumentor=self,
                 log_prompt_and_response=self._log_prompt_and_response,
-                ingest=ingest,
                 stopwatch=sw,
-
+                provider=provider,
                 is_bedrock=is_bedrock,
             )
@@ -721,19 +725,17 @@ class _PayiInstrumentor:

         sw.stop()
         duration = sw.elapsed_ms_int()
-
-
+        provider._ingest["end_to_end_latency_ms"] = duration
+        provider._ingest["http_status_code"] = 200

-
-
-
-
-
-
-        if return_result:
-            return return_result
+        return_result: Any = provider.process_synchronous_response(
+            response=response,
+            log_prompt_and_response=self._log_prompt_and_response,
+            kwargs=kwargs)
+        if return_result:
+            return return_result

-        self._ingest_units(
+        self._ingest_units(provider._ingest)

         return response

@@ -808,13 +810,12 @@ class _PayiInstrumentor:
             extra_headers[PayiHeaderNames.experience_id] = context_experience_id

     @staticmethod
-    def update_for_vision(input: int, units: 'dict[str, Units]') -> int:
-        if
-
-            vision = input - prompt_token_estimate
+    def update_for_vision(input: int, units: 'dict[str, Units]', estimated_prompt_tokens: Optional[int]) -> int:
+        if estimated_prompt_tokens:
+            vision = input - estimated_prompt_tokens
             if (vision > 0):
                 units["vision"] = Units(input=vision, output=0)
-                input =
+                input = estimated_prompt_tokens

         return input

@@ -856,16 +857,15 @@ class ChatStreamWrapper(ObjectProxy):  # type: ignore
         response: Any,
         instance: Any,
         instrumentor: _PayiInstrumentor,
-        ingest: IngestUnitsParams,
         stopwatch: Stopwatch,
-
+        provider: _ProviderRequest,
         log_prompt_and_response: bool = True,
         is_bedrock: bool = False,
     ) -> None:

         bedrock_from_stream: bool = False
         if is_bedrock:
-
+            provider._ingest["provider_response_id"] = response["ResponseMetadata"]["RequestId"]
             stream = response.get("stream", None)

             if stream:
@@ -882,11 +882,10 @@ class ChatStreamWrapper(ObjectProxy):  # type: ignore

         self._instrumentor = instrumentor
         self._stopwatch: Stopwatch = stopwatch
-        self._ingest: IngestUnitsParams = ingest
         self._log_prompt_and_response: bool = log_prompt_and_response
         self._responses: list[str] = []

-        self.
+        self._provider: _ProviderRequest = provider

         self._first_token: bool = True
         self._is_bedrock: bool = is_bedrock
@@ -906,7 +905,7 @@ class ChatStreamWrapper(ObjectProxy):  # type: ignore

     def __iter__(self) -> Any:
         if self._is_bedrock:
-            # MUST
+            # MUST reside in a separate function so that the yield statement (e.g. the generator) doesn't implicitly return its own iterator and overriding self
             return self._iter_bedrock()
         return self
@@ -935,7 +934,9 @@ class ChatStreamWrapper(ObjectProxy):  # type: ignore
             self._stop_iteration()
             raise e
         else:
-            self._evaluate_chunk(chunk)
+            if self._evaluate_chunk(chunk) == False:
+                return self.__next__()
+
             return chunk

     async def __anext__(self) -> Any:
@@ -946,35 +947,35 @@ class ChatStreamWrapper(ObjectProxy):  # type: ignore
             await self._astop_iteration()
             raise e
         else:
-            self._evaluate_chunk(chunk)
+            if self._evaluate_chunk(chunk) == False:
+                return await self.__anext__()
             return chunk

-    def _evaluate_chunk(self, chunk: Any) ->
+    def _evaluate_chunk(self, chunk: Any) -> bool:
         if self._first_token:
-            self._ingest["time_to_first_token_ms"] = self._stopwatch.elapsed_ms_int()
+            self._provider._ingest["time_to_first_token_ms"] = self._stopwatch.elapsed_ms_int()
             self._first_token = False

         if self._log_prompt_and_response:
             self._responses.append(self.chunk_to_json(chunk))

-
-        self._process_chunk(chunk, self._ingest)
+        return self._provider.process_chunk(chunk)

     def _process_stop_iteration(self) -> None:
         self._stopwatch.stop()
-        self._ingest["end_to_end_latency_ms"] = self._stopwatch.elapsed_ms_int()
-        self._ingest["http_status_code"] = 200
+        self._provider._ingest["end_to_end_latency_ms"] = self._stopwatch.elapsed_ms_int()
+        self._provider._ingest["http_status_code"] = 200

         if self._log_prompt_and_response:
-            self._ingest["provider_response_json"] = self._responses
+            self._provider._ingest["provider_response_json"] = self._responses

     async def _astop_iteration(self) -> None:
         self._process_stop_iteration()
-        await self._instrumentor._aingest_units(self._ingest)
+        await self._instrumentor._aingest_units(self._provider._ingest)

     def _stop_iteration(self) -> None:
         self._process_stop_iteration()
-        self._instrumentor._ingest_units(self._ingest)
+        self._instrumentor._ingest_units(self._provider._ingest)

     @staticmethod
     def chunk_to_json(chunk: Any) -> str:
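Two pieces anchor this file's refactor. First, the new _ProviderRequest base class is the per-call object: chat_wrapper/achat_wrapper attach _ingest to it, call provider.process_request(kwargs) before dispatch, and let process_chunk/process_synchronous_response fill in units, replacing the old loose callback parameters. Second, update_for_vision now receives the token estimate as a parameter instead of reading a sentinel "estimated_prompt_tokens" unit from the ingest dict. Its arithmetic, restated standalone with plain dicts (the 1200/200 figures are invented):

# Illustrative only: when a prompt contains images, the instrumentors pre-compute
# an estimated text-token count with tiktoken; any reported input tokens beyond
# that estimate are attributed to "vision" units, and "text" keeps the estimate.
from typing import Optional

def update_for_vision(input: int, units: "dict[str, dict]", estimated_prompt_tokens: Optional[int]) -> int:
    if estimated_prompt_tokens:
        vision = input - estimated_prompt_tokens
        if vision > 0:
            units["vision"] = {"input": vision, "output": 0}
            input = estimated_prompt_tokens
    return input

units: "dict[str, dict]" = {}
# e.g. the provider reports 1200 input tokens, tiktoken estimated 200 text tokens:
text_input = update_for_vision(1200, units, 200)
print(text_input, units)  # 200 {'vision': {'input': 1000, 'output': 0}}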
{payi-0.1.0a64.dist-info → payi-0.1.0a65.dist-info}/RECORD
CHANGED

@@ -11,7 +11,7 @@ payi/_resource.py,sha256=j2jIkTr8OIC8sU6-05nxSaCyj4MaFlbZrwlyg4_xJos,1088
 payi/_response.py,sha256=CfrNS_3wbL8o9dRyRVfZQ5E1GUlA4CUIUEK8olmfGqE,28777
 payi/_streaming.py,sha256=Z_wIyo206T6Jqh2rolFg2VXZgX24PahLmpURp0-NssU,10092
 payi/_types.py,sha256=2mbMK86K3W1aMTW7sOGQ-VND6-A2IuXKm8p4sYFztBU,6141
-payi/_version.py,sha256=
+payi/_version.py,sha256=DNP1TOymCfAVVUyTcBcwVSj1Nor6vREMNSwCUds3Phk,165
 payi/pagination.py,sha256=k2356QGPOUSjRF2vHpwLBdF6P-2vnQzFfRIJQAHGQ7A,1258
 payi/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 payi/_utils/__init__.py,sha256=PNZ_QJuzZEgyYXqkO1HVhGkj5IU9bglVUcw7H-Knjzw,2062
@@ -24,12 +24,12 @@ payi/_utils/_transform.py,sha256=xfcRTFidCyPhQ7hXeivxpAS0x-NhTyr20iXm1cKcJYk,148
 payi/_utils/_typing.py,sha256=nTJz0jcrQbEgxwy4TtAkNxuU0QHHlmc6mQtA6vIR8tg,4501
 payi/_utils/_utils.py,sha256=8UmbPOy_AAr2uUjjFui-VZSrVBHRj6bfNEKRp5YZP2A,12004
 payi/lib/.keep,sha256=wuNrz-5SXo3jJaJOJgz4vFHM41YH_g20F5cRQo0vLes,224
-payi/lib/AnthropicInstrumentor.py,sha256=
-payi/lib/BedrockInstrumentor.py,sha256=
-payi/lib/OpenAIInstrumentor.py,sha256=
+payi/lib/AnthropicInstrumentor.py,sha256=OivTBeTnwVYDaaoGDj2V77JmjEb2aKQ6Se295VAm_gg,5615
+payi/lib/BedrockInstrumentor.py,sha256=CDfZ_IDEuQ2PrLcmtKGOX69sPhiKqKNIoZ2c0W0mIR8,10595
+payi/lib/OpenAIInstrumentor.py,sha256=_LV_IqWBAqvBlzbYQHolVmGioTn60-zUkNgp179rIFk,8305
 payi/lib/Stopwatch.py,sha256=7OJlxvr2Jyb6Zr1LYCYKczRB7rDVKkIR7gc4YoleNdE,764
 payi/lib/helpers.py,sha256=dEscgoiCneUx1rbgayt8P-s-xi0gKiN2vWiKYMS7oiQ,3830
-payi/lib/instrument.py,sha256=
+payi/lib/instrument.py,sha256=zIo8ZdU2qQchC_d48OcH_Df5tYTWI7JGVGm08p2Riak,43079
 payi/resources/__init__.py,sha256=1rtrPLWbNt8oJGOp6nwPumKLJ-ftez0B6qwLFyfcoP4,2972
 payi/resources/ingest.py,sha256=ifKMKylIkfCF-uGFPttr_VG3vWxsqntOOBrrU4_g1zk,21627
 payi/resources/categories/__init__.py,sha256=w5gMiPdBSzJA_qfoVtFBElaoe8wGf_O63R7R1Spr6Gk,1093
@@ -135,7 +135,7 @@ payi/types/use_cases/definitions/kpi_retrieve_response.py,sha256=uQXliSvS3k-yDYw
 payi/types/use_cases/definitions/kpi_update_params.py,sha256=jbawdWAdMnsTWVH0qfQGb8W7_TXe3lq4zjSRu44d8p8,373
 payi/types/use_cases/definitions/kpi_update_response.py,sha256=zLyEoT0S8d7XHsnXZYT8tM7yDw0Aze0Mk-_Z6QeMtc8,459
 payi/types/use_cases/definitions/limit_config_create_params.py,sha256=pzQza_16N3z8cFNEKr6gPbFvuGFrwNuGxAYb--Kbo2M,449
-payi-0.1.
-payi-0.1.
-payi-0.1.
-payi-0.1.
+payi-0.1.0a65.dist-info/METADATA,sha256=tYqX7J8pbMs3G74yZiX9T4cJ6wl3UQYn4OMCOUTnSBc,15290
+payi-0.1.0a65.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
+payi-0.1.0a65.dist-info/licenses/LICENSE,sha256=CQt03aM-P4a3Yg5qBg3JSLVoQS3smMyvx7tYg_6V7Gk,11334
+payi-0.1.0a65.dist-info/RECORD,,
{payi-0.1.0a64.dist-info → payi-0.1.0a65.dist-info}/WHEEL
File without changes

{payi-0.1.0a64.dist-info → payi-0.1.0a65.dist-info}/licenses/LICENSE
File without changes