payi 0.1.0a63__py3-none-any.whl → 0.1.0a65__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of payi might be problematic.

@@ -1,6 +1,7 @@
  import json
  import logging
- from typing import Any, Union
+ from typing import Any, Union, Optional
+ from typing_extensions import override
  from importlib.metadata import version

  import tiktoken # type: ignore
@@ -9,7 +10,7 @@ from wrapt import wrap_function_wrapper # type: ignore
  from payi.types import IngestUnitsParams
  from payi.types.ingest_units_params import Units

- from .instrument import _IsStreaming, _PayiInstrumentor
+ from .instrument import _IsStreaming, _ProviderRequest, _PayiInstrumentor


  class OpenAiInstrumentor:
@@ -63,9 +64,7 @@ def embeddings_wrapper(
  ) -> Any:
  return instrumentor.chat_wrapper(
  "system.openai",
- None, # process_chat_chunk,
- None, # process_chat_request,
- process_ebmeddings_synchronous_response,
+ _OpenAiEmbeddingsProviderRequest(instrumentor),
  _IsStreaming.false,
  wrapped,
  instance,
@@ -83,9 +82,7 @@ async def aembeddings_wrapper(
  ) -> Any:
  return await instrumentor.achat_wrapper(
  "system.openai",
- None, # process_chat_chunk,
- None, # process_chat_request,
- process_ebmeddings_synchronous_response,
+ _OpenAiEmbeddingsProviderRequest(instrumentor),
  _IsStreaming.false,
  wrapped,
  instance,
@@ -103,9 +100,7 @@ def chat_wrapper(
  ) -> Any:
  return instrumentor.chat_wrapper(
  "system.openai",
- process_chat_chunk,
- process_chat_request,
- process_chat_synchronous_response,
+ _OpenAiChatProviderRequest(instrumentor),
  _IsStreaming.kwargs,
  wrapped,
  instance,
@@ -123,9 +118,7 @@ async def achat_wrapper(
  ) -> Any:
  return await instrumentor.achat_wrapper(
  "system.openai",
- process_chat_chunk,
- process_chat_request,
- process_chat_synchronous_response,
+ _OpenAiChatProviderRequest(instrumentor),
  _IsStreaming.kwargs,
  wrapped,
  instance,
@@ -133,13 +126,89 @@ async def achat_wrapper(
  kwargs,
  )

- def process_ebmeddings_synchronous_response(response: str, ingest: IngestUnitsParams, log_prompt_and_response: bool, **kwargs: Any) -> Any: # noqa: ARG001
- return process_chat_synchronous_response(response, ingest, log_prompt_and_response, **kwargs)
+ class _OpenAiEmbeddingsProviderRequest(_ProviderRequest):
+ @override
+ def process_synchronous_response(
+ self,
+ response: Any,
+ log_prompt_and_response: bool,
+ kwargs: Any) -> Any:
+ return process_chat_synchronous_response(response, self._ingest, log_prompt_and_response, self._estimated_prompt_tokens)
+
+ class _OpenAiChatProviderRequest(_ProviderRequest):
+ def __init__(self, instrumentor: _PayiInstrumentor):
+ super().__init__(instrumentor)
+ self._include_usage_added = False
+
+ @override
+ def process_chunk(self, chunk: Any) -> bool:
+ model = model_to_dict(chunk)
+
+ if "provider_response_id" not in self._ingest:
+ response_id = model.get("id", None)
+ if response_id:
+ self._ingest["provider_response_id"] = response_id
+
+ send_chunk_to_client = True
+
+ usage = model.get("usage")
+ if usage:
+ add_usage_units(usage, self._ingest["units"], self._estimated_prompt_tokens)
+
+ # If we aded "include_usage" in the request on behalf of the client, do not return the extra
+ # packet which contains the usage to the client as they are not expecting the data
+ if self._include_usage_added:
+ send_chunk_to_client = False

- def process_chat_synchronous_response(response: str, ingest: IngestUnitsParams, log_prompt_and_response: bool, **kwargs: Any) -> Any: # noqa: ARG001
+ return send_chunk_to_client
+
+ @override
+ def process_request(self, kwargs: Any) -> None: # noqa: ARG001
+ messages = kwargs.get("messages", None)
+ if not messages or len(messages) == 0:
+ return
+
+ estimated_token_count = 0
+ has_image = False
+
+ try:
+ enc = tiktoken.encoding_for_model(kwargs.get("model")) # type: ignore
+ except KeyError:
+ enc = tiktoken.get_encoding("o200k_base") # type: ignore
+
+ for message in messages:
+ msg_has_image, msg_prompt_tokens = has_image_and_get_texts(enc, message.get('content', ''))
+ if msg_has_image:
+ has_image = True
+ estimated_token_count += msg_prompt_tokens
+
+ if has_image and estimated_token_count > 0:
+ self._estimated_prompt_tokens = estimated_token_count
+
+ stream: bool = kwargs.get("stream", False)
+ if stream:
+ add_include_usage = True
+
+ stream_options: dict[str, Any] = kwargs.get("stream_options", None)
+ if stream_options and "include_usage" in stream_options:
+ add_include_usage = stream_options["include_usage"] == False
+
+ if add_include_usage:
+ kwargs['stream_options'] = {"include_usage": True}
+ self._include_usage_added = True
+
+ @override
+ def process_synchronous_response(
+ self,
+ response: Any,
+ log_prompt_and_response: bool,
+ kwargs: Any) -> Any:
+ process_chat_synchronous_response(response, self._ingest, log_prompt_and_response, self._estimated_prompt_tokens)
+
+ def process_chat_synchronous_response(response: str, ingest: IngestUnitsParams, log_prompt_and_response: bool, estimated_prompt_tokens: Optional[int]) -> Any:
  response_dict = model_to_dict(response)

- add_usage_units(response_dict.get("usage", {}), ingest["units"])
+ add_usage_units(response_dict.get("usage", {}), ingest["units"], estimated_prompt_tokens)

  if log_prompt_and_response:
  ingest["provider_response_json"] = [json.dumps(response_dict)]
@@ -149,19 +218,6 @@ def process_chat_synchronous_response(response: str, ingest: IngestUnitsParams,

  return None

- def process_chat_chunk(chunk: Any, ingest: IngestUnitsParams) -> None:
- model = model_to_dict(chunk)
-
- if "provider_response_id" not in ingest:
- response_id = model.get("id", None)
- if response_id:
- ingest["provider_response_id"] = response_id
-
- usage = model.get("usage")
- if usage:
- add_usage_units(usage, ingest["units"])
-
-
  def model_to_dict(model: Any) -> Any:
  if version("pydantic") < "2.0.0":
  return model.dict()
@@ -173,7 +229,7 @@ def model_to_dict(model: Any) -> Any:
  return model


- def add_usage_units(usage: "dict[str, Any]", units: "dict[str, Units]") -> None:
+ def add_usage_units(usage: "dict[str, Any]", units: "dict[str, Units]", estimated_prompt_tokens: Optional[int]) -> None:
  input = usage["prompt_tokens"] if "prompt_tokens" in usage else 0
  output = usage["completion_tokens"] if "completion_tokens" in usage else 0
  input_cache = 0
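
Editor's note: add_usage_units now takes the estimated prompt-token count threaded through from process_request (and passes it on to _PayiInstrumentor.update_for_vision in the next hunk). A minimal sketch of the new call shape, assuming the module's own add_usage_units and Units are in scope; the token counts are made up:

usage = {"prompt_tokens": 120, "completion_tokens": 30}  # made-up values
units: dict[str, Units] = {}
add_usage_units(usage, units, None)  # None: no image content, so no vision estimate
# Assuming update_for_vision leaves the count unchanged when no estimate is given,
# units ends up as {"text": Units(input=120, output=30)} with no cache reads.
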
@@ -184,7 +240,7 @@ def add_usage_units(usage: "dict[str, Any]", units: "dict[str, Units]") -> None:
  if input_cache != 0:
  units["text_cache_read"] = Units(input=input_cache, output=0)

- input = _PayiInstrumentor.update_for_vision(input - input_cache, units)
+ input = _PayiInstrumentor.update_for_vision(input - input_cache, units, estimated_prompt_tokens)

  units["text"] = Units(input=input, output=output)

@@ -197,28 +253,4 @@ def has_image_and_get_texts(encoding: tiktoken.Encoding, content: Union[str, 'li
  return has_image, 0

  token_count = sum(len(encoding.encode(item.get("text", ""))) for item in content if item.get("type") == "text")
- return has_image, token_count
-
- def process_chat_request(ingest: IngestUnitsParams, *args: Any, **kwargs: Any) -> None: # noqa: ARG001
- messages = kwargs.get("messages")
- if not messages or len(messages) == 0:
- return
-
- estimated_token_count = 0
- has_image = False
-
- try:
- enc = tiktoken.encoding_for_model(kwargs.get("model")) # type: ignore
- except KeyError:
- enc = tiktoken.get_encoding("o200k_base") # type: ignore
-
- for message in messages:
- msg_has_image, msg_prompt_tokens = has_image_and_get_texts(enc, message.get('content', ''))
- if msg_has_image:
- has_image = True
- estimated_token_count += msg_prompt_tokens
-
- if not has_image or estimated_token_count == 0:
- return
-
- ingest["units"][_PayiInstrumentor.estimated_prompt_tokens] = Units(input=estimated_token_count, output=0)
+ return has_image, token_count
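
Editor's note: the request-side token estimate (now computed in _OpenAiChatProviderRequest.process_request instead of the removed process_chat_request) only fires when a message carries an image part, and only the text parts are encoded. A standalone sketch of that estimation step, using the same tiktoken calls as the diff; the model name and message content are hypothetical:

import tiktoken

try:
    enc = tiktoken.encoding_for_model("gpt-4o")  # hypothetical model name
except KeyError:
    enc = tiktoken.get_encoding("o200k_base")    # fallback encoding used by the diff

content = [
    {"type": "text", "text": "Describe this picture."},
    {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
]
# Mirrors has_image_and_get_texts: encode only the text parts; the presence of an
# image part is what makes the estimate worth recording at all.
estimated = sum(len(enc.encode(p.get("text", ""))) for p in content if p.get("type") == "text")
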
payi/lib/helpers.py CHANGED
@@ -49,6 +49,7 @@ def create_headers(
  use_case_id: Union[str, None] = None,
  use_case_name: Union[str, None] = None,
  use_case_version: Union[int, None] = None,
+ route_as_resource: Union[str, None] = None,
  ) -> Dict[str, str]:
  headers: Dict[str, str] = {}

@@ -68,6 +69,8 @@ def create_headers(
  headers.update({ PayiHeaderNames.use_case_name: use_case_name})
  if use_case_version:
  headers.update({ PayiHeaderNames.use_case_version: str(use_case_version)})
+ if route_as_resource:
+ headers.update({ PayiHeaderNames.route_as_resource: route_as_resource})
  return headers

  def _resolve_payi_base_url(payi_base_url: Union[str, None]) -> str:
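
Editor's note: for the helpers.py change, a hedged usage sketch. create_headers now forwards an optional route_as_resource value into the PayiHeaderNames.route_as_resource header alongside the existing use-case fields; the argument values below are hypothetical:

from payi.lib.helpers import create_headers

headers = create_headers(
    use_case_name="support-chat",     # hypothetical use case
    route_as_resource="gpt-4o-mini",  # hypothetical resource to route to
)
# headers now includes the PayiHeaderNames.route_as_resource entry in addition
# to the use-case headers.
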