not-again-ai 0.16.0__py3-none-any.whl → 0.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- a/not_again_ai/llm/chat_completion/__init__.py
+++ b/not_again_ai/llm/chat_completion/__init__.py
@@ -1,4 +1,4 @@
-from not_again_ai.llm.chat_completion.interface import chat_completion
+from not_again_ai.llm.chat_completion.interface import chat_completion, chat_completion_stream
 from not_again_ai.llm.chat_completion.types import ChatCompletionRequest
 
-__all__ = ["ChatCompletionRequest", "chat_completion"]
+__all__ = ["ChatCompletionRequest", "chat_completion", "chat_completion_stream"]
--- a/not_again_ai/llm/chat_completion/interface.py
+++ b/not_again_ai/llm/chat_completion/interface.py
@@ -1,9 +1,9 @@
-from collections.abc import Callable
+from collections.abc import AsyncGenerator, Callable
 from typing import Any
 
-from not_again_ai.llm.chat_completion.providers.ollama_api import ollama_chat_completion
-from not_again_ai.llm.chat_completion.providers.openai_api import openai_chat_completion
-from not_again_ai.llm.chat_completion.types import ChatCompletionRequest, ChatCompletionResponse
+from not_again_ai.llm.chat_completion.providers.ollama_api import ollama_chat_completion, ollama_chat_completion_stream
+from not_again_ai.llm.chat_completion.providers.openai_api import openai_chat_completion, openai_chat_completion_stream
+from not_again_ai.llm.chat_completion.types import ChatCompletionChunk, ChatCompletionRequest, ChatCompletionResponse
 
 
 def chat_completion(
@@ -30,3 +30,32 @@ def chat_completion(
         return ollama_chat_completion(request, client)
     else:
         raise ValueError(f"Provider {provider} not supported")
+
+
+async def chat_completion_stream(
+    request: ChatCompletionRequest,
+    provider: str,
+    client: Callable[..., Any],
+) -> AsyncGenerator[ChatCompletionChunk, None]:
+    """Stream a chat completion response from the given provider. Currently supported providers:
+    - `openai` - OpenAI
+    - `azure_openai` - Azure OpenAI
+    - `ollama` - Ollama
+
+    Args:
+        request: Request parameter object
+        provider: The supported provider name
+        client: Client information, see the provider's implementation for what can be provided
+
+    Returns:
+        AsyncGenerator[ChatCompletionChunk, None]
+    """
+    request.stream = True
+    if provider == "openai" or provider == "azure_openai":
+        async for chunk in openai_chat_completion_stream(request, client):
+            yield chunk
+    elif provider == "ollama":
+        async for chunk in ollama_chat_completion_stream(request, client):
+            yield chunk
+    else:
+        raise ValueError(f"Provider {provider} not supported")
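The new `chat_completion_stream` entry point is an async generator, so callers drive it with `async for`. A minimal consumption sketch, assuming an environment-configured OpenAI key; the model name and prompt are placeholders, not part of this release:

```python
import asyncio

from not_again_ai.llm.chat_completion import chat_completion_stream
from not_again_ai.llm.chat_completion.providers.openai_api import openai_client
from not_again_ai.llm.chat_completion.types import ChatCompletionRequest, UserMessage


async def main() -> None:
    # Streaming requires the async client flavor added in this release
    client = openai_client(async_client=True)
    request = ChatCompletionRequest(
        model="gpt-4o-mini",  # placeholder model name
        messages=[UserMessage(content="Write a haiku about diffs.")],
    )
    # chat_completion_stream sets request.stream = True before dispatching
    async for chunk in chat_completion_stream(request, "openai", client):
        for choice in chunk.choices:  # a final usage-only chunk may have no choices
            print(choice.delta.content, end="", flush=True)


asyncio.run(main())
```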
--- a/not_again_ai/llm/chat_completion/providers/ollama_api.py
+++ b/not_again_ai/llm/chat_completion/providers/ollama_api.py
@@ -1,4 +1,4 @@
-from collections.abc import Callable
+from collections.abc import AsyncGenerator, Callable
 import json
 import os
 import re
@@ -6,14 +6,20 @@ import time
 from typing import Any, Literal, cast
 
 from loguru import logger
-from ollama import ChatResponse, Client, ResponseError
+from ollama import AsyncClient, ChatResponse, Client, ResponseError
 
 from not_again_ai.llm.chat_completion.types import (
     AssistantMessage,
     ChatCompletionChoice,
+    ChatCompletionChoiceStream,
+    ChatCompletionChunk,
+    ChatCompletionDelta,
     ChatCompletionRequest,
     ChatCompletionResponse,
     Function,
+    PartialFunction,
+    PartialToolCall,
+    Role,
     ToolCall,
 )
 
@@ -28,6 +34,7 @@ OLLAMA_PARAMETER_MAP = {
     "logit_bias": None,
     "top_logprobs": None,
    "presence_penalty": None,
+    "max_tokens": "num_predict",
 }
 
 
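The new `"max_tokens": "num_predict"` entry feeds the remapping loop described in `format_kwargs` below: a non-`None` value renames an OpenAI-style parameter to its Ollama equivalent, while `None` marks it as unsupported and removes it. A standalone sketch of that mechanism (the map here is abridged for illustration):

```python
from typing import Any

# Abridged copy of OLLAMA_PARAMETER_MAP, illustration only
PARAMETER_MAP: dict[str, str | None] = {
    "presence_penalty": None,     # no Ollama equivalent -> dropped
    "max_tokens": "num_predict",  # supported under a different name -> renamed
}


def remap(kwargs: dict[str, Any]) -> dict[str, Any]:
    for key, mapped in PARAMETER_MAP.items():
        if key in kwargs:
            if mapped is not None:
                kwargs[mapped] = kwargs.pop(key)  # rename to Ollama's parameter
            else:
                del kwargs[key]  # Ollama has no equivalent; drop it
    return kwargs


print(remap({"max_tokens": 128, "presence_penalty": 0.5}))
# -> {'num_predict': 128}
```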
@@ -45,15 +52,13 @@ def validate(request: ChatCompletionRequest) -> None:
         logger.warning("Parameter 'stop' needs to be a string and not a list. It will be ignored.")
         request.stop = None
 
+    # Raise an error if both "max_tokens" and "max_completion_tokens" are provided
+    if request.max_tokens is not None and request.max_completion_tokens is not None:
+        raise ValueError("`max_tokens` and `max_completion_tokens` cannot both be provided.")
 
-def ollama_chat_completion(
-    request: ChatCompletionRequest,
-    client: Callable[..., Any],
-) -> ChatCompletionResponse:
-    validate(request)
 
+def format_kwargs(request: ChatCompletionRequest) -> dict[str, Any]:
     kwargs = request.model_dump(mode="json", exclude_none=True)
-
     # For each key in OLLAMA_PARAMETER_MAP
     # If it is not None, set the key in kwargs to the value of the corresponding value in OLLAMA_PARAMETER_MAP
     # If it is None, remove that key from kwargs
@@ -136,6 +141,16 @@ def ollama_chat_completion(
             logger.warning("Ollama model only supports a single image per message. Using only the first images.")
         message["images"] = images
 
+    return kwargs
+
+
+def ollama_chat_completion(
+    request: ChatCompletionRequest,
+    client: Callable[..., Any],
+) -> ChatCompletionResponse:
+    validate(request)
+    kwargs = format_kwargs(request)
+
     try:
         start_time = time.time()
         response: ChatResponse = client(**kwargs)
@@ -159,7 +174,7 @@ def ollama_chat_completion(
             tool_name = tool_call.function.name
             if request.tools and tool_name not in [tool["function"]["name"] for tool in request.tools]:
                 errors += f"Tool call {tool_call} has an invalid tool name: {tool_name}\n"
-            tool_args = tool_call.function.arguments
+            tool_args = dict(tool_call.function.arguments)
             parsed_tool_calls.append(
                 ToolCall(
                     id="",
@@ -201,7 +216,65 @@ def ollama_chat_completion(
     )
 
 
-def ollama_client(host: str | None = None, timeout: float | None = None) -> Callable[..., Any]:
+async def ollama_chat_completion_stream(
+    request: ChatCompletionRequest,
+    client: Callable[..., Any],
+) -> AsyncGenerator[ChatCompletionChunk, None]:
+    validate(request)
+    kwargs = format_kwargs(request)
+
+    start_time = time.time()
+    stream = await client(**kwargs)
+
+    async for chunk in stream:
+        errors = ""
+        # Handle tool calls
+        tool_calls: list[PartialToolCall] | None = None
+        if chunk.message.tool_calls:
+            parsed_tool_calls: list[PartialToolCall] = []
+            for tool_call in chunk.message.tool_calls:
+                tool_name = tool_call.function.name
+                if request.tools and tool_name not in [tool["function"]["name"] for tool in request.tools]:
+                    errors += f"Tool call {tool_call} has an invalid tool name: {tool_name}\n"
+                tool_args = tool_call.function.arguments
+
+                parsed_tool_calls.append(
+                    PartialToolCall(
+                        id="",
+                        function=PartialFunction(
+                            name=tool_name,
+                            arguments=tool_args,
+                        ),
+                    )
+                )
+            tool_calls = parsed_tool_calls
+
+        current_time = time.time()
+        response_duration = round(current_time - start_time, 4)
+
+        delta = ChatCompletionDelta(
+            content=chunk.message.content or "",
+            role=Role.ASSISTANT,
+            tool_calls=tool_calls,
+        )
+        choice_obj = ChatCompletionChoiceStream(
+            delta=delta,
+            finish_reason=chunk.done_reason,
+            index=0,
+        )
+        chunk_obj = ChatCompletionChunk(
+            choices=[choice_obj],
+            errors=errors.strip(),
+            completion_tokens=chunk.get("eval_count", None),
+            prompt_tokens=chunk.get("prompt_eval_count", None),
+            response_duration=response_duration,
+        )
+        yield chunk_obj
+
+
+def ollama_client(
+    host: str | None = None, timeout: float | None = None, async_client: bool = False
+) -> Callable[..., Any]:
     """Create an Ollama client instance based on the specified host or will read from the OLLAMA_HOST environment variable.
 
     Args:
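`ollama_chat_completion_stream` can also be driven directly, pairing it with the async flavor of `ollama_client`. A sketch assuming a local Ollama server on the default port; the model name is a placeholder:

```python
import asyncio

from not_again_ai.llm.chat_completion.providers.ollama_api import (
    ollama_chat_completion_stream,
    ollama_client,
)
from not_again_ai.llm.chat_completion.types import ChatCompletionRequest, UserMessage


async def main() -> None:
    client = ollama_client(async_client=True)  # wraps ollama.AsyncClient
    request = ChatCompletionRequest(
        model="llama3.2",  # placeholder model name
        messages=[UserMessage(content="Stream one sentence.")],
        stream=True,  # set explicitly when bypassing chat_completion_stream
    )
    async for chunk in ollama_chat_completion_stream(request, client):
        # Ollama chunks carry a single choice at index 0
        print(chunk.choices[0].delta.content, end="", flush=True)


asyncio.run(main())
```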
@@ -221,7 +294,7 @@ def ollama_client(host: str | None = None, timeout: float | None = None) -> Call
         host = "http://localhost:11434"
 
     def client_callable(**kwargs: Any) -> Any:
-        client = Client(host=host, timeout=timeout)
+        client = AsyncClient(host=host, timeout=timeout) if async_client else Client(host=host, timeout=timeout)
         return client.chat(**kwargs)
 
     return client_callable
--- a/not_again_ai/llm/chat_completion/providers/openai_api.py
+++ b/not_again_ai/llm/chat_completion/providers/openai_api.py
@@ -1,17 +1,23 @@
-from collections.abc import Callable
+from collections.abc import AsyncGenerator, Callable, Coroutine
 import json
 import time
 from typing import Any, Literal
 
 from azure.identity import DefaultAzureCredential, get_bearer_token_provider
-from openai import AzureOpenAI, OpenAI
+from openai import AsyncAzureOpenAI, AsyncOpenAI, AzureOpenAI, OpenAI
 
 from not_again_ai.llm.chat_completion.types import (
     AssistantMessage,
     ChatCompletionChoice,
+    ChatCompletionChoiceStream,
+    ChatCompletionChunk,
+    ChatCompletionDelta,
     ChatCompletionRequest,
     ChatCompletionResponse,
     Function,
+    PartialFunction,
+    PartialToolCall,
+    Role,
     ToolCall,
 )
 
@@ -31,13 +37,12 @@ def validate(request: ChatCompletionRequest) -> None:
     if request.json_mode and request.structured_outputs is not None:
         raise ValueError("json_schema and json_mode cannot be used together.")
 
+    # Raise an error if both "max_tokens" and "max_completion_tokens" are provided
+    if request.max_tokens is not None and request.max_completion_tokens is not None:
+        raise ValueError("`max_tokens` and `max_completion_tokens` cannot both be provided.")
 
-def openai_chat_completion(
-    request: ChatCompletionRequest,
-    client: Callable[..., Any],
-) -> ChatCompletionResponse:
-    validate(request)
 
+def format_kwargs(request: ChatCompletionRequest) -> dict[str, Any]:
     # Format the response format parameters to be compatible with OpenAI API
     if request.json_mode:
         response_format: dict[str, Any] = {"type": "json_object"}
@@ -57,7 +62,6 @@ def openai_chat_completion(
         elif value is None and key in kwargs:
             del kwargs[key]
 
-    # Iterate over each message and
     for message in kwargs["messages"]:
         role = message.get("role", None)
         # For each ToolMessage, change the "name" field to be named "tool_call_id" instead
@@ -80,6 +84,49 @@ def openai_chat_completion(
     if request.tool_choice is not None and request.tool_choice not in ["none", "auto", "required"]:
         kwargs["tool_choice"] = {"type": "function", "function": {"name": request.tool_choice}}
 
+    return kwargs
+
+
+def process_logprobs(logprobs_content: list[dict[str, Any]]) -> list[dict[str, Any] | list[dict[str, Any]]]:
+    """Process logprobs content from OpenAI API response.
+
+    Args:
+        logprobs_content: List of logprob entries from the API response
+
+    Returns:
+        Processed logprobs list containing either single token info or lists of top token infos
+    """
+    logprobs_list: list[dict[str, Any] | list[dict[str, Any]]] = []
+    for logprob in logprobs_content:
+        if logprob.get("top_logprobs", None):
+            curr_logprob_infos: list[dict[str, Any]] = []
+            for top_logprob in logprob.get("top_logprobs", []):
+                curr_logprob_infos.append(
+                    {
+                        "token": top_logprob.get("token", ""),
+                        "logprob": top_logprob.get("logprob", 0),
+                        "bytes": top_logprob.get("bytes", 0),
+                    }
+                )
+            logprobs_list.append(curr_logprob_infos)
+        else:
+            logprobs_list.append(
+                {
+                    "token": logprob.get("token", ""),
+                    "logprob": logprob.get("logprob", 0),
+                    "bytes": logprob.get("bytes", 0),
+                }
+            )
+    return logprobs_list
+
+
+def openai_chat_completion(
+    request: ChatCompletionRequest,
+    client: Callable[..., Any],
+) -> ChatCompletionResponse:
+    validate(request)
+    kwargs = format_kwargs(request)
+
     start_time = time.time()
     response = client(**kwargs)
     end_time = time.time()
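The extracted `process_logprobs` now backs both the blocking and streaming paths. A small worked example; the input mirrors the token/logprob/bytes shape of OpenAI's `logprobs.content` payload:

```python
from not_again_ai.llm.chat_completion.providers.openai_api import process_logprobs

content = [
    # No top_logprobs: kept as a single token-info dict
    {"token": "Hello", "logprob": -0.01, "bytes": [72, 101, 108, 108, 111]},
    # With top_logprobs: flattened into a list of candidate token infos
    {
        "token": "!",
        "logprob": -0.5,
        "bytes": [33],
        "top_logprobs": [
            {"token": "!", "logprob": -0.5, "bytes": [33]},
            {"token": ".", "logprob": -1.2, "bytes": [46]},
        ],
    },
]

result = process_logprobs(content)
# result[0] is a single dict; result[1] is a list of two candidate dicts
```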
@@ -129,28 +176,7 @@ def openai_chat_completion(
     # Handle logprobs
     logprobs: list[dict[str, Any] | list[dict[str, Any]]] | None = None
     if choice.get("logprobs", None) and choice["logprobs"].get("content", None) is not None:
-        logprobs_list: list[dict[str, Any] | list[dict[str, Any]]] = []
-        for logprob in choice["logprobs"]["content"]:
-            if logprob.get("top_logprobs", None):
-                curr_logprob_infos: list[dict[str, Any]] = []
-                for top_logprob in logprob.get("top_logprobs", []):
-                    curr_logprob_infos.append(
-                        {
-                            "token": top_logprob.get("token", ""),
-                            "logprob": top_logprob.get("logprob", 0),
-                            "bytes": top_logprob.get("bytes", 0),
-                        }
-                    )
-                logprobs_list.append(curr_logprob_infos)
-            else:
-                logprobs_list.append(
-                    {
-                        "token": logprob.get("token", ""),
-                        "logprob": logprob.get("logprob", 0),
-                        "bytes": logprob.get("bytes", 0),
-                    }
-                )
-        logprobs = logprobs_list
+        logprobs = process_logprobs(choice["logprobs"]["content"])
 
     # Handle extras that OpenAI or Azure OpenAI return
     if choice.get("content_filter_results", None):
@@ -191,6 +217,107 @@ def openai_chat_completion(
     )
 
 
+async def openai_chat_completion_stream(
+    request: ChatCompletionRequest,
+    client: Callable[..., Any],
+) -> AsyncGenerator[ChatCompletionChunk, None]:
+    validate(request)
+    kwargs = format_kwargs(request)
+
+    start_time = time.time()
+    stream = await client(**kwargs)
+
+    async for chunk in stream:
+        errors = ""
+        # This kind of a hack. To make this processing generic for clients that do not return the correct
+        # data structure, we convert the chunk to a dict
+        if not isinstance(chunk, dict):
+            chunk = chunk.to_dict()
+
+        choices: list[ChatCompletionChoiceStream] = []
+        for choice in chunk["choices"]:
+            content = choice.get("delta", {}).get("content", "")
+            if not content:
+                content = ""
+
+            role = Role.ASSISTANT
+            if choice.get("delta", {}).get("role", None):
+                role = Role(choice["delta"]["role"])
+
+            # Handle tool calls
+            tool_calls: list[PartialToolCall] | None = None
+            if choice["delta"].get("tool_calls", None):
+                parsed_tool_calls: list[PartialToolCall] = []
+                for tool_call in choice["delta"]["tool_calls"]:
+                    tool_name = tool_call.get("function", {}).get("name", None)
+                    if not tool_name:
+                        tool_name = ""
+                    tool_args = tool_call.get("function", {}).get("arguments", "")
+                    if not tool_args:
+                        tool_args = ""
+
+                    tool_id = tool_call.get("id", None)
+                    parsed_tool_calls.append(
+                        PartialToolCall(
+                            id=tool_id,
+                            function=PartialFunction(
+                                name=tool_name,
+                                arguments=tool_args,
+                            ),
+                        )
+                    )
+                tool_calls = parsed_tool_calls
+
+            refusal = None
+            if choice["delta"].get("refusal", None):
+                refusal = choice["delta"]["refusal"]
+
+            delta = ChatCompletionDelta(
+                content=content,
+                role=role,
+                tool_calls=tool_calls,
+                refusal=refusal,
+            )
+
+            index = choice.get("index", 0)
+            finish_reason = choice.get("finish_reason", None)
+
+            # Handle logprobs
+            logprobs: list[dict[str, Any] | list[dict[str, Any]]] | None = None
+            if choice.get("logprobs", None) and choice["logprobs"].get("content", None) is not None:
+                logprobs = process_logprobs(choice["logprobs"]["content"])
+
+            choice_obj = ChatCompletionChoiceStream(
+                delta=delta,
+                finish_reason=finish_reason,
+                logprobs=logprobs,
+                index=index,
+            )
+            choices.append(choice_obj)
+
+        current_time = time.time()
+        response_duration = round(current_time - start_time, 4)
+
+        if "usage" in chunk and chunk["usage"] is not None:
+            completion_tokens = chunk["usage"].get("completion_tokens", None)
+            prompt_tokens = chunk["usage"].get("prompt_tokens", None)
+            system_fingerprint = chunk.get("system_fingerprint", None)
+        else:
+            completion_tokens = None
+            prompt_tokens = None
+            system_fingerprint = None
+
+        chunk_obj = ChatCompletionChunk(
+            choices=choices,
+            errors=errors.strip(),
+            completion_tokens=completion_tokens,
+            prompt_tokens=prompt_tokens,
+            response_duration=response_duration,
+            system_fingerprint=system_fingerprint,
+        )
+        yield chunk_obj
+
+
 def create_client_callable(client_class: type[OpenAI | AzureOpenAI], **client_args: Any) -> Callable[..., Any]:
     """Creates a callable that instantiates and uses an OpenAI client.
 
@@ -211,6 +338,20 @@ def create_client_callable(client_class: type[OpenAI | AzureOpenAI], **client_ar
     return client_callable
 
 
+def create_client_callable_stream(
+    client_class: type[AsyncOpenAI | AsyncAzureOpenAI], **client_args: Any
+) -> Callable[..., Any]:
+    filtered_args = {k: v for k, v in client_args.items() if v is not None}
+
+    def client_callable(**kwargs: Any) -> Coroutine[Any, Any, Any]:
+        client = client_class(**filtered_args)
+        kwargs["stream_options"] = {"include_usage": True}
+        stream = client.chat.completions.create(**kwargs)
+        return stream
+
+    return client_callable
+
+
 class InvalidOAIAPITypeError(Exception):
     """Raised when an invalid OAIAPIType string is provided."""
 
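`create_client_callable_stream` returns the `create(...)` coroutine unawaited, which is why the provider function awaits it to obtain the async stream; it also forces `stream_options={"include_usage": True}`, so the final chunk typically carries token counts alongside an empty `choices` list. A minimal direct-use sketch, assuming `OPENAI_API_KEY` is set; the model name is a placeholder:

```python
import asyncio

from openai import AsyncOpenAI

from not_again_ai.llm.chat_completion.providers.openai_api import create_client_callable_stream


async def main() -> None:
    client_callable = create_client_callable_stream(AsyncOpenAI)
    stream = await client_callable(
        model="gpt-4o-mini",  # placeholder model name
        messages=[{"role": "user", "content": "Hi"}],
        stream=True,  # stream_options requires streaming to be enabled
    )
    async for chunk in stream:
        print(chunk)


asyncio.run(main())
```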
@@ -223,6 +364,7 @@ def openai_client(
     azure_endpoint: str | None = None,
     timeout: float | None = None,
     max_retries: int | None = None,
+    async_client: bool = False,
 ) -> Callable[..., Any]:
     """Create an OpenAI or Azure OpenAI client instance based on the specified API type and other provided parameters.
 
@@ -243,11 +385,11 @@ def openai_client(
         max_retries (int, optional): Certain errors are automatically retried 2 times by default,
             with a short exponential backoff. Connection errors (for example, due to a network connectivity problem),
             408 Request Timeout, 409 Conflict, 429 Rate Limit, and >=500 Internal errors are all retried by default.
+        async_client (bool, optional): Whether to return an async client. Defaults to False.
 
     Returns:
         Callable[..., Any]: A callable that creates a client and returns completion results
 
-
     Raises:
         InvalidOAIAPITypeError: If an invalid API type string is provided.
         NotImplementedError: If the specified API type is recognized but not yet supported (e.g., 'azure_openai').
@@ -256,17 +398,21 @@ def openai_client(
         raise InvalidOAIAPITypeError(f"Invalid OAIAPIType: {api_type}. Must be 'openai' or 'azure_openai'.")
 
     if api_type == "openai":
-        return create_client_callable(
-            OpenAI,
+        client_class = AsyncOpenAI if async_client else OpenAI
+        callable_creator = create_client_callable_stream if async_client else create_client_callable
+        return callable_creator(
+            client_class,  # type: ignore
             api_key=api_key,
             organization=organization,
             timeout=timeout,
             max_retries=max_retries,
         )
     elif api_type == "azure_openai":
+        azure_client_class = AsyncAzureOpenAI if async_client else AzureOpenAI
+        callable_creator = create_client_callable_stream if async_client else create_client_callable
         if api_key:
-            return create_client_callable(
-                AzureOpenAI,
+            return callable_creator(
+                azure_client_class,  # type: ignore
                 api_version=aoai_api_version,
                 azure_endpoint=azure_endpoint,
                 api_key=api_key,
@@ -278,8 +424,8 @@ def openai_client(
             ad_token_provider = get_bearer_token_provider(
                 azure_credential, "https://cognitiveservices.azure.com/.default"
             )
-            return create_client_callable(
-                AzureOpenAI,
+            return callable_creator(
+                azure_client_class,  # type: ignore
                 api_version=aoai_api_version,
                 azure_endpoint=azure_endpoint,
                 azure_ad_token_provider=ad_token_provider,
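Both Azure branches now honor `async_client`, so a streaming-capable Azure client is selected the same way as a synchronous one. A hedged sketch (the endpoint and API version are placeholders; omitting `api_key` falls through to the `DefaultAzureCredential` path above):

```python
from not_again_ai.llm.chat_completion.providers.openai_api import openai_client

client = openai_client(
    api_type="azure_openai",
    aoai_api_version="2024-06-01",  # placeholder API version
    azure_endpoint="https://example-resource.openai.azure.com",  # placeholder endpoint
    async_client=True,
)
```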
--- a/not_again_ai/llm/chat_completion/types.py
+++ b/not_again_ai/llm/chat_completion/types.py
@@ -52,12 +52,23 @@ class Function(BaseModel):
     arguments: dict[str, Any]
 
 
+class PartialFunction(BaseModel):
+    name: str
+    arguments: str | dict[str, Any]
+
+
 class ToolCall(BaseModel):
     id: str
     function: Function
     type: Literal["function"] = "function"
 
 
+class PartialToolCall(BaseModel):
+    id: str | None
+    function: PartialFunction
+    type: Literal["function"] = "function"
+
+
 class DeveloperMessage(BaseMessage[str]):
     role: Literal[Role.DEVELOPER] = Role.DEVELOPER
 
@@ -87,6 +98,7 @@ MessageT = AssistantMessage | DeveloperMessage | SystemMessage | ToolMessage | U
 class ChatCompletionRequest(BaseModel):
     messages: list[MessageT]
     model: str
+    stream: bool = Field(default=False)
 
     max_completion_tokens: int | None = Field(default=None)
     context_window: int | None = Field(default=None)
@@ -118,6 +130,11 @@ class ChatCompletionRequest(BaseModel):
     top_k: int | None = Field(default=None)
     min_p: float | None = Field(default=None)
 
+    max_tokens: int | None = Field(
+        default=None,
+        description="Sometimes `max_completion_tokens` is not correctly supported so we provide this as a fallback.",
+    )
+
 
 class ChatCompletionChoice(BaseModel):
     message: AssistantMessage
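The new `max_tokens` fallback is mutually exclusive with `max_completion_tokens`, which both providers' `validate` functions now enforce. An illustration (model and message are placeholders, and the request is assumed to pass the other validation checks):

```python
from not_again_ai.llm.chat_completion.providers.openai_api import validate
from not_again_ai.llm.chat_completion.types import ChatCompletionRequest, UserMessage

request = ChatCompletionRequest(
    model="gpt-4o-mini",  # placeholder model name
    messages=[UserMessage(content="hi")],
    max_completion_tokens=100,
    max_tokens=100,
)

try:
    validate(request)
except ValueError as err:
    print(err)  # `max_tokens` and `max_completion_tokens` cannot both be provided.
```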
@@ -143,3 +160,35 @@ class ChatCompletionResponse(BaseModel):
     system_fingerprint: str | None = Field(default=None)
 
     extras: Any | None = Field(default=None)
+
+
+class ChatCompletionDelta(BaseModel):
+    content: str
+    role: Role = Field(default=Role.ASSISTANT)
+
+    tool_calls: list[PartialToolCall] | None = Field(default=None)
+
+    refusal: str | None = Field(default=None)
+
+
+class ChatCompletionChoiceStream(BaseModel):
+    delta: ChatCompletionDelta
+    index: int
+    finish_reason: Literal["stop", "length", "tool_calls", "content_filter"] | None
+
+    logprobs: list[dict[str, Any] | list[dict[str, Any]]] | None = Field(default=None)
+
+    extras: Any | None = Field(default=None)
+
+
+class ChatCompletionChunk(BaseModel):
+    choices: list[ChatCompletionChoiceStream]
+
+    errors: str = Field(default="")
+
+    completion_tokens: int | None = Field(default=None)
+    prompt_tokens: int | None = Field(default=None)
+    response_duration: float | None = Field(default=None)
+
+    system_fingerprint: str | None = Field(default=None)
+    extras: Any | None = Field(default=None)
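The `Partial*` models exist because streamed tool calls arrive in pieces: OpenAI in particular streams function arguments as string fragments across chunks. A minimal accumulation sketch, assuming a single tool call per response:

```python
import json

from not_again_ai.llm.chat_completion.types import PartialFunction, PartialToolCall

# Deltas as they might arrive across two chunks
fragments = [
    PartialToolCall(id="call_1", function=PartialFunction(name="get_weather", arguments='{"ci')),
    PartialToolCall(id=None, function=PartialFunction(name="", arguments='ty": "Paris"}')),
]

name, args = "", ""
for partial in fragments:
    name += partial.function.name
    if isinstance(partial.function.arguments, str):
        args += partial.function.arguments  # string fragments concatenate into JSON

print(name, json.loads(args))  # get_weather {'city': 'Paris'}
```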
--- a/not_again_ai-0.16.0.dist-info/METADATA
+++ b/not_again_ai-0.17.0.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: not-again-ai
-Version: 0.16.0
+Version: 0.17.0
 Summary: Designed to once and for all collect all the little things that come up over and over again in AI projects and put them in one place.
 License: MIT
 Author: DaveCoDev
@@ -27,7 +27,7 @@ Requires-Dist: numpy (>=2.2) ; extra == "viz"
27
27
  Requires-Dist: ollama (>=0.4) ; extra == "llm"
28
28
  Requires-Dist: openai (>=1) ; extra == "llm"
29
29
  Requires-Dist: pandas (>=2.2) ; extra == "viz"
30
- Requires-Dist: playwright (>=1.49) ; extra == "data"
30
+ Requires-Dist: playwright (>=1.50) ; extra == "data"
31
31
  Requires-Dist: pydantic (>=2.10)
32
32
  Requires-Dist: pytest-playwright (>=0.7) ; extra == "data"
33
33
  Requires-Dist: python-liquid (>=1.12) ; extra == "llm"
--- a/not_again_ai-0.16.0.dist-info/RECORD
+++ b/not_again_ai-0.17.0.dist-info/RECORD
@@ -5,12 +5,12 @@ not_again_ai/base/parallel.py,sha256=fcYhKBYBWvob84iKp3O93wvFFdXeidljZsShgBLTNGA
 not_again_ai/data/__init__.py,sha256=1jF6mwvtB2PT7IEc3xpbRtZm3g3Lyf8zUqH4AEE4qlQ,244
 not_again_ai/data/web.py,sha256=wjx9cc33jcoJBGonYCIpwygPBFOwz7F-dx_ominmbnI,1838
 not_again_ai/llm/__init__.py,sha256=_wNUL6FDaT369Z8W48FsaC_NkcOZ-ib2MMUvnaLOS-0,451
-not_again_ai/llm/chat_completion/__init__.py,sha256=a2qmmmrXjMKyHGZDjt_xdqYbSrEOBea_VvZArzMboe0,200
-not_again_ai/llm/chat_completion/interface.py,sha256=FCyE-1gLdhwuS0Lv8iTbZvraa4iZjnKB8qb31WF53uk,1204
+not_again_ai/llm/chat_completion/__init__.py,sha256=HozawvdRkTFgq8XR16GJUHN1ukEa4Ya68wVPVrl-afs,250
+not_again_ai/llm/chat_completion/interface.py,sha256=u9DLDOv3Vp2IfvmgsRM2va6EK9NKdmkk0tXO2P9nLuY,2308
 not_again_ai/llm/chat_completion/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-not_again_ai/llm/chat_completion/providers/ollama_api.py,sha256=iBTMyF8edo8uxxrorNPtShzmCXG7m0RlEBunWLSO4Mo,7999
-not_again_ai/llm/chat_completion/providers/openai_api.py,sha256=S7TZhDIQ_xpp3JakRVcd3Gpw2UjeHCETdA9MfRKUjCU,12294
-not_again_ai/llm/chat_completion/types.py,sha256=q8APUWWzwCKL0Rs_zEFfph9uBcwh5nAT0f0rp4crvk0,4039
+not_again_ai/llm/chat_completion/providers/ollama_api.py,sha256=Puo2eE2VynvZOoqrUlNYtPgRGCRMVa8syc3TfBxS1hs,10661
+not_again_ai/llm/chat_completion/providers/openai_api.py,sha256=1wdeV50KYX_KIf2uofsICKYoHVSvj4kTRpS1Vuw3NSQ,17887
+not_again_ai/llm/chat_completion/types.py,sha256=yjSrcR9N5hrrMQAjzNvRIfQXQ-lVRgZfrIoKuhMbmjo,5399
 not_again_ai/llm/embedding/__init__.py,sha256=wscUfROukvw0M0vYccfaVTdXV0P-eICAT5mqM0LaHHc,182
 not_again_ai/llm/embedding/interface.py,sha256=Hj3UiktXEeCUeMwpIDtRkwBfKgaJSnJvclLNyjwUAtE,1144
 not_again_ai/llm/embedding/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -32,7 +32,7 @@ not_again_ai/viz/distributions.py,sha256=OyWwJaNI6lMRm_iSrhq-CORLNvXfeuLSgDtVo3u
 not_again_ai/viz/scatterplot.py,sha256=5CUOWeknbBOaZPeX9oPin5sBkRKEwk8qeFH45R-9LlY,2292
 not_again_ai/viz/time_series.py,sha256=pOGZqXp_2nd6nKo-PUQNCtmMh__69jxQ6bQibTGLwZA,5212
 not_again_ai/viz/utils.py,sha256=hN7gwxtBt3U6jQni2K8j5m5pCXpaJDoNzGhBBikEU28,238
-not_again_ai-0.16.0.dist-info/LICENSE,sha256=btjOgNGpp-ux5xOo1Gx1MddxeWtT9sof3s3Nui29QfA,1071
-not_again_ai-0.16.0.dist-info/METADATA,sha256=kvwxTcEi-elRl-LuHyh2QtFLrpYHd-U6HjyuAkHYvWQ,15035
-not_again_ai-0.16.0.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
-not_again_ai-0.16.0.dist-info/RECORD,,
+not_again_ai-0.17.0.dist-info/LICENSE,sha256=btjOgNGpp-ux5xOo1Gx1MddxeWtT9sof3s3Nui29QfA,1071
+not_again_ai-0.17.0.dist-info/METADATA,sha256=rguTCxLjkxsY-JDXsCbZ2Jl9wwCUOsvCiQVWYC6BBxI,15035
+not_again_ai-0.17.0.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
+not_again_ai-0.17.0.dist-info/RECORD,,