not-again-ai 0.16.1.tar.gz → 0.17.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/PKG-INFO +1 -1
  2. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/pyproject.toml +4 -1
  3. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/llm/chat_completion/__init__.py +2 -2
  4. not_again_ai-0.17.0/src/not_again_ai/llm/chat_completion/interface.py +61 -0
  5. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/llm/chat_completion/providers/ollama_api.py +80 -12
  6. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/llm/chat_completion/providers/openai_api.py +180 -38
  7. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/llm/chat_completion/types.py +44 -0
  8. not_again_ai-0.16.1/src/not_again_ai/llm/chat_completion/interface.py +0 -32
  9. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/LICENSE +0 -0
  10. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/README.md +0 -0
  11. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/__init__.py +0 -0
  12. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/base/__init__.py +0 -0
  13. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/base/file_system.py +0 -0
  14. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/base/parallel.py +0 -0
  15. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/data/__init__.py +0 -0
  16. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/data/web.py +0 -0
  17. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/llm/__init__.py +0 -0
  18. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/llm/chat_completion/providers/__init__.py +0 -0
  19. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/llm/embedding/__init__.py +0 -0
  20. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/llm/embedding/interface.py +0 -0
  21. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/llm/embedding/providers/__init__.py +0 -0
  22. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/llm/embedding/providers/ollama_api.py +0 -0
  23. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/llm/embedding/providers/openai_api.py +0 -0
  24. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/llm/embedding/types.py +0 -0
  25. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/llm/prompting/__init__.py +0 -0
  26. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/llm/prompting/compile_prompt.py +0 -0
  27. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/llm/prompting/interface.py +0 -0
  28. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/llm/prompting/providers/__init__.py +0 -0
  29. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/llm/prompting/providers/openai_tiktoken.py +0 -0
  30. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/llm/prompting/types.py +0 -0
  31. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/py.typed +0 -0
  32. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/statistics/__init__.py +0 -0
  33. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/statistics/dependence.py +0 -0
  34. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/viz/__init__.py +0 -0
  35. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/viz/barplots.py +0 -0
  36. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/viz/distributions.py +0 -0
  37. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/viz/scatterplot.py +0 -0
  38. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/viz/time_series.py +0 -0
  39. {not_again_ai-0.16.1 → not_again_ai-0.17.0}/src/not_again_ai/viz/utils.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: not-again-ai
-Version: 0.16.1
+Version: 0.17.0
 Summary: Designed to once and for all collect all the little things that come up over and over again in AI projects and put them in one place.
 License: MIT
 Author: DaveCoDev
pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "not-again-ai"
-version = "0.16.1"
+version = "0.17.0"
 description = "Designed to once and for all collect all the little things that come up over and over again in AI projects and put them in one place."
 authors = [
     { name = "DaveCoDev", email = "dave.co.dev@gmail.com" }
@@ -70,6 +70,7 @@ nox-poetry = "*"
 
 [tool.poetry.group.test.dependencies]
 pytest = "*"
+pytest-asyncio = "*"
 pytest-cov = "*"
 pytest-randomly = "*"
 
@@ -153,6 +154,8 @@ filterwarnings = [
     # "ignore::DeprecationWarning:typer",
     "ignore::pytest.PytestUnraisableExceptionWarning"
 ]
+asyncio_mode = "auto"
+asyncio_default_fixture_loop_scope = "function"
 
 [tool.coverage.run]
 branch = true
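
The two new `asyncio_*` keys belong to pytest-asyncio, which the test group now depends on. With `asyncio_mode = "auto"`, bare `async def` test functions are collected and run without an explicit `@pytest.mark.asyncio` marker, which the new streaming code paths presumably rely on for their tests. A minimal sketch of what that enables (the test body is hypothetical, not from this package):

```python
import asyncio


# Under asyncio_mode = "auto", pytest-asyncio runs this coroutine as a test
# with no decorator needed; the event loop is function-scoped per the
# asyncio_default_fixture_loop_scope setting above.
async def test_streaming_sketch() -> None:
    await asyncio.sleep(0)  # stand-in for awaiting a streamed chunk
    assert True
```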
src/not_again_ai/llm/chat_completion/__init__.py
@@ -1,4 +1,4 @@
-from not_again_ai.llm.chat_completion.interface import chat_completion
+from not_again_ai.llm.chat_completion.interface import chat_completion, chat_completion_stream
 from not_again_ai.llm.chat_completion.types import ChatCompletionRequest
 
-__all__ = ["ChatCompletionRequest", "chat_completion"]
+__all__ = ["ChatCompletionRequest", "chat_completion", "chat_completion_stream"]
not_again_ai-0.17.0/src/not_again_ai/llm/chat_completion/interface.py (new file)
@@ -0,0 +1,61 @@
+from collections.abc import AsyncGenerator, Callable
+from typing import Any
+
+from not_again_ai.llm.chat_completion.providers.ollama_api import ollama_chat_completion, ollama_chat_completion_stream
+from not_again_ai.llm.chat_completion.providers.openai_api import openai_chat_completion, openai_chat_completion_stream
+from not_again_ai.llm.chat_completion.types import ChatCompletionChunk, ChatCompletionRequest, ChatCompletionResponse
+
+
+def chat_completion(
+    request: ChatCompletionRequest,
+    provider: str,
+    client: Callable[..., Any],
+) -> ChatCompletionResponse:
+    """Get a chat completion response from the given provider. Currently supported providers:
+    - `openai` - OpenAI
+    - `azure_openai` - Azure OpenAI
+    - `ollama` - Ollama
+
+    Args:
+        request: Request parameter object
+        provider: The supported provider name
+        client: Client information, see the provider's implementation for what can be provided
+
+    Returns:
+        ChatCompletionResponse: The chat completion response.
+    """
+    if provider == "openai" or provider == "azure_openai":
+        return openai_chat_completion(request, client)
+    elif provider == "ollama":
+        return ollama_chat_completion(request, client)
+    else:
+        raise ValueError(f"Provider {provider} not supported")
+
+
+async def chat_completion_stream(
+    request: ChatCompletionRequest,
+    provider: str,
+    client: Callable[..., Any],
+) -> AsyncGenerator[ChatCompletionChunk, None]:
+    """Stream a chat completion response from the given provider. Currently supported providers:
+    - `openai` - OpenAI
+    - `azure_openai` - Azure OpenAI
+    - `ollama` - Ollama
+
+    Args:
+        request: Request parameter object
+        provider: The supported provider name
+        client: Client information, see the provider's implementation for what can be provided
+
+    Returns:
+        AsyncGenerator[ChatCompletionChunk, None]
+    """
+    request.stream = True
+    if provider == "openai" or provider == "azure_openai":
+        async for chunk in openai_chat_completion_stream(request, client):
+            yield chunk
+    elif provider == "ollama":
+        async for chunk in ollama_chat_completion_stream(request, client):
+            yield chunk
+    else:
+        raise ValueError(f"Provider {provider} not supported")
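
A quick aside on how the new entry point is driven end to end. This is a hypothetical usage sketch, not code from the diff: it assumes a local Ollama server, the model name and message text are placeholders, and `UserMessage` is assumed from the `MessageT` union in types.py.

```python
import asyncio

from not_again_ai.llm.chat_completion import ChatCompletionRequest, chat_completion_stream
from not_again_ai.llm.chat_completion.providers.ollama_api import ollama_client
from not_again_ai.llm.chat_completion.types import UserMessage


async def main() -> None:
    # Streaming requires the async client (see the ollama_api.py changes below).
    client = ollama_client(async_client=True)
    request = ChatCompletionRequest(
        model="llama3.2",  # placeholder model name
        messages=[UserMessage(content="Write a haiku about diffs.")],
    )
    # chat_completion_stream sets request.stream = True before dispatching.
    async for chunk in chat_completion_stream(request, "ollama", client):
        for choice in chunk.choices:
            print(choice.delta.content, end="", flush=True)


asyncio.run(main())
```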
src/not_again_ai/llm/chat_completion/providers/ollama_api.py
@@ -1,4 +1,4 @@
-from collections.abc import Callable
+from collections.abc import AsyncGenerator, Callable
 import json
 import os
 import re
@@ -6,14 +6,20 @@ import time
 from typing import Any, Literal, cast
 
 from loguru import logger
-from ollama import ChatResponse, Client, ResponseError
+from ollama import AsyncClient, ChatResponse, Client, ResponseError
 
 from not_again_ai.llm.chat_completion.types import (
     AssistantMessage,
     ChatCompletionChoice,
+    ChatCompletionChoiceStream,
+    ChatCompletionChunk,
+    ChatCompletionDelta,
     ChatCompletionRequest,
     ChatCompletionResponse,
     Function,
+    PartialFunction,
+    PartialToolCall,
+    Role,
     ToolCall,
 )
 
@@ -51,14 +57,8 @@ def validate(request: ChatCompletionRequest) -> None:
         raise ValueError("`max_tokens` and `max_completion_tokens` cannot both be provided.")
 
 
-def ollama_chat_completion(
-    request: ChatCompletionRequest,
-    client: Callable[..., Any],
-) -> ChatCompletionResponse:
-    validate(request)
-
+def format_kwargs(request: ChatCompletionRequest) -> dict[str, Any]:
     kwargs = request.model_dump(mode="json", exclude_none=True)
-
     # For each key in OLLAMA_PARAMETER_MAP
     # If it is not None, set the key in kwargs to the value of the corresponding value in OLLAMA_PARAMETER_MAP
     # If it is None, remove that key from kwargs
@@ -141,6 +141,16 @@
             logger.warning("Ollama model only supports a single image per message. Using only the first images.")
         message["images"] = images
 
+    return kwargs
+
+
+def ollama_chat_completion(
+    request: ChatCompletionRequest,
+    client: Callable[..., Any],
+) -> ChatCompletionResponse:
+    validate(request)
+    kwargs = format_kwargs(request)
+
     try:
         start_time = time.time()
         response: ChatResponse = client(**kwargs)
@@ -164,7 +174,7 @@
             tool_name = tool_call.function.name
             if request.tools and tool_name not in [tool["function"]["name"] for tool in request.tools]:
                 errors += f"Tool call {tool_call} has an invalid tool name: {tool_name}\n"
-            tool_args = tool_call.function.arguments
+            tool_args = dict(tool_call.function.arguments)
             parsed_tool_calls.append(
                 ToolCall(
                     id="",
@@ -206,7 +216,65 @@
     )
 
 
-def ollama_client(host: str | None = None, timeout: float | None = None) -> Callable[..., Any]:
+async def ollama_chat_completion_stream(
+    request: ChatCompletionRequest,
+    client: Callable[..., Any],
+) -> AsyncGenerator[ChatCompletionChunk, None]:
+    validate(request)
+    kwargs = format_kwargs(request)
+
+    start_time = time.time()
+    stream = await client(**kwargs)
+
+    async for chunk in stream:
+        errors = ""
+        # Handle tool calls
+        tool_calls: list[PartialToolCall] | None = None
+        if chunk.message.tool_calls:
+            parsed_tool_calls: list[PartialToolCall] = []
+            for tool_call in chunk.message.tool_calls:
+                tool_name = tool_call.function.name
+                if request.tools and tool_name not in [tool["function"]["name"] for tool in request.tools]:
+                    errors += f"Tool call {tool_call} has an invalid tool name: {tool_name}\n"
+                tool_args = tool_call.function.arguments
+
+                parsed_tool_calls.append(
+                    PartialToolCall(
+                        id="",
+                        function=PartialFunction(
+                            name=tool_name,
+                            arguments=tool_args,
+                        ),
+                    )
+                )
+            tool_calls = parsed_tool_calls
+
+        current_time = time.time()
+        response_duration = round(current_time - start_time, 4)
+
+        delta = ChatCompletionDelta(
+            content=chunk.message.content or "",
+            role=Role.ASSISTANT,
+            tool_calls=tool_calls,
+        )
+        choice_obj = ChatCompletionChoiceStream(
+            delta=delta,
+            finish_reason=chunk.done_reason,
+            index=0,
+        )
+        chunk_obj = ChatCompletionChunk(
+            choices=[choice_obj],
+            errors=errors.strip(),
+            completion_tokens=chunk.get("eval_count", None),
+            prompt_tokens=chunk.get("prompt_eval_count", None),
+            response_duration=response_duration,
+        )
+        yield chunk_obj
+
+
+def ollama_client(
+    host: str | None = None, timeout: float | None = None, async_client: bool = False
+) -> Callable[..., Any]:
     """Create an Ollama client instance based on the specified host or will read from the OLLAMA_HOST environment variable.
 
     Args:
@@ -226,7 +294,7 @@ def ollama_client(host: str | None = None, timeout: float | None = None) -> Call
         host = "http://localhost:11434"
 
     def client_callable(**kwargs: Any) -> Any:
-        client = Client(host=host, timeout=timeout)
+        client = AsyncClient(host=host, timeout=timeout) if async_client else Client(host=host, timeout=timeout)
        return client.chat(**kwargs)
 
     return client_callable
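
For orientation, a small sketch of what the new `async_client` flag changes at the call site. The host shown is the documented default; this example is illustrative, not from the diff:

```python
from not_again_ai.llm.chat_completion.providers.ollama_api import ollama_client

# Returns a callable wrapping ollama.Client; suitable for chat_completion().
sync_chat = ollama_client(host="http://localhost:11434")

# Returns a callable wrapping ollama.AsyncClient; required by
# ollama_chat_completion_stream(), which awaits the callable.
async_chat = ollama_client(host="http://localhost:11434", async_client=True)
```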
src/not_again_ai/llm/chat_completion/providers/openai_api.py
@@ -1,17 +1,23 @@
-from collections.abc import Callable
+from collections.abc import AsyncGenerator, Callable, Coroutine
 import json
 import time
 from typing import Any, Literal
 
 from azure.identity import DefaultAzureCredential, get_bearer_token_provider
-from openai import AzureOpenAI, OpenAI
+from openai import AsyncAzureOpenAI, AsyncOpenAI, AzureOpenAI, OpenAI
 
 from not_again_ai.llm.chat_completion.types import (
     AssistantMessage,
     ChatCompletionChoice,
+    ChatCompletionChoiceStream,
+    ChatCompletionChunk,
+    ChatCompletionDelta,
     ChatCompletionRequest,
     ChatCompletionResponse,
     Function,
+    PartialFunction,
+    PartialToolCall,
+    Role,
     ToolCall,
 )
 
@@ -36,12 +42,7 @@ def validate(request: ChatCompletionRequest) -> None:
         raise ValueError("`max_tokens` and `max_completion_tokens` cannot both be provided.")
 
 
-def openai_chat_completion(
-    request: ChatCompletionRequest,
-    client: Callable[..., Any],
-) -> ChatCompletionResponse:
-    validate(request)
-
+def format_kwargs(request: ChatCompletionRequest) -> dict[str, Any]:
     # Format the response format parameters to be compatible with OpenAI API
     if request.json_mode:
         response_format: dict[str, Any] = {"type": "json_object"}
@@ -61,7 +62,6 @@
         elif value is None and key in kwargs:
             del kwargs[key]
 
-    # Iterate over each message and
     for message in kwargs["messages"]:
         role = message.get("role", None)
         # For each ToolMessage, change the "name" field to be named "tool_call_id" instead
@@ -84,6 +84,49 @@
     if request.tool_choice is not None and request.tool_choice not in ["none", "auto", "required"]:
         kwargs["tool_choice"] = {"type": "function", "function": {"name": request.tool_choice}}
 
+    return kwargs
+
+
+def process_logprobs(logprobs_content: list[dict[str, Any]]) -> list[dict[str, Any] | list[dict[str, Any]]]:
+    """Process logprobs content from OpenAI API response.
+
+    Args:
+        logprobs_content: List of logprob entries from the API response
+
+    Returns:
+        Processed logprobs list containing either single token info or lists of top token infos
+    """
+    logprobs_list: list[dict[str, Any] | list[dict[str, Any]]] = []
+    for logprob in logprobs_content:
+        if logprob.get("top_logprobs", None):
+            curr_logprob_infos: list[dict[str, Any]] = []
+            for top_logprob in logprob.get("top_logprobs", []):
+                curr_logprob_infos.append(
+                    {
+                        "token": top_logprob.get("token", ""),
+                        "logprob": top_logprob.get("logprob", 0),
+                        "bytes": top_logprob.get("bytes", 0),
+                    }
+                )
+            logprobs_list.append(curr_logprob_infos)
+        else:
+            logprobs_list.append(
+                {
+                    "token": logprob.get("token", ""),
+                    "logprob": logprob.get("logprob", 0),
+                    "bytes": logprob.get("bytes", 0),
+                }
+            )
+    return logprobs_list
+
+
+def openai_chat_completion(
+    request: ChatCompletionRequest,
+    client: Callable[..., Any],
+) -> ChatCompletionResponse:
+    validate(request)
+    kwargs = format_kwargs(request)
+
     start_time = time.time()
     response = client(**kwargs)
     end_time = time.time()
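
As an aside, the two shapes `process_logprobs` returns are easiest to see on concrete input. This is a hypothetical call with made-up values, not data from the diff:

```python
# One entry without top_logprobs (flattened to a single dict) and one with
# them (flattened to a list of dicts, one per alternative token).
logprobs_content = [
    {"token": "Hello", "logprob": -0.1, "bytes": [72, 101, 108, 108, 111]},
    {
        "token": ",",
        "logprob": -0.5,
        "top_logprobs": [
            {"token": ",", "logprob": -0.5, "bytes": [44]},
            {"token": "!", "logprob": -1.2, "bytes": [33]},
        ],
    },
]

processed = process_logprobs(logprobs_content)
# processed[0] -> {"token": "Hello", "logprob": -0.1, "bytes": [72, 101, 108, 108, 111]}
# processed[1] -> [{"token": ",", ...}, {"token": "!", ...}]
```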
@@ -133,28 +176,7 @@
         # Handle logprobs
         logprobs: list[dict[str, Any] | list[dict[str, Any]]] | None = None
         if choice.get("logprobs", None) and choice["logprobs"].get("content", None) is not None:
-            logprobs_list: list[dict[str, Any] | list[dict[str, Any]]] = []
-            for logprob in choice["logprobs"]["content"]:
-                if logprob.get("top_logprobs", None):
-                    curr_logprob_infos: list[dict[str, Any]] = []
-                    for top_logprob in logprob.get("top_logprobs", []):
-                        curr_logprob_infos.append(
-                            {
-                                "token": top_logprob.get("token", ""),
-                                "logprob": top_logprob.get("logprob", 0),
-                                "bytes": top_logprob.get("bytes", 0),
-                            }
-                        )
-                    logprobs_list.append(curr_logprob_infos)
-                else:
-                    logprobs_list.append(
-                        {
-                            "token": logprob.get("token", ""),
-                            "logprob": logprob.get("logprob", 0),
-                            "bytes": logprob.get("bytes", 0),
-                        }
-                    )
-            logprobs = logprobs_list
+            logprobs = process_logprobs(choice["logprobs"]["content"])
 
         # Handle extras that OpenAI or Azure OpenAI return
         if choice.get("content_filter_results", None):
@@ -195,6 +217,107 @@
     )
 
 
+async def openai_chat_completion_stream(
+    request: ChatCompletionRequest,
+    client: Callable[..., Any],
+) -> AsyncGenerator[ChatCompletionChunk, None]:
+    validate(request)
+    kwargs = format_kwargs(request)
+
+    start_time = time.time()
+    stream = await client(**kwargs)
+
+    async for chunk in stream:
+        errors = ""
+        # This is kind of a hack. To make this processing generic for clients that do not
+        # return the correct data structure, we convert the chunk to a dict.
+        if not isinstance(chunk, dict):
+            chunk = chunk.to_dict()
+
+        choices: list[ChatCompletionChoiceStream] = []
+        for choice in chunk["choices"]:
+            content = choice.get("delta", {}).get("content", "")
+            if not content:
+                content = ""
+
+            role = Role.ASSISTANT
+            if choice.get("delta", {}).get("role", None):
+                role = Role(choice["delta"]["role"])
+
+            # Handle tool calls
+            tool_calls: list[PartialToolCall] | None = None
+            if choice["delta"].get("tool_calls", None):
+                parsed_tool_calls: list[PartialToolCall] = []
+                for tool_call in choice["delta"]["tool_calls"]:
+                    tool_name = tool_call.get("function", {}).get("name", None)
+                    if not tool_name:
+                        tool_name = ""
+                    tool_args = tool_call.get("function", {}).get("arguments", "")
+                    if not tool_args:
+                        tool_args = ""
+
+                    tool_id = tool_call.get("id", None)
+                    parsed_tool_calls.append(
+                        PartialToolCall(
+                            id=tool_id,
+                            function=PartialFunction(
+                                name=tool_name,
+                                arguments=tool_args,
+                            ),
+                        )
+                    )
+                tool_calls = parsed_tool_calls
+
+            refusal = None
+            if choice["delta"].get("refusal", None):
+                refusal = choice["delta"]["refusal"]
+
+            delta = ChatCompletionDelta(
+                content=content,
+                role=role,
+                tool_calls=tool_calls,
+                refusal=refusal,
+            )
+
+            index = choice.get("index", 0)
+            finish_reason = choice.get("finish_reason", None)
+
+            # Handle logprobs
+            logprobs: list[dict[str, Any] | list[dict[str, Any]]] | None = None
+            if choice.get("logprobs", None) and choice["logprobs"].get("content", None) is not None:
+                logprobs = process_logprobs(choice["logprobs"]["content"])
+
+            choice_obj = ChatCompletionChoiceStream(
+                delta=delta,
+                finish_reason=finish_reason,
+                logprobs=logprobs,
+                index=index,
+            )
+            choices.append(choice_obj)
+
+        current_time = time.time()
+        response_duration = round(current_time - start_time, 4)
+
+        if "usage" in chunk and chunk["usage"] is not None:
+            completion_tokens = chunk["usage"].get("completion_tokens", None)
+            prompt_tokens = chunk["usage"].get("prompt_tokens", None)
+            system_fingerprint = chunk.get("system_fingerprint", None)
+        else:
+            completion_tokens = None
+            prompt_tokens = None
+            system_fingerprint = None
+
+        chunk_obj = ChatCompletionChunk(
+            choices=choices,
+            errors=errors.strip(),
+            completion_tokens=completion_tokens,
+            prompt_tokens=prompt_tokens,
+            response_duration=response_duration,
+            system_fingerprint=system_fingerprint,
+        )
+        yield chunk_obj
+
+
 def create_client_callable(client_class: type[OpenAI | AzureOpenAI], **client_args: Any) -> Callable[..., Any]:
     """Creates a callable that instantiates and uses an OpenAI client.
 
@@ -215,6 +338,20 @@
     return client_callable
 
 
+def create_client_callable_stream(
+    client_class: type[AsyncOpenAI | AsyncAzureOpenAI], **client_args: Any
+) -> Callable[..., Any]:
+    filtered_args = {k: v for k, v in client_args.items() if v is not None}
+
+    def client_callable(**kwargs: Any) -> Coroutine[Any, Any, Any]:
+        client = client_class(**filtered_args)
+        kwargs["stream_options"] = {"include_usage": True}
+        stream = client.chat.completions.create(**kwargs)
+        return stream
+
+    return client_callable
+
+
 class InvalidOAIAPITypeError(Exception):
     """Raised when an invalid OAIAPIType string is provided."""
 
@@ -227,6 +364,7 @@ def openai_client(
     azure_endpoint: str | None = None,
     timeout: float | None = None,
     max_retries: int | None = None,
+    async_client: bool = False,
 ) -> Callable[..., Any]:
     """Create an OpenAI or Azure OpenAI client instance based on the specified API type and other provided parameters.
 
@@ -247,11 +385,11 @@
         max_retries (int, optional): Certain errors are automatically retried 2 times by default,
             with a short exponential backoff. Connection errors (for example, due to a network connectivity problem),
             408 Request Timeout, 409 Conflict, 429 Rate Limit, and >=500 Internal errors are all retried by default.
+        async_client (bool, optional): Whether to return an async client. Defaults to False.
 
     Returns:
         Callable[..., Any]: A callable that creates a client and returns completion results
-
     Raises:
         InvalidOAIAPITypeError: If an invalid API type string is provided.
         NotImplementedError: If the specified API type is recognized but not yet supported (e.g., 'azure_openai').
@@ -260,17 +398,21 @@
         raise InvalidOAIAPITypeError(f"Invalid OAIAPIType: {api_type}. Must be 'openai' or 'azure_openai'.")
 
     if api_type == "openai":
-        return create_client_callable(
-            OpenAI,
+        client_class = AsyncOpenAI if async_client else OpenAI
+        callable_creator = create_client_callable_stream if async_client else create_client_callable
+        return callable_creator(
+            client_class,  # type: ignore
             api_key=api_key,
             organization=organization,
             timeout=timeout,
             max_retries=max_retries,
         )
     elif api_type == "azure_openai":
+        azure_client_class = AsyncAzureOpenAI if async_client else AzureOpenAI
+        callable_creator = create_client_callable_stream if async_client else create_client_callable
         if api_key:
-            return create_client_callable(
-                AzureOpenAI,
+            return callable_creator(
+                azure_client_class,  # type: ignore
                 api_version=aoai_api_version,
                 azure_endpoint=azure_endpoint,
                 api_key=api_key,
@@ -282,8 +424,8 @@
             ad_token_provider = get_bearer_token_provider(
                 azure_credential, "https://cognitiveservices.azure.com/.default"
             )
-            return create_client_callable(
-                AzureOpenAI,
+            return callable_creator(
+                azure_client_class,  # type: ignore
                 api_version=aoai_api_version,
                 azure_endpoint=azure_endpoint,
                 azure_ad_token_provider=ad_token_provider,
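
To tie the OpenAI-side changes together, here is a hypothetical streaming round trip, not code from the diff. Assumptions: `OPENAI_API_KEY` is set in the environment (the OpenAI SDK reads it when no key is passed), the model name is a placeholder, and `UserMessage` comes from the package's types module.

```python
import asyncio

from not_again_ai.llm.chat_completion import ChatCompletionRequest, chat_completion_stream
from not_again_ai.llm.chat_completion.providers.openai_api import openai_client
from not_again_ai.llm.chat_completion.types import UserMessage


async def main() -> None:
    # async_client=True selects AsyncOpenAI and create_client_callable_stream,
    # which also sets stream_options={"include_usage": True} so the final
    # chunk reports token counts.
    client = openai_client(api_type="openai", async_client=True)
    request = ChatCompletionRequest(
        model="gpt-4o-mini",  # placeholder model name
        messages=[UserMessage(content="Stream one short sentence.")],
    )
    async for chunk in chat_completion_stream(request, "openai", client):
        if chunk.choices:
            print(chunk.choices[0].delta.content, end="", flush=True)
        if chunk.completion_tokens is not None:
            print(f"\n[usage] completion_tokens={chunk.completion_tokens}")


asyncio.run(main())
```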
src/not_again_ai/llm/chat_completion/types.py
@@ -52,12 +52,23 @@ class Function(BaseModel):
     arguments: dict[str, Any]
 
 
+class PartialFunction(BaseModel):
+    name: str
+    arguments: str | dict[str, Any]
+
+
 class ToolCall(BaseModel):
     id: str
     function: Function
     type: Literal["function"] = "function"
 
 
+class PartialToolCall(BaseModel):
+    id: str | None
+    function: PartialFunction
+    type: Literal["function"] = "function"
+
+
 class DeveloperMessage(BaseMessage[str]):
     role: Literal[Role.DEVELOPER] = Role.DEVELOPER
 
@@ -87,6 +98,7 @@ MessageT = AssistantMessage | DeveloperMessage | SystemMessage | ToolMessage | U
 class ChatCompletionRequest(BaseModel):
     messages: list[MessageT]
     model: str
+    stream: bool = Field(default=False)
 
     max_completion_tokens: int | None = Field(default=None)
     context_window: int | None = Field(default=None)
@@ -148,3 +160,35 @@
     system_fingerprint: str | None = Field(default=None)
 
     extras: Any | None = Field(default=None)
+
+
+class ChatCompletionDelta(BaseModel):
+    content: str
+    role: Role = Field(default=Role.ASSISTANT)
+
+    tool_calls: list[PartialToolCall] | None = Field(default=None)
+
+    refusal: str | None = Field(default=None)
+
+
+class ChatCompletionChoiceStream(BaseModel):
+    delta: ChatCompletionDelta
+    index: int
+    finish_reason: Literal["stop", "length", "tool_calls", "content_filter"] | None
+
+    logprobs: list[dict[str, Any] | list[dict[str, Any]]] | None = Field(default=None)
+
+    extras: Any | None = Field(default=None)
+
+
+class ChatCompletionChunk(BaseModel):
+    choices: list[ChatCompletionChoiceStream]
+
+    errors: str = Field(default="")
+
+    completion_tokens: int | None = Field(default=None)
+    prompt_tokens: int | None = Field(default=None)
+    response_duration: float | None = Field(default=None)
+
+    system_fingerprint: str | None = Field(default=None)
+    extras: Any | None = Field(default=None)
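
One design note worth surfacing: `PartialFunction.arguments` is `str | dict[str, Any]` because the two providers stream tool-call arguments differently; the OpenAI path emits incremental string fragments while the Ollama path yields already-parsed dicts. A minimal client-side accumulation sketch under that assumption (the helper name is hypothetical):

```python
from not_again_ai.llm.chat_completion.types import ChatCompletionChunk


def accumulate_tool_args(chunks: list[ChatCompletionChunk]) -> str:
    """Concatenate streamed string fragments of tool-call arguments (OpenAI-style).

    Dict-valued arguments (Ollama-style) arrive whole in a single chunk,
    so they are skipped here.
    """
    parts: list[str] = []
    for chunk in chunks:
        for choice in chunk.choices:
            for tool_call in choice.delta.tool_calls or []:
                if isinstance(tool_call.function.arguments, str):
                    parts.append(tool_call.function.arguments)
    return "".join(parts)
```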
not_again_ai-0.16.1/src/not_again_ai/llm/chat_completion/interface.py (removed)
@@ -1,32 +0,0 @@
-from collections.abc import Callable
-from typing import Any
-
-from not_again_ai.llm.chat_completion.providers.ollama_api import ollama_chat_completion
-from not_again_ai.llm.chat_completion.providers.openai_api import openai_chat_completion
-from not_again_ai.llm.chat_completion.types import ChatCompletionRequest, ChatCompletionResponse
-
-
-def chat_completion(
-    request: ChatCompletionRequest,
-    provider: str,
-    client: Callable[..., Any],
-) -> ChatCompletionResponse:
-    """Get a chat completion response from the given provider. Currently supported providers:
-    - `openai` - OpenAI
-    - `azure_openai` - Azure OpenAI
-    - `ollama` - Ollama
-
-    Args:
-        request: Request parameter object
-        provider: The supported provider name
-        client: Client information, see the provider's implementation for what can be provided
-
-    Returns:
-        ChatCompletionResponse: The chat completion response.
-    """
-    if provider == "openai" or provider == "azure_openai":
-        return openai_chat_completion(request, client)
-    elif provider == "ollama":
-        return ollama_chat_completion(request, client)
-    else:
-        raise ValueError(f"Provider {provider} not supported")