not-again-ai 0.12.1__py3-none-any.whl → 0.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- not_again_ai/data/__init__.py +7 -0
- not_again_ai/data/web.py +56 -0
- not_again_ai/llm/gh_models/chat_completion.py +2 -2
- not_again_ai/llm/openai_api/chat_completion.py +177 -29
- not_again_ai/llm/openai_api/prompts.py +27 -0
- not_again_ai/llm/openai_api/tokens.py +72 -6
- {not_again_ai-0.12.1.dist-info → not_again_ai-0.14.0.dist-info}/METADATA +60 -39
- {not_again_ai-0.12.1.dist-info → not_again_ai-0.14.0.dist-info}/RECORD +11 -9
- {not_again_ai-0.12.1.dist-info → not_again_ai-0.14.0.dist-info}/LICENSE +0 -0
- {not_again_ai-0.12.1.dist-info → not_again_ai-0.14.0.dist-info}/WHEEL +0 -0
- {not_again_ai-0.12.1.dist-info → not_again_ai-0.14.0.dist-info}/entry_points.txt +0 -0
not_again_ai/data/web.py
ADDED
@@ -0,0 +1,56 @@
+from loguru import logger
+from playwright.sync_api import Browser, Playwright, sync_playwright
+
+
+def create_browser(headless: bool = True) -> tuple[Playwright, Browser]:
+    """Creates and returns a new Playwright instance and browser.
+
+    Args:
+        headless (bool, optional): Whether to run the browser in headless mode. Defaults to True.
+
+    Returns:
+        tuple[Playwright, Browser]: A tuple containing the Playwright instance and browser.
+    """
+    pwright = sync_playwright().start()
+    browser = pwright.chromium.launch(
+        headless=headless,
+        chromium_sandbox=False,
+        timeout=15000,
+    )
+    return pwright, browser
+
+
+def get_raw_web_content(url: str, browser: Browser | None = None, headless: bool = True) -> str:
+    """Fetches raw web content from a given URL using Playwright.
+
+    Args:
+        url (str): The URL to fetch content from.
+        browser (Browser | None, optional): An existing browser instance to use. Defaults to None.
+        headless (bool, optional): Whether to run the browser in headless mode. Defaults to True.
+
+    Returns:
+        str: The raw web content.
+    """
+    p = None
+    try:
+        if browser is None:
+            p, browser = create_browser(headless)
+
+        page = browser.new_page(
+            accept_downloads=False,
+            java_script_enabled=True,
+            viewport={"width": 1366, "height": 768},
+            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.3",
+        )
+        page.goto(url)
+        content = page.content()
+        page.close()
+        return content
+    except Exception as e:
+        logger.error(f"Failed to get web content: {e}")
+        return ""
+    finally:
+        if browser:
+            browser.close()
+        if p:
+            p.stop()
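For orientation, a minimal usage sketch of the new module (the URL is illustrative; the `data` extra and `playwright install` are assumed to be set up as described in the METADATA changes below):

```python
from not_again_ai.data.web import get_raw_web_content

# One-off fetch: the function starts its own Playwright browser, loads the page,
# returns the rendered HTML, and tears the browser down (returning "" on failure).
html = get_raw_web_content("https://example.com")  # illustrative URL
print(len(html))
```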
not_again_ai/llm/gh_models/chat_completion.py
@@ -64,8 +64,8 @@ def chat_completion(
         tool_names = []
         tool_args_list = []
         for tool_call in tool_calls:
-            tool_names.append(tool_call.function.name)
-            tool_args_list.append(json.loads(tool_call.function.arguments))
+            tool_names.append(tool_call.function.name)
+            tool_args_list.append(json.loads(tool_call.function.arguments))
         response_data["tool_names"] = tool_names
         response_data["tool_args_list"] = tool_args_list
 

(The removed and re-added lines are textually identical in this view, so the change appears to be whitespace only.)
not_again_ai/llm/openai_api/chat_completion.py
@@ -1,16 +1,16 @@
+from collections.abc import Generator
 import contextlib
 import json
 import time
 from typing import Any
 
-from openai import OpenAI
-from pydantic import BaseModel
+from openai import AzureOpenAI, OpenAI
 
 
 def chat_completion(
     messages: list[dict[str, Any]],
     model: str,
-    client: OpenAI,
+    client: OpenAI | AzureOpenAI | Any,
     tools: list[dict[str, Any]] | None = None,
     tool_choice: str = "auto",
     max_tokens: int | None = None,
@@ -33,7 +33,15 @@ def chat_completion(
         model (str): ID of the model to use. See the model endpoint compatibility table:
             https://platform.openai.com/docs/models/model-endpoint-compatibility
             for details on which models work with the Chat API.
-        client (OpenAI): An instance of the OpenAI client.
+        client (OpenAI | AzureOpenAI | Any): An instance of the OpenAI or AzureOpenAI client.
+            If anything else is provided, we assume that it follows the OpenAI spec and call it by passing kwargs directly.
+            For example you can provide something like:
+            ```
+            def custom_client(**kwargs):
+                client = openai_client()
+                completion = client.chat.completions.create(**kwargs)
+                return completion.to_dict()
+            ```
         tools (list[dict[str, Any]], optional):A list of tools the model may call.
             Use this to provide a list of functions the model may generate JSON inputs for. Defaults to None.
         tool_choice (str, optional): The tool choice to use. Can be "auto", "required", "none", or a specific function name.
@@ -88,8 +96,6 @@ def chat_completion(
     elif json_schema is not None:
         if isinstance(json_schema, dict):
             response_format = {"type": "json_schema", "json_schema": json_schema}
-        elif issubclass(json_schema, BaseModel):
-            response_format = json_schema
     else:
         response_format = {"type": "text"}
 
@@ -120,67 +126,71 @@ def chat_completion(
         kwargs["top_logprobs"] = logprobs[1]
 
     start_time = time.time()
-    response = client.chat.completions.create(**kwargs)
+    if isinstance(client, OpenAI | AzureOpenAI):
+        response = client.chat.completions.create(**kwargs)
+        response = response.to_dict()
+    else:
+        response = client(**kwargs)
     end_time = time.time()
     response_duration = end_time - start_time
 
     response_data: dict[str, Any] = {"choices": []}
-    for response_choice in response.choices:
+    for response_choice in response["choices"]:
         response_data_curr = {}
-        finish_reason = response_choice.finish_reason
+        finish_reason = response_choice["finish_reason"]
         response_data_curr["finish_reason"] = finish_reason
 
         # We first check for tool calls because even if the finish_reason is stop, the model may have called a tool
-        tool_calls = response_choice.message.tool_calls
+        tool_calls = response_choice["message"].get("tool_calls", None)
         if tool_calls:
             tool_names = []
             tool_args_list = []
             for tool_call in tool_calls:
-                tool_names.append(tool_call.function.name)
-                tool_args_list.append(json.loads(tool_call.function.arguments))
-            response_data_curr["message"] = response_choice.message.content
+                tool_names.append(tool_call["function"]["name"])
+                tool_args_list.append(json.loads(tool_call["function"]["arguments"]))
+            response_data_curr["message"] = response_choice["message"]["content"]
             response_data_curr["tool_names"] = tool_names
             response_data_curr["tool_args_list"] = tool_args_list
         elif finish_reason == "stop" or finish_reason == "length":
-            message = response_choice.message.content
+            message = response_choice["message"]["content"]
            if json_mode or json_schema is not None:
                 with contextlib.suppress(json.JSONDecodeError):
                     message = json.loads(message)
             response_data_curr["message"] = message
 
-        if response_choice.logprobs and response_choice.logprobs.content is not None:
+        if response_choice["logprobs"] and response_choice["logprobs"]["content"] is not None:
             logprobs_list: list[dict[str, Any] | list[dict[str, Any]]] = []
-            for logprob in response_choice.logprobs.content:
-                if logprob.top_logprobs:
+            for logprob in response_choice["logprobs"]["content"]:
+                if logprob["top_logprobs"]:
                     curr_logprob_infos = []
-                    for top_logprob in logprob.top_logprobs:
+                    for top_logprob in logprob["top_logprobs"]:
                         curr_logprob_infos.append(
                             {
-                                "token": top_logprob.token,
-                                "logprob": top_logprob.logprob,
-                                "bytes": top_logprob.bytes,
+                                "token": top_logprob["token"],
+                                "logprob": top_logprob["logprob"],
+                                "bytes": top_logprob["bytes"],
                             }
                         )
                     logprobs_list.append(curr_logprob_infos)
                 else:
                     logprobs_list.append(
                         {
-                            "token": logprob.token,
-                            "logprob": logprob.logprob,
-                            "bytes": logprob.bytes,
+                            "token": logprob["token"],
+                            "logprob": logprob["logprob"],
+                            "bytes": logprob["bytes"],
                         }
                     )
 
             response_data_curr["logprobs"] = logprobs_list
         response_data["choices"].append(response_data_curr)
 
-    usage = response.usage
+    usage = response["usage"]
     if usage is not None:
-        response_data["completion_tokens"] = usage.completion_tokens
-        response_data["prompt_tokens"] = usage.prompt_tokens
+        response_data["completion_tokens"] = usage["completion_tokens"]
+        response_data["prompt_tokens"] = usage["prompt_tokens"]
 
-    if seed is not None and response.system_fingerprint is not None:
-        response_data["system_fingerprint"] = response.system_fingerprint
+    if seed is not None and response["system_fingerprint"] is not None:
+        response_data["system_fingerprint"] = response["system_fingerprint"]
 
     response_data["response_duration"] = round(response_duration, 4)
 
@@ -189,3 +199,141 @@ def chat_completion(
         del response_data["choices"]
 
     return response_data
+
+
+def chat_completion_stream(
+    messages: list[dict[str, Any]],
+    model: str,
+    client: OpenAI | AzureOpenAI | Any,
+    tools: list[dict[str, Any]] | None = None,
+    tool_choice: str = "auto",
+    max_tokens: int | None = None,
+    temperature: float = 0.7,
+    seed: int | None = None,
+    **kwargs: Any,
+) -> Generator[dict[str, Any], None, None]:
+    """Stream a chat completion from the OpenAI API.
+
+    Args:
+        messages (list[dict[str, Any]]): The messages to send to the model.
+        model (str): The model to use for the chat completion.
+        client (OpenAI | AzureOpenAI | Any): The client to use to send the request.
+            If anything else is provided, we assume that it follows the OpenAI spec and call it by passing kwargs directly.
+            For example you can provide something like:
+            ```
+            def custom_client(**kwargs) -> Generator[dict[str, Any], None, None]:  # type: ignore
+                client = openai_client()
+                completion = client.chat.completions.create(**kwargs)
+                for chunk in completion:
+                    yield chunk.to_dict()
+            ```
+        tools (list[dict[str, Any]], optional):A list of tools the model may call.
+            Use this to provide a list of functions the model may generate JSON inputs for. Defaults to None.
+        tool_choice (str, optional): The tool choice to use. Can be "auto", "required", "none", or a specific function name.
+            Note the function name cannot be any of "auto", "required", or "none". Defaults to "auto".
+        max_tokens (int | None): The maximum number of tokens to generate.
+        temperature (float): The temperature to use for the chat completion.
+        seed (int, optional): If specified, OpenAI will make a best effort to sample deterministically,
+            such that repeated requests with the same `seed` and parameters should return the same result.
+            Does not currently return `system_fingerprint`.
+
+    Returns:
+        Generator[dict[str, Any], None, None]: A generator of chunks of the chat completion.
+            Each chunk is a dictionary with the following keys:
+            role (str): The role of the chunk. Can be "assistant", "tool", or "usage".
+            content (str): The content of the chunk.
+            tool_name (str | None): The name of the tool called by the model.
+            tool_call_id (str | None): The ID of the tool call.
+            completion_tokens (int | None): The number of tokens used by the model to generate the completion.
+            prompt_tokens (int | None): The number of tokens in the messages sent to the model.
+    """
+
+    class ChatCompletionStreamParser:
+        def __init__(self) -> None:
+            # Remembers if we are currently streaming an assistant message or tool call
+            self.last_type: str = ""
+            self.last_tool_name: str | None = None
+            self.last_tool_call_id: str | None = None
+
+        def process_chunk(self, chunk: dict[str, Any]) -> dict[str, Any] | None:
+            """Convert the current chunk into a more digestible format
+            {
+                "role": Literal["assistant", "tool", "usage"],
+                "content": str,
+                "tool_name": str | None,
+                "tool_call_id": str | None,
+                "completion_tokens": int | None,
+                "prompt_tokens": int | None,
+            }
+            """
+            processed_chunk: dict[str, Any] = {}
+            if chunk["choices"]:
+                choice = chunk["choices"][0]
+                # This checks if its just a regular message currently being streamed
+                if choice["delta"].get("role", "") and choice["delta"].get("tool_calls", None) is None:
+                    if choice["delta"]["role"] != self.last_type:
+                        self.last_type = choice["delta"]["role"]
+                        processed_chunk["role"] = self.last_type
+                        if not choice["delta"]["content"]:
+                            processed_chunk["content"] = ""
+                        else:
+                            processed_chunk["content"] = choice["delta"]["content"]
+                    else:
+                        processed_chunk["role"] = self.last_type
+                elif choice["delta"].get("tool_calls", None):
+                    # tool_calls will always be present if the model is calling a tool
+                    tool_call = choice["delta"]["tool_calls"][0]
+                    if tool_call["function"].get("name"):
+                        self.last_type = "tool"
+                        self.last_tool_name = tool_call["function"]["name"]
+                        self.last_tool_call_id = tool_call["id"]
+                    processed_chunk["role"] = "tool"
+                    processed_chunk["content"] = tool_call["function"]["arguments"]
+                    processed_chunk["tool_name"] = self.last_tool_name
+                    processed_chunk["tool_call_id"] = self.last_tool_call_id
+                elif choice["delta"].get("content", ""):
+                    # This is the case after the first regular assistant message
+                    processed_chunk["role"] = self.last_type
+                    processed_chunk["content"] = choice["delta"]["content"]
+            else:
+                if chunk.get("usage"):
+                    processed_chunk["role"] = "usage"
+                    processed_chunk["completion_tokens"] = chunk["usage"]["completion_tokens"]
+                    processed_chunk["prompt_tokens"] = chunk["usage"]["prompt_tokens"]
+                else:
+                    return None
+            return processed_chunk
+
+    kwargs.update(
+        {
+            "messages": messages,
+            "model": model,
+            "max_tokens": max_tokens,
+            "temperature": temperature,
+            "stream": True,
+            "stream_options": {"include_usage": True},
+        }
+    )
+
+    if tools is not None:
+        kwargs["tools"] = tools
+        if tool_choice not in ["none", "auto", "required"]:
+            kwargs["tool_choice"] = {"type": "function", "function": {"name": tool_choice}}
+        else:
+            kwargs["tool_choice"] = tool_choice
+
+    if seed is not None:
+        kwargs["seed"] = seed
+
+    if isinstance(client, OpenAI | AzureOpenAI):
+        response = client.chat.completions.create(**kwargs)
+    else:
+        response = client(**kwargs)
+
+    parser = ChatCompletionStreamParser()
+    for chunk in response:
+        if isinstance(client, OpenAI | AzureOpenAI):
+            chunk = chunk.to_dict()
+        processed_chunk = parser.process_chunk(chunk)
+        if processed_chunk:
+            yield processed_chunk
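For orientation, a minimal sketch of consuming the new `chat_completion_stream` generator (the model name and prompt are illustrative, and the import location of the package's `openai_client` helper is assumed from the module layout). A plain callable that follows the OpenAI spec can be passed as `client` instead, as shown in the docstring above.

```python
from not_again_ai.llm.openai_api.chat_completion import chat_completion_stream
from not_again_ai.llm.openai_api.openai_client import openai_client  # assumed import path

client = openai_client()  # assumes OPENAI_API_KEY (and optionally OPENAI_ORG_ID) are set

# Print assistant tokens as they arrive; the final "usage" chunk carries token
# counts because the function always sets stream_options={"include_usage": True}.
for chunk in chat_completion_stream(
    messages=[{"role": "user", "content": "Write a haiku about diffs."}],
    model="gpt-4o-mini",  # illustrative model name
    client=client,
):
    if chunk["role"] == "assistant":
        print(chunk.get("content", ""), end="", flush=True)
    elif chunk["role"] == "usage":
        print(f"\n[prompt={chunk['prompt_tokens']}, completion={chunk['completion_tokens']}]")
```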
not_again_ai/llm/openai_api/prompts.py
@@ -5,6 +5,8 @@ from pathlib import Path
 from typing import Any
 
 from liquid import Template
+from openai.lib._pydantic import to_strict_json_schema
+from pydantic import BaseModel
 
 
 def _validate_message_vision(message: dict[str, list[dict[str, Path | str]] | str]) -> bool:
@@ -162,3 +164,28 @@ def chat_prompt(messages_unformatted: list[dict[str, Any]], variables: dict[str,
         message["content"] = Template(message["content"]).render(**variables)
 
     return messages_formatted
+
+
+def pydantic_to_json_schema(
+    pydantic_model: type[BaseModel], schema_name: str, description: str | None = None
+) -> dict[str, Any]:
+    """Converts a Pydantic model to a JSON schema expected by Structured Outputs.
+    Must adhere to the supported schemas: https://platform.openai.com/docs/guides/structured-outputs/supported-schemas
+
+    Args:
+        pydantic_model: The Pydantic model to convert.
+        schema_name: The name of the schema.
+        description: An optional description of the schema.
+
+    Returns:
+        A JSON schema dictionary representing the Pydantic model.
+    """
+    converted_pydantic = to_strict_json_schema(pydantic_model)
+    schema = {
+        "name": schema_name,
+        "strict": True,
+        "schema": converted_pydantic,
+    }
+    if description:
+        schema["description"] = description
+    return schema
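As a quick illustration of the new `pydantic_to_json_schema` helper (the Pydantic model below is hypothetical), the returned dict is shaped so it can be passed to `chat_completion` as `json_schema`, which wraps it as `{"type": "json_schema", "json_schema": ...}`:

```python
from pydantic import BaseModel

from not_again_ai.llm.openai_api.prompts import pydantic_to_json_schema


class MathAnswer(BaseModel):  # hypothetical example model
    reasoning: str
    answer: int


schema = pydantic_to_json_schema(
    MathAnswer,
    schema_name="math_answer",
    description="A worked answer with reasoning",
)
# schema == {"name": "math_answer", "strict": True, "schema": {...}, "description": "..."}
```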
not_again_ai/llm/openai_api/tokens.py
@@ -1,3 +1,6 @@
+from collections.abc import Collection, Set
+from typing import Literal
+
 import tiktoken
 
 
@@ -18,18 +21,38 @@ def load_tokenizer(model: str) -> tiktoken.Encoding:
     return encoding
 
 
-def truncate_str(text: str, max_len: int, tokenizer: tiktoken.Encoding) -> str:
+def truncate_str(
+    text: str,
+    max_len: int,
+    tokenizer: tiktoken.Encoding,
+    allowed_special: Literal["all"] | Set[str] = set(),
+    disallowed_special: Literal["all"] | Collection[str] = (),
+) -> str:
     """Truncates a string to a maximum token length.
 
+    Special tokens are artificial tokens used to unlock capabilities from a model,
+    such as fill-in-the-middle. So we want to be careful about accidentally encoding special
+    tokens, since they can be used to trick a model into doing something we don't want it to do.
+
+    Hence, by default, encode will raise an error if it encounters text that corresponds
+    to a special token. This can be controlled on a per-token level using the `allowed_special`
+    and `disallowed_special` parameters. In particular:
+    - Setting `disallowed_special` to () will prevent this function from raising errors and
+      cause all text corresponding to special tokens to be encoded as natural text.
+    - Setting `allowed_special` to "all" will cause this function to treat all text
+      corresponding to special tokens to be encoded as special tokens.
+
     Args:
         text (str): The string to truncate.
         max_len (int): The maximum number of tokens to keep.
         tokenizer (tiktoken.Encoding): A tiktoken encoding object
+        allowed_special (str | set[str]):
+        disallowed_special (str | set[str]):
 
     Returns:
         str: The truncated string.
     """
-    tokens = tokenizer.encode(text)
+    tokens = tokenizer.encode(text, allowed_special=allowed_special, disallowed_special=disallowed_special)
     if len(tokens) > max_len:
         tokens = tokens[:max_len]
         # Decode the tokens back to a string
@@ -39,33 +62,70 @@ def truncate_str(text: str, max_len: int, tokenizer: tiktoken.Encoding) -> str:
     return text
 
 
-def num_tokens_in_string(text: str, tokenizer: tiktoken.Encoding) -> int:
+def num_tokens_in_string(
+    text: str,
+    tokenizer: tiktoken.Encoding,
+    allowed_special: Literal["all"] | Set[str] = set(),
+    disallowed_special: Literal["all"] | Collection[str] = (),
+) -> int:
     """Return the number of tokens in a string.
 
+    Special tokens are artificial tokens used to unlock capabilities from a model,
+    such as fill-in-the-middle. So we want to be careful about accidentally encoding special
+    tokens, since they can be used to trick a model into doing something we don't want it to do.
+
+    Hence, by default, encode will raise an error if it encounters text that corresponds
+    to a special token. This can be controlled on a per-token level using the `allowed_special`
+    and `disallowed_special` parameters. In particular:
+    - Setting `disallowed_special` to () will prevent this function from raising errors and
+      cause all text corresponding to special tokens to be encoded as natural text.
+    - Setting `allowed_special` to "all" will cause this function to treat all text
+      corresponding to special tokens to be encoded as special tokens.
+
     Args:
         text (str): The string to count the tokens.
         tokenizer (tiktoken.Encoding): A tiktoken encoding object
+        allowed_special (str | set[str]):
+        disallowed_special (str | set[str]):
 
     Returns:
         int: The number of tokens in the string.
     """
-    return len(tokenizer.encode(text))
+    return len(tokenizer.encode(text, allowed_special=allowed_special, disallowed_special=disallowed_special))
 
 
 def num_tokens_from_messages(
-    messages: list[dict[str, str]],
+    messages: list[dict[str, str]],
+    tokenizer: tiktoken.Encoding,
+    model: str = "gpt-3.5-turbo-0125",
+    allowed_special: Literal["all"] | Set[str] = set(),
+    disallowed_special: Literal["all"] | Collection[str] = (),
 ) -> int:
     """Return the number of tokens used by a list of messages.
     NOTE: Does not support counting tokens used by function calling or prompts with images.
     Reference: # https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb
         and https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
 
+    Special tokens are artificial tokens used to unlock capabilities from a model,
+    such as fill-in-the-middle. So we want to be careful about accidentally encoding special
+    tokens, since they can be used to trick a model into doing something we don't want it to do.
+
+    Hence, by default, encode will raise an error if it encounters text that corresponds
+    to a special token. This can be controlled on a per-token level using the `allowed_special`
+    and `disallowed_special` parameters. In particular:
+    - Setting `disallowed_special` to () will prevent this function from raising errors and
+      cause all text corresponding to special tokens to be encoded as natural text.
+    - Setting `allowed_special` to "all" will cause this function to treat all text
+      corresponding to special tokens to be encoded as special tokens.
+
     Args:
         messages (list[dict[str, str]]): A list of messages to count the tokens
            should ideally be the result after calling llm.prompts.chat_prompt.
         tokenizer (tiktoken.Encoding): A tiktoken encoding object
         model (str): The model to use for tokenization. Defaults to "gpt-3.5-turbo-0125".
            See https://platform.openai.com/docs/models for a list of OpenAI models.
+        allowed_special (str | set[str]):
+        disallowed_special (str | set[str]):
 
     Returns:
         int: The number of tokens used by the messages.
@@ -111,7 +171,13 @@ See https://github.com/openai/openai-python/blob/main/chatml.md for information
     for message in messages:
         num_tokens += tokens_per_message
         for key, value in message.items():
-            num_tokens += len(tokenizer.encode(value))
+            num_tokens += len(
+                tokenizer.encode(
+                    value,
+                    allowed_special=allowed_special,
+                    disallowed_special=disallowed_special,
+                )
+            )
             if key == "name":
                 num_tokens += tokens_per_name
     num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
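A small sketch of how the new special-token parameters behave (the text is illustrative; `cl100k_base` is a standard tiktoken encoding):

```python
import tiktoken

from not_again_ai.llm.openai_api.tokens import num_tokens_in_string, truncate_str

tokenizer = tiktoken.get_encoding("cl100k_base")
text = "User input that happens to contain <|endoftext|> in it."

# With the new defaults (disallowed_special=()), the special-token text is encoded
# as ordinary text, so neither call raises.
print(num_tokens_in_string(text, tokenizer))
print(truncate_str(text, max_len=5, tokenizer=tokenizer))

# Treat text matching special tokens as actual special tokens instead:
print(num_tokens_in_string(text, tokenizer, allowed_special="all"))
```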
{not_again_ai-0.12.1.dist-info → not_again_ai-0.14.0.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: not-again-ai
-Version: 0.12.1
+Version: 0.14.0
 Summary: Designed to once and for all collect all the little things that come up over and over again in AI projects and put them in one place.
 Home-page: https://github.com/DaveCoDev/not-again-ai
 License: MIT
@@ -17,25 +17,27 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Typing :: Typed
+Provides-Extra: data
 Provides-Extra: llm
 Provides-Extra: local-llm
 Provides-Extra: statistics
 Provides-Extra: viz
-Requires-Dist: azure-ai-inference (==1.0.
-Requires-Dist: azure-identity (>=1.
+Requires-Dist: azure-ai-inference (==1.0.0b5) ; extra == "llm"
+Requires-Dist: azure-identity (>=1.19,<2.0) ; extra == "llm"
 Requires-Dist: jinja2 (>=3.1,<4.0) ; extra == "local-llm"
-Requires-Dist: loguru (
-Requires-Dist: numpy (>=1
+Requires-Dist: loguru (>=0.7,<0.8)
+Requires-Dist: numpy (>=2.1,<3.0) ; extra == "statistics" or extra == "viz"
 Requires-Dist: ollama (>=0.3,<0.4) ; extra == "local-llm"
-Requires-Dist: openai (>=1.
+Requires-Dist: openai (>=1.52,<2.0) ; extra == "llm"
 Requires-Dist: pandas (>=2.2,<3.0) ; extra == "viz"
-Requires-Dist: pydantic (>=2.
+Requires-Dist: pydantic (>=2.9,<3.0)
+Requires-Dist: pytest-playwright (>=0.5,<0.6) ; extra == "data"
 Requires-Dist: python-liquid (>=1.12,<2.0) ; extra == "llm"
 Requires-Dist: scikit-learn (>=1.5,<2.0) ; extra == "statistics"
 Requires-Dist: scipy (>=1.14,<2.0) ; extra == "statistics"
 Requires-Dist: seaborn (>=0.13,<0.14) ; extra == "viz"
-Requires-Dist: tiktoken (>=0.
-Requires-Dist: transformers (>=4.
+Requires-Dist: tiktoken (>=0.8,<0.9) ; extra == "llm"
+Requires-Dist: transformers (>=4.45,<5.0) ; extra == "local-llm"
 Project-URL: Documentation, https://github.com/DaveCoDev/not-again-ai
 Project-URL: Repository, https://github.com/DaveCoDev/not-again-ai
 Description-Content-Type: text/markdown
@@ -72,34 +74,53 @@ $ pip install not_again_ai[llm,local_llm,statistics,viz]
 Note that local LLM requires separate installations and will not work out of the box due to how hardware dependent it is. Be sure to check the [notebooks](notebooks/local_llm/) for more details.
 
 The package is split into subpackages, so you can install only the parts you need.
-[28 removed lines (old lines 75-102): the previous installation section, collapsed in this diff view]
+
+### Base
+1. `pip install not_again_ai`
+
+
+### Data
+1. `pip install not_again_ai[data]`
+1. `playwright install` to download the browser binaries.
+
+
+### LLM
+1. `pip install not_again_ai[llm]`
+1. Setup OpenAI API
+    1. Go to https://platform.openai.com/settings/profile?tab=api-keys to get your API key.
+    1. (Optional) Set the `OPENAI_API_KEY` and the `OPENAI_ORG_ID` environment variables.
+1. Setup Azure OpenAI (AOAI)
+    1. Using AOAI requires using Entra ID authentication. See https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/managed-identity for how to set this up for your AOAI deployment.
+        * Requires the correct role assigned to your user account and being signed into the Azure CLI.
+    1. (Optional) Set the `AZURE_OPENAI_ENDPOINT` environment variable.
+1. Setup GitHub Models
+    1. Get a Personal Access Token from https://github.com/settings/tokens and set the `GITHUB_TOKEN` environment variable. The token does not need any permissions.
+    1. Check the [Github Marketplace](https://github.com/marketplace/models) to see which models are available.
+
+
+### Local LLM
+1. `pip install not_again_ai[llm,local_llm]`
+1. Some HuggingFace transformers tokenizers are gated behind access requests. If you wish to use these, you will need to request access from HuggingFace on the model card.
+    * Then set the `HF_TOKEN` environment variable to your HuggingFace API token which can be found here: https://huggingface.co/settings/tokens
+1. If you wish to use Ollama:
+    1. Follow the instructions at https://github.com/ollama/ollama to install Ollama for your system.
+    1. (Optional) [Add Ollama as a startup service (recommended)](https://github.com/ollama/ollama/blob/main/docs/linux.md#adding-ollama-as-a-startup-service-recommended)
+    1. (Optional) To make the Ollama service accessible on your local network from a Linux server, add the following to the `/etc/systemd/system/ollama.service` file which will make Ollama available at `http://<local_address>:11434`:
+        ```bash
+        [Service]
+        ...
+        Environment="OLLAMA_HOST=0.0.0.0"
+        ```
+    1. It is recommended to always have the latest version of Ollama. To update Ollama check the [docs](https://github.com/ollama/ollama/blob/main/docs/). The command for Linux is: `curl -fsSL https://ollama.com/install.sh | sh`
+1. HuggingFace transformers and other requirements are hardware dependent so for providers other than Ollama, this only installs some generic dependencies. Check the [notebooks](notebooks/local_llm/) for more details on what is available and how to install it.
+
+
+### Statistics
+1. `pip install not_again_ai[statistics]`
+
+
+### Visualization
+1. `pip install not_again_ai[viz]`
 
 
 # Development Information
@@ -229,10 +250,10 @@ areas of the project that are currently not tested.
 
 pytest and code coverage are configured in [`pyproject.toml`](./pyproject.toml).
 
-To
+To run selected tests:
 
 ```bash
-(.venv) $ nox -s test -- -k
+(.venv) $ nox -s test -- -k "test_web"
 ```
 
 ## Code Style Checking
{not_again_ai-0.12.1.dist-info → not_again_ai-0.14.0.dist-info}/RECORD
@@ -2,17 +2,19 @@ not_again_ai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 not_again_ai/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 not_again_ai/base/file_system.py,sha256=KNQmacO4Q__CQuq2oPzWrg3rQO48n3evglc9bNiP7KM,949
 not_again_ai/base/parallel.py,sha256=fcYhKBYBWvob84iKp3O93wvFFdXeidljZsShgBLTNGA,3448
+not_again_ai/data/__init__.py,sha256=1jF6mwvtB2PT7IEc3xpbRtZm3g3Lyf8zUqH4AEE4qlQ,244
+not_again_ai/data/web.py,sha256=wjx9cc33jcoJBGonYCIpwygPBFOwz7F-dx_ominmbnI,1838
 not_again_ai/llm/__init__.py,sha256=_wNUL6FDaT369Z8W48FsaC_NkcOZ-ib2MMUvnaLOS-0,451
 not_again_ai/llm/gh_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 not_again_ai/llm/gh_models/azure_ai_client.py,sha256=GkVn9ZwYbsLm3X0A3pGKKHuoqrxc-BZnZ4n9ExelRUQ,580
-not_again_ai/llm/gh_models/chat_completion.py,sha256=
+not_again_ai/llm/gh_models/chat_completion.py,sha256=zI6Kfqb9AW0t_Yd1ecaXy7q70gygJ_XKcFbtYrKIbn4,3599
 not_again_ai/llm/openai_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-not_again_ai/llm/openai_api/chat_completion.py,sha256=
+not_again_ai/llm/openai_api/chat_completion.py,sha256=5jO-J97zcKPJrzHn6V4NZB3nZa1RZwbIeMC3gbXlWWQ,17100
 not_again_ai/llm/openai_api/context_management.py,sha256=BJSG100_qw9MeTCZGztDV5CBXjVOxU4x7gyoRlLxWnI,3561
 not_again_ai/llm/openai_api/embeddings.py,sha256=4OBnxZicrY6q4dQhuPqMdAnifyjwrsKMTDj-kVre0yc,2500
 not_again_ai/llm/openai_api/openai_client.py,sha256=AK9SDBkpP94u5Q73-Q5i5HRPQh_D8cF8Dfl0IgPsJDQ,3816
-not_again_ai/llm/openai_api/prompts.py,sha256=
-not_again_ai/llm/openai_api/tokens.py,sha256=
+not_again_ai/llm/openai_api/prompts.py,sha256=lZYxgzoM2VqXWKUDToKWKR6w49KNYKu5TnqKLxG3TsM,8034
+not_again_ai/llm/openai_api/tokens.py,sha256=Q4xdCEPrmgDCNjmcB4rg6ipvo4_McwSjc-b9gAHjUJs,8024
 not_again_ai/local_llm/__init__.py,sha256=BsUn39U3QQaw6yomQHfp_HIPHRIBoMAgjcP3CDADx04,882
 not_again_ai/local_llm/chat_completion.py,sha256=PmICXrGZJXIuqY00ULBGi2bKnPG8ticqTXZHSTzZK9o,4828
 not_again_ai/local_llm/huggingface/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -35,8 +37,8 @@ not_again_ai/viz/distributions.py,sha256=OyWwJaNI6lMRm_iSrhq-CORLNvXfeuLSgDtVo3u
 not_again_ai/viz/scatterplot.py,sha256=5CUOWeknbBOaZPeX9oPin5sBkRKEwk8qeFH45R-9LlY,2292
 not_again_ai/viz/time_series.py,sha256=pOGZqXp_2nd6nKo-PUQNCtmMh__69jxQ6bQibTGLwZA,5212
 not_again_ai/viz/utils.py,sha256=hN7gwxtBt3U6jQni2K8j5m5pCXpaJDoNzGhBBikEU28,238
-not_again_ai-0.
-not_again_ai-0.
-not_again_ai-0.
-not_again_ai-0.
-not_again_ai-0.
+not_again_ai-0.14.0.dist-info/LICENSE,sha256=btjOgNGpp-ux5xOo1Gx1MddxeWtT9sof3s3Nui29QfA,1071
+not_again_ai-0.14.0.dist-info/METADATA,sha256=kNL0KybcNVoN7fcCMNO1CohIWZAxc74gnV68zzoEDfI,16475
+not_again_ai-0.14.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+not_again_ai-0.14.0.dist-info/entry_points.txt,sha256=EMJegugnmJUd-jMUA_qIRMIPAasbei8gP6O4-ER0BxQ,61
+not_again_ai-0.14.0.dist-info/RECORD,,
{not_again_ai-0.12.1.dist-info → not_again_ai-0.14.0.dist-info}/LICENSE: file without changes
{not_again_ai-0.12.1.dist-info → not_again_ai-0.14.0.dist-info}/WHEEL: file without changes
{not_again_ai-0.12.1.dist-info → not_again_ai-0.14.0.dist-info}/entry_points.txt: file without changes