not-again-ai 0.10.3__tar.gz → 0.12.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/PKG-INFO +17 -11
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/README.md +10 -7
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/pyproject.toml +8 -5
- not_again_ai-0.12.0/src/not_again_ai/llm/gh_models/azure_ai_client.py +20 -0
- not_again_ai-0.12.0/src/not_again_ai/llm/gh_models/chat_completion.py +81 -0
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/llm/openai_api/chat_completion.py +31 -14
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/llm/openai_api/openai_client.py +27 -6
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/llm/openai_api/prompts.py +1 -1
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/llm/openai_api/tokens.py +1 -0
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/local_llm/chat_completion.py +26 -1
- not_again_ai-0.12.0/src/not_again_ai/local_llm/ollama/__init__.py +0 -0
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/local_llm/ollama/chat_completion.py +18 -7
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/local_llm/ollama/model_mapping.py +1 -0
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/LICENSE +0 -0
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/__init__.py +0 -0
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/base/__init__.py +0 -0
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/base/file_system.py +0 -0
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/base/parallel.py +0 -0
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/llm/__init__.py +0 -0
- {not_again_ai-0.10.3/src/not_again_ai/llm/openai_api → not_again_ai-0.12.0/src/not_again_ai/llm/gh_models}/__init__.py +0 -0
- {not_again_ai-0.10.3/src/not_again_ai/local_llm/huggingface → not_again_ai-0.12.0/src/not_again_ai/llm/openai_api}/__init__.py +0 -0
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/llm/openai_api/context_management.py +0 -0
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/llm/openai_api/embeddings.py +0 -0
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/local_llm/__init__.py +0 -0
- {not_again_ai-0.10.3/src/not_again_ai/local_llm/ollama → not_again_ai-0.12.0/src/not_again_ai/local_llm/huggingface}/__init__.py +0 -0
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/local_llm/huggingface/chat_completion.py +0 -0
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/local_llm/huggingface/helpers.py +0 -0
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/local_llm/ollama/ollama_client.py +0 -0
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/local_llm/ollama/service.py +0 -0
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/local_llm/ollama/tokens.py +0 -0
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/local_llm/prompts.py +0 -0
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/local_llm/tokens.py +0 -0
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/py.typed +0 -0
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/statistics/__init__.py +0 -0
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/statistics/dependence.py +0 -0
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/viz/__init__.py +0 -0
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/viz/barplots.py +0 -0
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/viz/distributions.py +0 -0
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/viz/scatterplot.py +0 -0
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/viz/time_series.py +0 -0
- {not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/viz/utils.py +0 -0
{not_again_ai-0.10.3 → not_again_ai-0.12.0}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: not-again-ai
-Version: 0.10.3
+Version: 0.12.0
 Summary: Designed to once and for all collect all the little things that come up over and over again in AI projects and put them in one place.
 Home-page: https://github.com/DaveCoDev/not-again-ai
 License: MIT
@@ -21,18 +21,21 @@ Provides-Extra: llm
 Provides-Extra: local-llm
 Provides-Extra: statistics
 Provides-Extra: viz
+Requires-Dist: azure-ai-inference (==1.0.0b3) ; extra == "llm"
+Requires-Dist: azure-identity (>=1.17,<2.0) ; extra == "llm"
 Requires-Dist: jinja2 (>=3.1,<4.0) ; extra == "local-llm"
 Requires-Dist: loguru (==0.7.2)
 Requires-Dist: numpy (>=1.26,<2.0) ; extra == "statistics" or extra == "viz"
-Requires-Dist: ollama (>=0.
-Requires-Dist: openai (>=1.
+Requires-Dist: ollama (>=0.3,<0.4) ; extra == "local-llm"
+Requires-Dist: openai (>=1.40,<2.0) ; extra == "llm"
 Requires-Dist: pandas (>=2.2,<3.0) ; extra == "viz"
+Requires-Dist: pydantic (>=2.8,<3.0) ; extra == "llm"
 Requires-Dist: python-liquid (>=1.12,<2.0) ; extra == "llm"
 Requires-Dist: scikit-learn (>=1.5,<2.0) ; extra == "statistics"
 Requires-Dist: scipy (>=1.14,<2.0) ; extra == "statistics"
 Requires-Dist: seaborn (>=0.13,<0.14) ; extra == "viz"
 Requires-Dist: tiktoken (>=0.7,<0.8) ; extra == "llm"
-Requires-Dist: transformers (>=4.
+Requires-Dist: transformers (>=4.43,<5.0) ; extra == "local-llm"
 Project-URL: Documentation, https://github.com/DaveCoDev/not-again-ai
 Project-URL: Repository, https://github.com/DaveCoDev/not-again-ai
 Description-Content-Type: text/markdown
@@ -71,23 +74,26 @@ Note that local LLM requires separate installations and will not work out of the
 The package is split into subpackages, so you can install only the parts you need.
 * **Base only**: `pip install not_again_ai`
 * **LLM**: `pip install not_again_ai[llm]`
-  1.
+  1. OpenAI API
     1. Go to https://platform.openai.com/settings/profile?tab=api-keys to get your API key.
     1. (Optional) Set the `OPENAI_API_KEY` and the `OPENAI_ORG_ID` environment variables.
+  1. GitHub Models
+    1. Get a Personal Access Token from https://github.com/settings/tokens and set the `GITHUB_TOKEN` environment variable. The token does not need any permissions.
+    1. Check the [Github Marketplace](https://github.com/marketplace/models) to see which models are available.
 * **Local LLM**: `pip install not_again_ai[llm,local_llm]`
   1. Some HuggingFace transformers tokenizers are gated behind access requests. If you wish to use these, you will need to request access from HuggingFace on the model card.
   1. Then set the `HF_TOKEN` environment variable to your HuggingFace API token which can be found here: https://huggingface.co/settings/tokens
-
+  2. If you wish to use Ollama:
     1. Follow the instructions at https://github.com/ollama/ollama to install Ollama for your system.
-
-
+    2. (Optional) [Add Ollama as a startup service (recommended)](https://github.com/ollama/ollama/blob/main/docs/linux.md#adding-ollama-as-a-startup-service-recommended)
+    3. (Optional) To make the Ollama service accessible on your local network from a Linux server, add the following to the `/etc/systemd/system/ollama.service` file which will make Ollama available at `http://<local_address>:11434`:
     ```bash
     [Service]
     ...
     Environment="OLLAMA_HOST=0.0.0.0"
     ```
-
-
+    4. It is recommended to always have the latest version of Ollama. To update Ollama check the [docs](https://github.com/ollama/ollama/blob/main/docs/). The command for Linux is: `curl -fsSL https://ollama.com/install.sh | sh`
+  3. HuggingFace transformers and other requirements are hardware dependent so for providers other than Ollama, this only installs some generic dependencies. Check the [notebooks](notebooks/local_llm/) for more details on what is available and how to install it.
 * **Statistics**: `pip install not_again_ai[statistics]`
 * **Visualization**: `pip install not_again_ai[viz]`
 
@@ -295,7 +301,7 @@ installed package to indicate that inline type annotations should be checked.
 
 ## Typos
 
-
+Check for typos using [typos](https://github.com/crate-ci/typos)
 
 ```bash
 (.venv) $ nox -s typos
{not_again_ai-0.10.3 → not_again_ai-0.12.0}/README.md
RENAMED
@@ -32,23 +32,26 @@ Note that local LLM requires separate installations and will not work out of the
 The package is split into subpackages, so you can install only the parts you need.
 * **Base only**: `pip install not_again_ai`
 * **LLM**: `pip install not_again_ai[llm]`
-  1.
+  1. OpenAI API
     1. Go to https://platform.openai.com/settings/profile?tab=api-keys to get your API key.
     1. (Optional) Set the `OPENAI_API_KEY` and the `OPENAI_ORG_ID` environment variables.
+  1. GitHub Models
+    1. Get a Personal Access Token from https://github.com/settings/tokens and set the `GITHUB_TOKEN` environment variable. The token does not need any permissions.
+    1. Check the [Github Marketplace](https://github.com/marketplace/models) to see which models are available.
 * **Local LLM**: `pip install not_again_ai[llm,local_llm]`
   1. Some HuggingFace transformers tokenizers are gated behind access requests. If you wish to use these, you will need to request access from HuggingFace on the model card.
   1. Then set the `HF_TOKEN` environment variable to your HuggingFace API token which can be found here: https://huggingface.co/settings/tokens
-
+  2. If you wish to use Ollama:
     1. Follow the instructions at https://github.com/ollama/ollama to install Ollama for your system.
-
-
+    2. (Optional) [Add Ollama as a startup service (recommended)](https://github.com/ollama/ollama/blob/main/docs/linux.md#adding-ollama-as-a-startup-service-recommended)
+    3. (Optional) To make the Ollama service accessible on your local network from a Linux server, add the following to the `/etc/systemd/system/ollama.service` file which will make Ollama available at `http://<local_address>:11434`:
     ```bash
     [Service]
     ...
     Environment="OLLAMA_HOST=0.0.0.0"
     ```
-
-
+    4. It is recommended to always have the latest version of Ollama. To update Ollama check the [docs](https://github.com/ollama/ollama/blob/main/docs/). The command for Linux is: `curl -fsSL https://ollama.com/install.sh | sh`
+  3. HuggingFace transformers and other requirements are hardware dependent so for providers other than Ollama, this only installs some generic dependencies. Check the [notebooks](notebooks/local_llm/) for more details on what is available and how to install it.
 * **Statistics**: `pip install not_again_ai[statistics]`
 * **Visualization**: `pip install not_again_ai[viz]`
 
@@ -256,7 +259,7 @@ installed package to indicate that inline type annotations should be checked.
 
 ## Typos
 
-
+Check for typos using [typos](https://github.com/crate-ci/typos)
 
 ```bash
 (.venv) $ nox -s typos
{not_again_ai-0.10.3 → not_again_ai-0.12.0}/pyproject.toml
RENAMED
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "not-again-ai"
-version = "0.10.3"
+version = "0.12.0"
 description = "Designed to once and for all collect all the little things that come up over and over again in AI projects and put them in one place."
 authors = ["DaveCoDev <dave.co.dev@gmail.com>"]
 license = "MIT"
@@ -29,20 +29,23 @@ python = "^3.11 || ^3.12"
 loguru = { version = "==0.7.2" }
 
 # Optional dependencies are defined here, and groupings are defined below.
+azure-ai-inference = { version = "==1.0.0b3", optional = true }
+azure-identity = { version = "^1.17", optional = true }
 jinja2 = { version = "^3.1", optional = true }
 numpy = { version = "^1.26", optional = true }
-ollama = { version = "^0.
-openai = { version = "^1.
+ollama = { version = "^0.3", optional = true }
+openai = { version = "^1.40", optional = true }
 pandas = { version = "^2.2", optional = true }
+pydantic = { version = "^2.8", optional = true }
 python-liquid = { version = "^1.12", optional = true }
 scipy = { version = "^1.14", optional = true }
 scikit-learn = { version = "^1.5", optional = true }
 seaborn = { version = "^0.13", optional = true }
 tiktoken = { version = "^0.7", optional = true }
-transformers = { version = "^4.
+transformers = { version = "^4.43", optional = true }
 
 [tool.poetry.extras]
-llm = ["openai", "python-liquid", "tiktoken"]
+llm = ["azure-ai-inference", "azure-identity", "openai", "pydantic", "python-liquid", "tiktoken"]
 local_llm = ["jinja2", "ollama", "transformers"]
 statistics = ["numpy", "scikit-learn", "scipy"]
 viz = ["numpy", "pandas", "seaborn"]
not_again_ai-0.12.0/src/not_again_ai/llm/gh_models/azure_ai_client.py
ADDED
@@ -0,0 +1,20 @@
+import os
+
+from azure.ai.inference import ChatCompletionsClient
+from azure.core.credentials import AzureKeyCredential
+
+
+def azure_ai_client(
+    token: str | None = None,
+    endpoint: str = "https://models.inference.ai.azure.com",
+) -> ChatCompletionsClient:
+    if not token:
+        token = os.getenv("GITHUB_TOKEN")
+    if not token:
+        raise ValueError("Token must be provided or GITHUB_TOKEN environment variable must be set")
+
+    client = ChatCompletionsClient(
+        endpoint=endpoint,
+        credential=AzureKeyCredential(token),
+    )
+    return client
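A minimal usage sketch of this new client factory (assumes the `GITHUB_TOKEN` environment variable is set, as described in the README changes above):

```python
# Sketch only: relies on GITHUB_TOKEN being set; the endpoint defaults to GitHub Models.
from not_again_ai.llm.gh_models.azure_ai_client import azure_ai_client

client = azure_ai_client()  # targets https://models.inference.ai.azure.com by default
```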
not_again_ai-0.12.0/src/not_again_ai/llm/gh_models/chat_completion.py
ADDED
@@ -0,0 +1,81 @@
+import contextlib
+import json
+import time
+from typing import Any
+
+from azure.ai.inference import ChatCompletionsClient
+from azure.ai.inference.models import ChatCompletionsToolDefinition, ChatRequestMessage
+
+
+def chat_completion(
+    messages: list[ChatRequestMessage],
+    model: str,
+    client: ChatCompletionsClient,
+    tools: list[ChatCompletionsToolDefinition] | None = None,
+    max_tokens: int | None = None,
+    temperature: float | None = None,
+    json_mode: bool = False,
+    seed: int | None = None,
+) -> dict[str, Any]:
+    """Gets a response from GitHub Models using the Azure AI Inference SDK.
+    See the available models at https://github.com/marketplace/models
+    Full documentation of the SDK is at: https://learn.microsoft.com/en-us/azure/ai-studio/reference/reference-model-inference-chat-completions
+    And samples at: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/ai/azure-ai-inference/samples
+
+    Returns:
+        dict[str, Any]: A dictionary with the following keys
+            message (str | dict): The content of the generated assistant message.
+                If json_mode is True, this will be a dictionary.
+            tool_names (list[str], optional): The names of the tools called by the model.
+                If the model does not support tools, a ResponseError is raised.
+            tool_args_list (list[dict], optional): The arguments of the tools called by the model.
+            prompt_tokens (int): The number of tokens in the messages sent to the model.
+            completion_tokens (int): The number of tokens used by the model to generate the completion.
+            response_duration (float): The time, in seconds, taken to generate the response by using the model.
+            system_fingerprint (str, optional): If seed is set, a unique identifier for the model used to generate the response.
+    """
+    response_format = {"type": "json_object"} if json_mode else None
+    start_time = time.time()
+    response = client.complete(  # type: ignore
+        messages=messages,
+        model=model,
+        response_format=response_format,  # type: ignore
+        max_tokens=max_tokens,
+        temperature=temperature,
+        tools=tools,
+        seed=seed,
+    )
+    end_time = time.time()
+    response_duration = end_time - start_time
+
+    response_data = {}
+    finish_reason = response.choices[0].finish_reason
+    response_data["finish_reason"] = finish_reason.value  # type: ignore
+
+    message = response.choices[0].message.content
+    if message and json_mode:
+        with contextlib.suppress(json.JSONDecodeError):
+            message = json.loads(message)
+    response_data["message"] = message
+
+    # Check for tool calls because even if the finish_reason is stop, the model may have called a tool
+    tool_calls = response.choices[0].message.tool_calls
+    if tool_calls:
+        tool_names = []
+        tool_args_list = []
+        for tool_call in tool_calls:
+            tool_names.append(tool_call.function.name)  # type: ignore
+            tool_args_list.append(json.loads(tool_call.function.arguments))  # type: ignore
+        response_data["tool_names"] = tool_names
+        response_data["tool_args_list"] = tool_args_list
+
+    if seed is not None and hasattr(response, "system_fingerprint"):
+        response_data["system_fingerprint"] = response.system_fingerprint
+
+    usage = response.usage
+    if usage is not None:
+        response_data["completion_tokens"] = usage.completion_tokens
+        response_data["prompt_tokens"] = usage.prompt_tokens
+    response_data["response_duration"] = round(response_duration, 4)
+
+    return response_data
{not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/llm/openai_api/chat_completion.py
RENAMED
@@ -4,6 +4,7 @@ import time
 from typing import Any
 
 from openai import OpenAI
+from pydantic import BaseModel
 
 
 def chat_completion(
@@ -15,6 +16,7 @@ def chat_completion(
     max_tokens: int | None = None,
     temperature: float = 0.7,
     json_mode: bool = False,
+    json_schema: dict[str, Any] | None = None,
     seed: int | None = None,
     logprobs: tuple[bool, int | None] | None = None,
     n: int = 1,
@@ -32,8 +34,8 @@ def chat_completion(
             https://platform.openai.com/docs/models/model-endpoint-compatibility
             for details on which models work with the Chat API.
         client (OpenAI): An instance of the OpenAI client.
-        tools (list[dict[str, Any]], optional):
-            Defaults to None.
+        tools (list[dict[str, Any]], optional):A list of tools the model may call.
+            Use this to provide a list of functions the model may generate JSON inputs for. Defaults to None.
         tool_choice (str, optional): The tool choice to use. Can be "auto", "required", "none", or a specific function name.
             Note the function name cannot be any of "auto", "required", or "none". Defaults to "auto".
         max_tokens (int, optional): The maximum number of tokens to generate in the chat completion.
@@ -44,6 +46,9 @@ def chat_completion(
         json_mode (bool, optional): When JSON mode is enabled, the model is constrained to only
             generate strings that parse into valid JSON object and will return a dictionary.
            See https://platform.openai.com/docs/guides/text-generation/json-mode
+        json_schema (dict, optional): Enables Structured Outputs which ensures the model will
+            always generate responses that adhere to your supplied JSON Schema.
+            See https://platform.openai.com/docs/guides/structured-outputs/structured-outputs
         seed (int, optional): If specified, OpenAI will make a best effort to sample deterministically,
             such that repeated requests with the same `seed` and parameters should return the same result.
             Determinism is not guaranteed, and you should refer to the `system_fingerprint` response
@@ -58,23 +63,35 @@ def chat_completion(
 
     Returns:
         dict[str, Any]: A dictionary with the following keys:
-
+            finish_reason (str): The reason the model stopped generating further tokens.
                 Can be 'stop', 'length', or 'tool_calls'.
-
-
-
+            tool_names (list[str], optional): The names of the tools called by the model.
+            tool_args_list (list[dict], optional): The arguments of the tools called by the model.
+            message (str | dict): The content of the generated assistant message.
                 If json_mode is True, this will be a dictionary.
-
+            logprobs (list[dict[str, Any] | list[dict[str, Any]]]): If logprobs[1] is between 1 and 5, each element in the list
                 will be a list of dictionaries containing the token, logprob, and bytes for the top `logprobs[1]` logprobs. Otherwise,
                 this will be a list of dictionaries containing the token, logprob, and bytes for each token in the message.
-
-
+            choices (list[dict], optional): A list of chat completion choices if n > 1 where each dict contains the above fields.
+            completion_tokens (int): The number of tokens used by the model to generate the completion.
                 NOTE: If n > 1 this is the sum of all completions.
-
-
-
+            prompt_tokens (int): The number of tokens in the messages sent to the model.
+            system_fingerprint (str, optional): If seed is set, a unique identifier for the model used to generate the response.
+            response_duration (float): The time, in seconds, taken to generate the response from the API.
     """
-
+
+    if json_mode and json_schema is not None:
+        raise ValueError("json_schema and json_mode cannot be used together.")
+
+    if json_mode:
+        response_format: dict[str, Any] = {"type": "json_object"}
+    elif json_schema is not None:
+        if isinstance(json_schema, dict):
+            response_format = {"type": "json_schema", "json_schema": json_schema}
+        elif issubclass(json_schema, BaseModel):
+            response_format = json_schema
+    else:
+        response_format = {"type": "text"}
 
     kwargs.update(
         {
@@ -126,7 +143,7 @@ def chat_completion(
             response_data_curr["tool_args_list"] = tool_args_list
         elif finish_reason == "stop" or finish_reason == "length":
             message = response_choice.message.content
-            if json_mode:
+            if json_mode or json_schema is not None:
                 with contextlib.suppress(json.JSONDecodeError):
                     message = json.loads(message)
             response_data_curr["message"] = message
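To illustrate the new `json_schema` parameter, a minimal sketch; the schema and model name below are illustrative assumptions, and `client` is assumed to come from `openai_client()`:

```python
# Sketch of Structured Outputs via the new json_schema parameter; schema and model are examples.
from not_again_ai.llm.openai_api.chat_completion import chat_completion
from not_again_ai.llm.openai_api.openai_client import openai_client

client = openai_client()  # uses OPENAI_API_KEY from the environment
json_schema = {
    "name": "random_number",
    "strict": True,
    "schema": {
        "type": "object",
        "properties": {"number": {"type": "integer"}},
        "required": ["number"],
        "additionalProperties": False,
    },
}
response = chat_completion(
    messages=[{"role": "user", "content": "Pick a number between 1 and 10."}],
    model="gpt-4o-2024-08-06",  # placeholder; must be a model that supports Structured Outputs
    client=client,
    json_schema=json_schema,
)
print(response["message"])  # parsed into a dict because json_schema was supplied
```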
{not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/llm/openai_api/openai_client.py
RENAMED
@@ -1,4 +1,7 @@
-from
+from typing import Literal
+
+from azure.identity import DefaultAzureCredential, get_bearer_token_provider
+from openai import AzureOpenAI, OpenAI
 
 
 class InvalidOAIAPITypeError(Exception):
@@ -8,13 +11,18 @@ class InvalidOAIAPITypeError(Exception):
 
 
 def openai_client(
-    api_type:
+    api_type: Literal["openai", "azure_openai"] = "openai",
     api_key: str | None = None,
     organization: str | None = None,
+    aoai_api_version: str = "2024-06-01",
+    azure_endpoint: str | None = None,
     timeout: float | None = None,
     max_retries: int | None = None,
-) -> OpenAI:
-    """Create an OpenAI client instance based on the specified API type and other provided parameters.
+) -> OpenAI | AzureOpenAI:
+    """Create an OpenAI or Azure OpenAI client instance based on the specified API type and other provided parameters.
+
+    Azure OpenAI requires RBAC authentication. You must be signed in with the Azure CLI and have correct role assigned.
+    See https://techcommunity.microsoft.com/t5/microsoft-developer-community/using-keyless-authentication-with-azure-openai/ba-p/4111521
 
     Args:
         api_type (str, optional): Type of the API to be used. Accepted values are 'openai' or 'azure_openai'.
@@ -23,6 +31,9 @@ def openai_client(
             OpenAI automatically uses `OPENAI_API_KEY` from the environment.
         organization (str, optional): The ID of the organization. If not provided,
             OpenAI automotically uses `OPENAI_ORG_ID` from the environment.
+        aoai_api_version (str, optional): Only applicable if using Azure OpenAI https://learn.microsoft.com/azure/ai-services/openai/reference#rest-api-versioning
+        azure_endpoint (str, optional): The endpoint to use for Azure OpenAI.
+            If not provided, will be read from the `AZURE_OPENAI_ENDPOINT` environment variable.
         timeout (float, optional): By default requests time out after 10 minutes.
         max_retries (int, optional): Certain errors are automatically retried 2 times by default,
             with a short exponential backoff. Connection errors (for example, due to a network connectivity problem),
@@ -52,6 +63,16 @@ def openai_client(
         filtered_args = {k: v for k, v in args.items() if v is not None}
         return OpenAI(**filtered_args)  # type: ignore
     elif api_type == "azure_openai":
-
+        azure_credential = DefaultAzureCredential()
+        ad_token_provider = get_bearer_token_provider(azure_credential, "https://cognitiveservices.azure.com/.default")
+        args = {
+            "api_version": aoai_api_version,
+            "azure_endpoint": azure_endpoint,
+            "azure_ad_token_provider": ad_token_provider,  # type: ignore
+            "timeout": timeout,
+            "max_retries": max_retries,
+        }
+        filtered_args = {k: v for k, v in args.items() if v is not None}
+        return AzureOpenAI(**filtered_args)  # type: ignore
     else:
-        raise NotImplementedError("
+        raise NotImplementedError(f"API type '{api_type}' is invalid.")
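A sketch of the new Azure OpenAI path; the endpoint is a placeholder, and as the docstring notes, it relies on keyless (RBAC) authentication via an Azure CLI login:

```python
# Sketch only: requires `az login` and an appropriate role assignment on the resource.
from not_again_ai.llm.openai_api.openai_client import openai_client

client = openai_client(
    api_type="azure_openai",
    azure_endpoint="https://<your-resource>.openai.azure.com",  # or set AZURE_OPENAI_ENDPOINT
)
```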
{not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/llm/openai_api/prompts.py
RENAMED
@@ -87,7 +87,7 @@ def chat_prompt(messages_unformatted: list[dict[str, Any]], variables: dict[str,
         A list which represents messages in the format that OpenAI expects for its chat completions API.
         See here for details: https://platform.openai.com/docs/api-reference/chat/create
 
-
+    Example:
     >>> # Assume cat_image and dog_image are Path objects to image files
     >>> messages = [
     ...     {"role": "system", "content": "You are a helpful assistant."},
{not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/local_llm/chat_completion.py
RENAMED
@@ -1,8 +1,10 @@
 from typing import Any
 
+from azure.ai.inference import ChatCompletionsClient
 from ollama import Client
 from openai import OpenAI
 
+from not_again_ai.llm.gh_models import chat_completion as chat_completion_gh_models
 from not_again_ai.llm.openai_api import chat_completion as chat_completion_openai
 from not_again_ai.local_llm.ollama import chat_completion as chat_completion_ollama
 
@@ -10,7 +12,8 @@ from not_again_ai.local_llm.ollama import chat_completion as chat_completion_oll
 def chat_completion(
     messages: list[dict[str, Any]],
     model: str,
-    client: OpenAI | Client,
+    client: OpenAI | Client | ChatCompletionsClient,
+    tools: list[dict[str, Any]] | None = None,
     max_tokens: int | None = None,
     temperature: float = 0.7,
     json_mode: bool = False,
@@ -25,6 +28,8 @@ def chat_completion(
         messages (list[dict[str, Any]]): A list of messages to send to the model.
         model (str): The model name to use.
         client (OpenAI | Client): The client object to use for chat completion.
+        tools (list[dict[str, Any]], optional):A list of tools the model may call.
+            Use this to provide a list of functions the model may generate JSON inputs for. Defaults to None.
         max_tokens (int, optional): The maximum number of tokens to generate.
         temperature (float, optional): The temperature of the model. Increasing the temperature will make the model answer more creatively.
         json_mode (bool, optional): This will structure the response as a valid JSON object.
@@ -34,6 +39,9 @@ def chat_completion(
         dict[str, Any]: A dictionary with the following keys
             message (str | dict): The content of the generated assistant message.
                 If json_mode is True, this will be a dictionary.
+            tool_names (list[str], optional): The names of the tools called by the model.
+                If the model does not support tools, a ResponseError is raised.
+            tool_args_list (list[dict], optional): The arguments of the tools called by the model.
             prompt_tokens (int): The number of tokens in the messages sent to the model.
             completion_tokens (int): The number of tokens used by the model to generate the completion.
             response_duration (float): The time, in seconds, taken to generate the response by using the model.
@@ -45,6 +53,7 @@ def chat_completion(
             messages=messages,
             model=model,
             client=client,
+            tools=tools,
             max_tokens=max_tokens,
             temperature=temperature,
             json_mode=json_mode,
@@ -56,6 +65,19 @@ def chat_completion(
             messages=messages,
             model=model,
             client=client,
+            tools=tools,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            json_mode=json_mode,
+            seed=seed,
+            **kwargs,
+        )
+    elif isinstance(client, ChatCompletionsClient):
+        response = chat_completion_gh_models.chat_completion(
+            messages=messages,  # type: ignore
+            model=model,
+            client=client,
+            tools=tools,  # type: ignore
             max_tokens=max_tokens,
             temperature=temperature,
             json_mode=json_mode,
@@ -68,6 +90,9 @@ def chat_completion(
     # Parse the responses to be consistent
     response_data = {}
     response_data["message"] = response.get("message")
+    if response.get("tool_names") and response.get("tool_args_list"):
+        response_data["tool_names"] = response.get("tool_names")
+        response_data["tool_args_list"] = response.get("tool_args_list")
     response_data["completion_tokens"] = response.get("completion_tokens")
     response_data["prompt_tokens"] = response.get("prompt_tokens")
     response_data["response_duration"] = response.get("response_duration")
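A hedged sketch of the provider-agnostic wrapper with the new `tools` argument; the tool definition below is a made-up example in the OpenAI-style function-calling format rather than something from the package:

```python
# Illustrative tool definition and model id; only the function and module names come from the diff.
from not_again_ai.llm.openai_api.openai_client import openai_client
from not_again_ai.local_llm.chat_completion import chat_completion

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather for a city.",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
]

client = openai_client()
response = chat_completion(
    messages=[{"role": "user", "content": "What is the weather in Boston?"}],
    model="gpt-4o-mini",  # placeholder; any chat model that supports tool calls
    client=client,
    tools=tools,
)
if "tool_names" in response:
    print(response["tool_names"], response["tool_args_list"])
```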
{not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/local_llm/ollama/chat_completion.py
RENAMED
@@ -13,6 +13,7 @@ def chat_completion(
     messages: list[dict[str, Any]],
     model: str,
     client: Client,
+    tools: list[dict[str, Any]] | None = None,
     max_tokens: int | None = None,
     context_window: int | None = None,
     temperature: float = 0.8,
@@ -27,6 +28,8 @@ def chat_completion(
         messages (list[dict[str, Any]]): A list of messages to send to the model.
         model (str): The model to use.
         client (Client): The Ollama client.
+        tools (list[dict[str, Any]], optional):A list of tools the model may call.
+            Use this to provide a list of functions the model may generate JSON inputs for. Defaults to None.
         max_tokens (int, optional): The maximum number of tokens to generate. Ollama calls this `num_predict`.
         context_window (int, optional): The number of tokens to consider as context. Ollama calls this `num_ctx`.
         temperature (float, optional): The temperature of the model. Increasing the temperature will make the model answer more creatively.
@@ -38,6 +41,9 @@ def chat_completion(
         dict[str, Any]: A dictionary with the following keys
             message (str | dict): The content of the generated assistant message.
                 If json_mode is True, this will be a dictionary.
+            tool_names (list[str], optional): The names of the tools called by the model.
+                If the model does not support tools, a ResponseError is raised.
+            tool_args_list (list[dict], optional): The arguments of the tools called by the model.
             prompt_tokens (int): The number of tokens in the messages sent to the model.
             completion_tokens (int): The number of tokens used by the model to generate the completion.
             response_duration (float): The time, in seconds, taken to generate the response by using the model.
@@ -59,6 +65,8 @@ def chat_completion(
     }
     if json_mode:
         all_args["format"] = "json"
+    if tools:
+        all_args["tools"] = tools
 
     try:
         start_time = time.time()
@@ -77,24 +85,27 @@ def chat_completion(
 
     response_data: dict[str, Any] = {}
 
-
-    message = response["message"].get("content", None)  # type: ignore
+    message = response["message"].get("content", "")
     if message and json_mode:
         with contextlib.suppress(json.JSONDecodeError):
             message = json.loads(message)
-
-
+    response_data["message"] = message
+
+    if response["message"].get("tool_calls"):
+        tool_calls = response["message"]["tool_calls"]
+        tool_names = [tool_call["function"]["name"] for tool_call in tool_calls]
+        tool_args_list = [tool_call["function"]["arguments"] for tool_call in tool_calls]
+        response_data["tool_names"] = tool_names
+        response_data["tool_args_list"] = tool_args_list
 
     tokenizer = load_tokenizer(model)
     prompt_tokens = num_tokens_from_messages(messages, tokenizer)
     response_data["prompt_tokens"] = prompt_tokens
 
-
-    response_data["completion_tokens"] = response.get("eval_count", None)  # type: ignore
+    response_data["completion_tokens"] = response.get("eval_count", None)
     if response_data["completion_tokens"] is None:
         response_data["completion_tokens"] = num_tokens_in_string(str(response_data["message"]), tokenizer)
 
-    # Get the latency of the response
     response_data["response_duration"] = round(response_duration, 4)
 
     return response_data
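For the Ollama path specifically, a minimal sketch reusing the tool list from the previous sketch; note that, per the parsing above, Ollama already returns tool arguments as dictionaries, so no JSON decoding is needed. The host and model name are assumptions:

```python
# Sketch: assumes a local Ollama server and a tool-capable model (e.g. llama3.1) already pulled.
from ollama import Client

from not_again_ai.local_llm.ollama.chat_completion import chat_completion

client = Client(host="http://localhost:11434")
response = chat_completion(
    messages=[{"role": "user", "content": "What is the weather in Boston?"}],
    model="llama3.1",  # placeholder; must support tool calling
    client=client,
    tools=tools,  # same OpenAI-style tool list as in the previous sketch
)
# Tool arguments arrive as dicts here, unlike the OpenAI path where they are JSON strings.
print(response.get("tool_names"), response.get("tool_args_list"))
```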
{not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/local_llm/ollama/model_mapping.py
RENAMED
@@ -7,6 +7,7 @@ i.e. all phi3 models will start with "phi3".
 OLLAMA_MODEL_MAPPING = {
     "phi3": "microsoft/Phi-3-mini-4k-instruct",
     "llama3:": "nvidia/Llama3-ChatQA-1.5-8B",  # Using this version to get around needed to accept an agreement to get access to the tokenizer
+    "llama3.1": "unsloth/Meta-Llama-3.1-8B-Instruct",
     "gemma": "google/gemma-1.1-7b-it",  # Requires HF_TOKEN set and accepting the agreement on the HF model page
     "qwen2": "Qwen/Qwen2-7B-Instruct",
     "granite-code": "ibm-granite/granite-34b-code-instruct",
{not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/llm/openai_api/context_management.py
RENAMED
File without changes

{not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/local_llm/huggingface/helpers.py
RENAMED
File without changes

{not_again_ai-0.10.3 → not_again_ai-0.12.0}/src/not_again_ai/local_llm/ollama/ollama_client.py
RENAMED
File without changes

All other files marked +0 -0 in the listing at the top are unchanged between 0.10.3 and 0.12.0.