not-again-ai 0.9.0__tar.gz → 0.10.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {not_again_ai-0.9.0 → not_again_ai-0.10.0}/PKG-INFO +21 -18
- {not_again_ai-0.9.0 → not_again_ai-0.10.0}/README.md +12 -11
- {not_again_ai-0.9.0 → not_again_ai-0.10.0}/pyproject.toml +12 -9
- {not_again_ai-0.9.0 → not_again_ai-0.10.0}/src/not_again_ai/llm/openai_api/chat_completion.py +7 -0
- {not_again_ai-0.9.0 → not_again_ai-0.10.0}/src/not_again_ai/llm/openai_api/context_management.py +5 -3
- {not_again_ai-0.9.0 → not_again_ai-0.10.0}/src/not_again_ai/llm/openai_api/tokens.py +36 -34
- not_again_ai-0.10.0/src/not_again_ai/local_llm/__init__.py +23 -0
- {not_again_ai-0.9.0/src/not_again_ai/llm → not_again_ai-0.10.0/src/not_again_ai/local_llm}/chat_completion.py +7 -3
- {not_again_ai-0.9.0/src/not_again_ai/llm → not_again_ai-0.10.0/src/not_again_ai/local_llm}/ollama/chat_completion.py +18 -13
- not_again_ai-0.10.0/src/not_again_ai/local_llm/ollama/model_mapping.py +15 -0
- not_again_ai-0.10.0/src/not_again_ai/local_llm/ollama/tokens.py +110 -0
- not_again_ai-0.10.0/src/not_again_ai/local_llm/prompts.py +38 -0
- not_again_ai-0.10.0/src/not_again_ai/local_llm/tokens.py +90 -0
- {not_again_ai-0.9.0 → not_again_ai-0.10.0}/src/not_again_ai/statistics/dependence.py +5 -5
- {not_again_ai-0.9.0 → not_again_ai-0.10.0}/src/not_again_ai/viz/barplots.py +2 -2
- {not_again_ai-0.9.0 → not_again_ai-0.10.0}/src/not_again_ai/viz/scatterplot.py +2 -2
- not_again_ai-0.9.0/src/not_again_ai/local_llm/huggingface/__init__.py +0 -0
- {not_again_ai-0.9.0 → not_again_ai-0.10.0}/LICENSE +0 -0
- {not_again_ai-0.9.0 → not_again_ai-0.10.0}/src/not_again_ai/__init__.py +0 -0
- {not_again_ai-0.9.0 → not_again_ai-0.10.0}/src/not_again_ai/base/__init__.py +0 -0
- {not_again_ai-0.9.0 → not_again_ai-0.10.0}/src/not_again_ai/base/file_system.py +0 -0
- {not_again_ai-0.9.0 → not_again_ai-0.10.0}/src/not_again_ai/base/parallel.py +0 -0
- {not_again_ai-0.9.0 → not_again_ai-0.10.0}/src/not_again_ai/llm/__init__.py +0 -0
- {not_again_ai-0.9.0/src/not_again_ai/llm/ollama → not_again_ai-0.10.0/src/not_again_ai/llm/openai_api}/__init__.py +0 -0
- {not_again_ai-0.9.0 → not_again_ai-0.10.0}/src/not_again_ai/llm/openai_api/embeddings.py +0 -0
- {not_again_ai-0.9.0 → not_again_ai-0.10.0}/src/not_again_ai/llm/openai_api/openai_client.py +0 -0
- {not_again_ai-0.9.0 → not_again_ai-0.10.0}/src/not_again_ai/llm/openai_api/prompts.py +0 -0
- {not_again_ai-0.9.0/src/not_again_ai/llm/openai_api → not_again_ai-0.10.0/src/not_again_ai/local_llm/huggingface}/__init__.py +0 -0
- {not_again_ai-0.9.0 → not_again_ai-0.10.0}/src/not_again_ai/local_llm/huggingface/chat_completion.py +0 -0
- {not_again_ai-0.9.0 → not_again_ai-0.10.0}/src/not_again_ai/local_llm/huggingface/helpers.py +0 -0
- {not_again_ai-0.9.0/src/not_again_ai/local_llm → not_again_ai-0.10.0/src/not_again_ai/local_llm/ollama}/__init__.py +0 -0
- {not_again_ai-0.9.0/src/not_again_ai/llm → not_again_ai-0.10.0/src/not_again_ai/local_llm}/ollama/ollama_client.py +0 -0
- {not_again_ai-0.9.0/src/not_again_ai/llm → not_again_ai-0.10.0/src/not_again_ai/local_llm}/ollama/service.py +0 -0
- {not_again_ai-0.9.0 → not_again_ai-0.10.0}/src/not_again_ai/py.typed +0 -0
- {not_again_ai-0.9.0 → not_again_ai-0.10.0}/src/not_again_ai/statistics/__init__.py +0 -0
- {not_again_ai-0.9.0 → not_again_ai-0.10.0}/src/not_again_ai/viz/__init__.py +0 -0
- {not_again_ai-0.9.0 → not_again_ai-0.10.0}/src/not_again_ai/viz/distributions.py +0 -0
- {not_again_ai-0.9.0 → not_again_ai-0.10.0}/src/not_again_ai/viz/time_series.py +0 -0
- {not_again_ai-0.9.0 → not_again_ai-0.10.0}/src/not_again_ai/viz/utils.py +0 -0
{not_again_ai-0.9.0 → not_again_ai-0.10.0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: not-again-ai
-Version: 0.9.0
+Version: 0.10.0
 Summary: Designed to once and for all collect all the little things that come up over and over again in AI projects and put them in one place.
 Home-page: https://github.com/DaveCoDev/not-again-ai
 License: MIT
@@ -21,16 +21,18 @@ Provides-Extra: llm
 Provides-Extra: local-llm
 Provides-Extra: statistics
 Provides-Extra: viz
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
+Requires-Dist: jinja2 (==3.1.4) ; extra == "local-llm"
+Requires-Dist: loguru (==0.7.2)
+Requires-Dist: numpy (==2.0.0) ; extra == "statistics" or extra == "viz"
+Requires-Dist: ollama (==0.2.1) ; extra == "local-llm"
+Requires-Dist: openai (==1.35.3) ; extra == "llm"
 Requires-Dist: pandas (==2.2.2) ; extra == "viz"
 Requires-Dist: python-liquid (==1.12.1) ; extra == "llm"
-Requires-Dist: scikit-learn (==1.
-Requires-Dist: scipy (==1.13.
+Requires-Dist: scikit-learn (==1.5.0) ; extra == "statistics"
+Requires-Dist: scipy (==1.13.1) ; extra == "statistics"
 Requires-Dist: seaborn (==0.13.2) ; extra == "viz"
 Requires-Dist: tiktoken (==0.7.0) ; extra == "llm"
-Requires-Dist: transformers (==4.41.
+Requires-Dist: transformers (==4.41.2) ; extra == "local-llm"
 Project-URL: Documentation, https://github.com/DaveCoDev/not-again-ai
 Project-URL: Repository, https://github.com/DaveCoDev/not-again-ai
 Description-Content-Type: text/markdown
@@ -50,7 +52,7 @@ Description-Content-Type: text/markdown
 [ruff-badge]: https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json
 [mypy-badge]: https://www.mypy-lang.org/static/mypy_badge.svg
 
-**not-again-ai** is a collection of various building blocks that come up over and over again when developing AI products. The key goals of this package are to have simple,
+**not-again-ai** is a collection of various building blocks that come up over and over again when developing AI products. The key goals of this package are to have simple, yet flexible interfaces and to minimize dependencies. It is encouraged to also **a)** use this as a template for your own Python package. **b)** instead of installing the package, copy and paste functions into your own projects. We make this easier by limiting the number of dependencies and use an MIT license.
 
 **Documentation** available within individual **[notebooks](notebooks)**, docstrings within the source, or auto-generated at [DaveCoDev.github.io/not-again-ai/](https://DaveCoDev.github.io/not-again-ai/).
 
@@ -66,24 +68,25 @@ $ pip install not_again_ai[llm,local_llm,statistics,viz]
 
 Note that local LLM requires separate installations and will not work out of the box due to how hardware dependent it is. Be sure to check the [notebooks](notebooks/local_llm/) for more details.
 
-The package is split into subpackages, so you can install only the parts you need.
+The package is split into subpackages, so you can install only the parts you need.
 * **Base only**: `pip install not_again_ai`
 * **LLM**: `pip install not_again_ai[llm]`
   1. If you wish to use OpenAI
      1. Go to https://platform.openai.com/settings/profile?tab=api-keys to get your API key.
-     1. (
+     1. (Optional) Set the `OPENAI_API_KEY` and the `OPENAI_ORG_ID` environment variables.
+* **Local LLM**: `pip install not_again_ai[llm,llm_local]`
+  1. Some HuggingFace transformers tokenizers are gated behind access requests. If you wish to use these, you will need to request access from HuggingFace on the model card.
+  1. Then set the `HF_TOKEN` environment variable to your HuggingFace API token which can be found here: https://huggingface.co/settings/tokens
   1. If you wish to use Ollama:
-     1.
-     1. [Add Ollama as a startup service (recommended)](https://github.com/ollama/ollama/blob/main/docs/linux.md#adding-ollama-as-a-startup-service-recommended)
-     1.
+     1. Follow the instructions at https://github.com/ollama/ollama to install Ollama for your system.
+     1. (Optional) [Add Ollama as a startup service (recommended)](https://github.com/ollama/ollama/blob/main/docs/linux.md#adding-ollama-as-a-startup-service-recommended)
+     1. (Optional) To make the Ollama service accessible on your local network from a Linux server, add the following to the `/etc/systemd/system/ollama.service` file which will make Ollama available at `http://<local_address>:11434`:
     ```bash
    [Service]
    ...
    Environment="OLLAMA_HOST=0.0.0.0"
    ```
-
-* **Local LLM**: `pip install not_again_ai[llm_local]`
-  - Most of this package is hardware dependent so this only installs some generic dependencies. Be sure to check the [notebooks](notebooks/local_llm/) for more details on what is available and how to install it.
+  1. HuggingFace transformers and other requirements are hardware dependent so for providers other than Ollama, this only installs some generic dependencies. Check the [notebooks](notebooks/local_llm/) for more details on what is available and how to install it.
 * **Statistics**: `pip install not_again_ai[statistics]`
 * **Visualization**: `pip install not_again_ai[viz]`
 
@@ -302,9 +305,9 @@ Install the [Python extension](https://marketplace.visualstudio.com/items?itemNa
 
 Install the [Ruff extension](https://marketplace.visualstudio.com/items?itemName=charliermarsh.ruff) for VSCode.
 
-Default settings are configured in [`.vscode/settings.json`](./.vscode/settings.json)
+Default settings are configured in [`.vscode/settings.json`](./.vscode/settings.json) which will enable Ruff with consistent settings.
 
-# Documentation
+# Generating Documentation
 
 ## Generating a User Guide
 
{not_again_ai-0.9.0 → not_again_ai-0.10.0}/README.md
@@ -13,7 +13,7 @@
 [ruff-badge]: https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json
 [mypy-badge]: https://www.mypy-lang.org/static/mypy_badge.svg
 
-**not-again-ai** is a collection of various building blocks that come up over and over again when developing AI products. The key goals of this package are to have simple,
+**not-again-ai** is a collection of various building blocks that come up over and over again when developing AI products. The key goals of this package are to have simple, yet flexible interfaces and to minimize dependencies. It is encouraged to also **a)** use this as a template for your own Python package. **b)** instead of installing the package, copy and paste functions into your own projects. We make this easier by limiting the number of dependencies and use an MIT license.
 
 **Documentation** available within individual **[notebooks](notebooks)**, docstrings within the source, or auto-generated at [DaveCoDev.github.io/not-again-ai/](https://DaveCoDev.github.io/not-again-ai/).
 
@@ -29,24 +29,25 @@ $ pip install not_again_ai[llm,local_llm,statistics,viz]
 
 Note that local LLM requires separate installations and will not work out of the box due to how hardware dependent it is. Be sure to check the [notebooks](notebooks/local_llm/) for more details.
 
-The package is split into subpackages, so you can install only the parts you need.
+The package is split into subpackages, so you can install only the parts you need.
 * **Base only**: `pip install not_again_ai`
 * **LLM**: `pip install not_again_ai[llm]`
   1. If you wish to use OpenAI
      1. Go to https://platform.openai.com/settings/profile?tab=api-keys to get your API key.
-     1. (
+     1. (Optional) Set the `OPENAI_API_KEY` and the `OPENAI_ORG_ID` environment variables.
+* **Local LLM**: `pip install not_again_ai[llm,llm_local]`
+  1. Some HuggingFace transformers tokenizers are gated behind access requests. If you wish to use these, you will need to request access from HuggingFace on the model card.
+  1. Then set the `HF_TOKEN` environment variable to your HuggingFace API token which can be found here: https://huggingface.co/settings/tokens
  1. If you wish to use Ollama:
-     1.
-     1. [Add Ollama as a startup service (recommended)](https://github.com/ollama/ollama/blob/main/docs/linux.md#adding-ollama-as-a-startup-service-recommended)
-     1.
+     1. Follow the instructions at https://github.com/ollama/ollama to install Ollama for your system.
+     1. (Optional) [Add Ollama as a startup service (recommended)](https://github.com/ollama/ollama/blob/main/docs/linux.md#adding-ollama-as-a-startup-service-recommended)
+     1. (Optional) To make the Ollama service accessible on your local network from a Linux server, add the following to the `/etc/systemd/system/ollama.service` file which will make Ollama available at `http://<local_address>:11434`:
    ```bash
    [Service]
    ...
    Environment="OLLAMA_HOST=0.0.0.0"
    ```
-
-* **Local LLM**: `pip install not_again_ai[llm_local]`
-  - Most of this package is hardware dependent so this only installs some generic dependencies. Be sure to check the [notebooks](notebooks/local_llm/) for more details on what is available and how to install it.
+  1. HuggingFace transformers and other requirements are hardware dependent so for providers other than Ollama, this only installs some generic dependencies. Check the [notebooks](notebooks/local_llm/) for more details on what is available and how to install it.
 * **Statistics**: `pip install not_again_ai[statistics]`
 * **Visualization**: `pip install not_again_ai[viz]`
 
@@ -265,9 +266,9 @@ Install the [Python extension](https://marketplace.visualstudio.com/items?itemNa
 
 Install the [Ruff extension](https://marketplace.visualstudio.com/items?itemName=charliermarsh.ruff) for VSCode.
 
-Default settings are configured in [`.vscode/settings.json`](./.vscode/settings.json)
+Default settings are configured in [`.vscode/settings.json`](./.vscode/settings.json) which will enable Ruff with consistent settings.
 
-# Documentation
+# Generating Documentation
 
 ## Generating a User Guide
 
{not_again_ai-0.9.0 → not_again_ai-0.10.0}/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "not-again-ai"
-version = "0.9.0"
+version = "0.10.0"
 description = "Designed to once and for all collect all the little things that come up over and over again in AI projects and put them in one place."
 authors = ["DaveCoDev <dave.co.dev@gmail.com>"]
 license = "MIT"
@@ -26,21 +26,24 @@ classifiers = [
 # result in an old version being resolved/locked.
 python = "^3.11 || ^3.12"
 
+loguru = { version = "==0.7.2" }
+
 # Optional dependencies are defined here, and groupings are defined below.
-
-
-
+jinja2 = { version = "==3.1.4", optional = true }
+numpy = { version = "==2.0.0", optional = true }
+ollama = { version = "==0.2.1", optional = true }
+openai = { version = "==1.35.3", optional = true }
 pandas = { version = "==2.2.2", optional = true }
 python-liquid = { version = "==1.12.1", optional = true }
-scipy = { version = "==1.13.
-scikit-learn = { version = "==1.
+scipy = { version = "==1.13.1", optional = true }
+scikit-learn = { version = "==1.5.0", optional = true }
 seaborn = { version = "==0.13.2", optional = true }
 tiktoken = { version = "==0.7.0", optional = true }
-transformers = { version = "==4.41.
+transformers = { version = "==4.41.2", optional = true }
 
 [tool.poetry.extras]
-llm = ["
-local_llm = ["transformers"]
+llm = ["openai", "python-liquid", "tiktoken"]
+local_llm = ["jinja2", "ollama", "transformers"]
 statistics = ["numpy", "scikit-learn", "scipy"]
 viz = ["numpy", "pandas", "seaborn"]
 
{not_again_ai-0.9.0 → not_again_ai-0.10.0}/src/not_again_ai/llm/openai_api/chat_completion.py
RENAMED
@@ -1,5 +1,6 @@
 import contextlib
 import json
+import time
 from typing import Any
 
 from openai import OpenAI
@@ -71,6 +72,7 @@ def chat_completion(
                 NOTE: If n > 1 this is the sum of all completions.
             'prompt_tokens' (int): The number of tokens in the messages sent to the model.
             'system_fingerprint' (str, optional): If seed is set, a unique identifier for the model used to generate the response.
+            'response_duration' (float): The time, in seconds, taken to generate the response from the API.
     """
     response_format = {"type": "json_object"} if json_mode else None
 
@@ -100,7 +102,10 @@
     if logprobs[0] and logprobs[1] is not None:
         kwargs["top_logprobs"] = logprobs[1]
 
+    start_time = time.time()
     response = client.chat.completions.create(**kwargs)
+    end_time = time.time()
+    response_duration = end_time - start_time
 
     response_data: dict[str, Any] = {"choices": []}
     for response_choice in response.choices:
@@ -160,6 +165,8 @@ def chat_completion(
     if seed is not None and response.system_fingerprint is not None:
        response_data["system_fingerprint"] = response.system_fingerprint
 
+    response_data["response_duration"] = response_duration
+
    if len(response_data["choices"]) == 1:
        response_data.update(response_data["choices"][0])
        del response_data["choices"]
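For orientation, a minimal usage sketch of the new timing field. Only the returned keys are confirmed by this diff; the call signature `chat_completion(messages=..., model=..., client=...)` is an assumption.

```python
# Hypothetical usage sketch; parameter names are assumptions, returned keys come from the docstring above.
from openai import OpenAI

from not_again_ai.llm.openai_api.chat_completion import chat_completion

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Say hello."},
]
response = chat_completion(messages=messages, model="gpt-4o-2024-05-13", client=client)

# New in 0.10.0: wall-clock latency of the API call, measured with time.time().
print(response["response_duration"], response["prompt_tokens"])
```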
{not_again_ai-0.9.0 → not_again_ai-0.10.0}/src/not_again_ai/llm/openai_api/context_management.py
RENAMED
@@ -1,6 +1,6 @@
 import copy
 
-from not_again_ai.llm.openai_api.tokens import num_tokens_from_messages, truncate_str
+from not_again_ai.llm.openai_api.tokens import load_tokenizer, num_tokens_from_messages, truncate_str
 
 
 def _inject_variable(
@@ -39,6 +39,7 @@ def priority_truncation(
         token_limit: The maximum number of tokens allowed in the messages.
         model: The model to use for tokenization. Defaults to "gpt-3.5-turbo-0125".
     """
+    tokenizer = load_tokenizer(model)
 
     # Check if all variables in the priority list are in the variables dict.
     # If not, add the missing variables into priority in any order.
@@ -49,7 +50,8 @@
     messages_formatted = copy.deepcopy(messages_unformatted)
     for var in priority:
         # Count the current number of tokens in messages_formatted and compute a remaining token budget.
-
+        tokenizer = load_tokenizer(model)
+        num_tokens = num_tokens_from_messages(messages_formatted, tokenizer=tokenizer, model=model)
         remaining_tokens = token_limit - num_tokens
         if remaining_tokens <= 0:
             break
@@ -60,7 +62,7 @@
             num_var_occurrences += message["content"].count("{{" + var + "}}")
 
         # Truncate the variable to fit the remaining token budget taking into account the number of times it occurs in the messages.
-        truncated_var = truncate_str(variables[var], remaining_tokens // num_var_occurrences,
+        truncated_var = truncate_str(variables[var], remaining_tokens // num_var_occurrences, tokenizer=tokenizer)
 
         # Inject the variable text into messages_formatted.
         messages_formatted = _inject_variable(messages_formatted, var, truncated_var)
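A sketch of how `priority_truncation` might be called. The identifiers `messages_unformatted`, `variables`, `priority`, `token_limit`, and `model` all appear in this diff, but treating them as keyword parameters in this order is an assumption.

```python
# Hypothetical call; variables are rendered into {{...}} placeholders and truncated to fit the token budget.
from not_again_ai.llm.openai_api.context_management import priority_truncation

messages = [
    {"role": "system", "content": "Summarize the following document: {{document}}"},
    {"role": "user", "content": "Focus on this question: {{question}}"},
]
variables = {
    "document": "a very long document " * 2000,
    "question": "What changed in version 0.10.0?",
}
truncated_messages = priority_truncation(
    messages_unformatted=messages,
    variables=variables,
    priority=["question", "document"],
    token_limit=1000,
    model="gpt-3.5-turbo-0125",
)
```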
{not_again_ai-0.9.0 → not_again_ai-0.10.0}/src/not_again_ai/llm/openai_api/tokens.py
@@ -1,73 +1,75 @@
 import tiktoken
 
 
-def
-"""
+def load_tokenizer(model: str) -> tiktoken.Encoding:
+    """Load the tokenizer for the given model
 
     Args:
-
-        max_len: The maximum number of tokens to keep.
-        model: The model to use for tokenization. Defaults to "gpt-3.5-turbo-0125".
-            See https://platform.openai.com/docs/models for a list of OpenAI models.
+        model (str): The name of the language model to load the tokenizer for
 
     Returns:
-
+        A tiktoken encoding object
     """
     try:
         encoding = tiktoken.encoding_for_model(model)
     except KeyError:
         print("Warning: model not found. Using cl100k_base encoding.")
         encoding = tiktoken.get_encoding("cl100k_base")
+    return encoding
+
+
+def truncate_str(text: str, max_len: int, tokenizer: tiktoken.Encoding) -> str:
+    """Truncates a string to a maximum token length.
 
-
+    Args:
+        text (str): The string to truncate.
+        max_len (int): The maximum number of tokens to keep.
+        tokenizer (tiktoken.Encoding): A tiktoken encoding object
+
+    Returns:
+        str: The truncated string.
+    """
+    tokens = tokenizer.encode(text)
     if len(tokens) > max_len:
         tokens = tokens[:max_len]
         # Decode the tokens back to a string
-        truncated_text =
+        truncated_text = tokenizer.decode(tokens)
         return truncated_text
     else:
         return text
 
 
-def num_tokens_in_string(text: str,
+def num_tokens_in_string(text: str, tokenizer: tiktoken.Encoding) -> int:
     """Return the number of tokens in a string.
 
     Args:
-        text: The string to count the tokens.
-
-            See https://platform.openai.com/docs/models for a list of OpenAI models.
+        text (str): The string to count the tokens.
+        tokenizer (tiktoken.Encoding): A tiktoken encoding object
 
     Returns:
-        The number of tokens in the string.
+        int: The number of tokens in the string.
     """
-
-        encoding = tiktoken.encoding_for_model(model)
-    except KeyError:
-        print("Warning: model not found. Using cl100k_base encoding.")
-        encoding = tiktoken.get_encoding("cl100k_base")
-    return len(encoding.encode(text))
+    return len(tokenizer.encode(text))
 
 
-def num_tokens_from_messages(
+def num_tokens_from_messages(
+    messages: list[dict[str, str]], tokenizer: tiktoken.Encoding, model: str = "gpt-3.5-turbo-0125"
+) -> int:
     """Return the number of tokens used by a list of messages.
-    NOTE: Does not support counting tokens used by function calling.
+    NOTE: Does not support counting tokens used by function calling or prompts with images.
     Reference: # https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb
         and https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
 
     Args:
-        messages: A list of messages to count the tokens
+        messages (list[dict[str, str]]): A list of messages to count the tokens
             should ideally be the result after calling llm.prompts.chat_prompt.
-
+        tokenizer (tiktoken.Encoding): A tiktoken encoding object
+        model (str): The model to use for tokenization. Defaults to "gpt-3.5-turbo-0125".
             See https://platform.openai.com/docs/models for a list of OpenAI models.
 
     Returns:
-        The number of tokens used by the messages.
+        int: The number of tokens used by the messages.
     """
-    try:
-        encoding = tiktoken.encoding_for_model(model)
-    except KeyError:
-        print("Warning: model not found. Using cl100k_base encoding.")
-        encoding = tiktoken.get_encoding("cl100k_base")
     if model in {
         "gpt-3.5-turbo-0613",
         "gpt-3.5-turbo-16k-0613",
@@ -92,11 +94,11 @@ def num_tokens_from_messages(messages: list[dict[str, str]], model: str = "gpt-3
         tokens_per_name = -1
     # Approximate catch-all. Assumes future versions of 3.5 and 4 will have the same token counts as the 0613 versions.
     elif "gpt-3.5-turbo" in model:
-        return num_tokens_from_messages(messages, model="gpt-3.5-turbo-0613")
+        return num_tokens_from_messages(messages, tokenizer=tokenizer, model="gpt-3.5-turbo-0613")
     elif "gpt-4o" in model:
-        return num_tokens_from_messages(messages, model="gpt-4o-2024-05-13")
+        return num_tokens_from_messages(messages, tokenizer=tokenizer, model="gpt-4o-2024-05-13")
     elif "gpt-4" in model:
-        return num_tokens_from_messages(messages, model="gpt-4-0613")
+        return num_tokens_from_messages(messages, tokenizer=tokenizer, model="gpt-4-0613")
     else:
         raise NotImplementedError(
             f"""num_tokens_from_messages() is not implemented for model {model}.
@@ -106,7 +108,7 @@ See https://github.com/openai/openai-python/blob/main/chatml.md for information
     for message in messages:
         num_tokens += tokens_per_message
         for key, value in message.items():
-            num_tokens += len(
+            num_tokens += len(tokenizer.encode(value))
             if key == "name":
                 num_tokens += tokens_per_name
     num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
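The tokenizer is now loaded once and passed explicitly to the counting and truncation helpers. A minimal sketch based on the signatures shown above:

```python
# Usage sketch of the reworked token helpers (signatures taken from this diff).
from not_again_ai.llm.openai_api.tokens import (
    load_tokenizer,
    num_tokens_from_messages,
    num_tokens_in_string,
    truncate_str,
)

tokenizer = load_tokenizer("gpt-4o-2024-05-13")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Count my tokens, please."},
]
# Counting helpers now take the preloaded encoding instead of loading it on every call.
print(num_tokens_from_messages(messages, tokenizer=tokenizer, model="gpt-4o-2024-05-13"))
print(num_tokens_in_string("Count my tokens, please.", tokenizer))
print(truncate_str("A string that may be shortened to three tokens.", 3, tokenizer))
```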
not_again_ai-0.10.0/src/not_again_ai/local_llm/__init__.py
@@ -0,0 +1,23 @@
+import importlib.util
+import os
+
+os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
+os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "1"
+
+if (
+    importlib.util.find_spec("liquid") is None
+    or importlib.util.find_spec("ollama") is None
+    or importlib.util.find_spec("openai") is None
+    or importlib.util.find_spec("tiktoken") is None
+    or importlib.util.find_spec("transformers") is None
+):
+    raise ImportError(
+        "not_again_ai.local_llm requires the 'llm' and 'local_llm' extra to be installed. "
+        "You can install it using 'pip install not_again_ai[llm,local_llm]'."
+    )
+else:
+    import liquid  # noqa: F401
+    import ollama  # noqa: F401
+    import openai  # noqa: F401
+    import tiktoken  # noqa: F401
+    import transformers  # noqa: F401
{not_again_ai-0.9.0/src/not_again_ai/llm → not_again_ai-0.10.0/src/not_again_ai/local_llm}/chat_completion.py
@@ -3,8 +3,8 @@ from typing import Any
 from ollama import Client
 from openai import OpenAI
 
-from not_again_ai.llm.ollama import chat_completion as chat_completion_ollama
 from not_again_ai.llm.openai_api import chat_completion as chat_completion_openai
+from not_again_ai.local_llm.ollama import chat_completion as chat_completion_ollama
 
 
 def chat_completion(
@@ -34,7 +34,9 @@ def chat_completion(
         dict[str, Any]: A dictionary with the following keys
             message (str | dict): The content of the generated assistant message.
                 If json_mode is True, this will be a dictionary.
+            prompt_tokens (int): The number of tokens in the messages sent to the model.
             completion_tokens (int): The number of tokens used by the model to generate the completion.
+            response_duration (float): The time, in seconds, taken to generate the response by using the model.
             extras (dict): This will contain any additional fields returned by corresponding provider.
     """
     # Determine which chat_completion function to call based on the client type
@@ -65,8 +67,10 @@ def chat_completion(
 
     # Parse the responses to be consistent
     response_data = {}
-    response_data["message"] = response.get("message"
-    response_data["completion_tokens"] = response.get("completion_tokens"
+    response_data["message"] = response.get("message")
+    response_data["completion_tokens"] = response.get("completion_tokens")
+    response_data["prompt_tokens"] = response.get("prompt_tokens")
+    response_data["response_duration"] = response.get("response_duration")
 
     # Return any additional fields from the response in an "extras" dictionary
     extras = {k: v for k, v in response.items() if k not in response_data}
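A sketch of the provider-agnostic wrapper this module implements: the same call works with an OpenAI client or an Ollama client, and 0.10.0 normalizes `prompt_tokens` and `response_duration` across both. The parameter names `messages`, `model`, and `client` are assumptions; the returned keys come from the docstring above.

```python
# Hypothetical usage; the dispatch on client type is what the diff above shows.
from ollama import Client
from openai import OpenAI

from not_again_ai.local_llm.chat_completion import chat_completion

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the capital of France?"},
]

openai_response = chat_completion(messages=messages, model="gpt-4o-2024-05-13", client=OpenAI())
ollama_response = chat_completion(messages=messages, model="phi3", client=Client(host="http://localhost:11434"))

# Both providers now expose the same normalized fields.
for response in (openai_response, ollama_response):
    print(response["message"], response["prompt_tokens"], response["completion_tokens"], response["response_duration"])
```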
{not_again_ai-0.9.0/src/not_again_ai/llm → not_again_ai-0.10.0/src/not_again_ai/local_llm}/ollama/chat_completion.py
@@ -1,14 +1,12 @@
 import contextlib
 import json
 import re
+import time
 from typing import Any
 
 from ollama import Client, ResponseError
 
-
-def _convert_duration(nanoseconds: int) -> float:
-    seconds = nanoseconds / 1_000_000_000
-    return round(seconds, 5)
+from not_again_ai.local_llm.ollama.tokens import load_tokenizer, num_tokens_from_messages, num_tokens_in_string
 
 
 def chat_completion(
@@ -40,8 +38,9 @@ def chat_completion(
         dict[str, Any]: A dictionary with the following keys
             message (str | dict): The content of the generated assistant message.
                 If json_mode is True, this will be a dictionary.
+            prompt_tokens (int): The number of tokens in the messages sent to the model.
             completion_tokens (int): The number of tokens used by the model to generate the completion.
-            response_duration (float): The time taken to generate the response
+            response_duration (float): The time, in seconds, taken to generate the response by using the model.
     """
 
     options = {
@@ -62,7 +61,10 @@
         all_args["format"] = "json"
 
     try:
-
+        start_time = time.time()
+        response = client.chat(**all_args)  # type: ignore
+        end_time = time.time()
+        response_duration = end_time - start_time
     except ResponseError as e:
         # If the error says "model 'model' not found" use regex then raise a more specific error
         expected_pattern = f"model '{model}' not found"
@@ -71,25 +73,28 @@
                 f"Model '{model}' not found. Please use not_again_ai.llm.ollama.service.pull() first."
             ) from e
         else:
-            raise ResponseError(e.
+            raise ResponseError(e.error) from e
 
     response_data: dict[str, Any] = {}
 
     # Handle getting the message returned by the model
-    message = response["message"].get("content", None)
+    message = response["message"].get("content", None)  # type: ignore
     if message and json_mode:
         with contextlib.suppress(json.JSONDecodeError):
             message = json.loads(message)
     if message:
         response_data["message"] = message
 
+    tokenizer = load_tokenizer(model)
+    prompt_tokens = num_tokens_from_messages(messages, tokenizer)
+    response_data["prompt_tokens"] = prompt_tokens
+
     # Get the number of tokens generated
-    response_data["completion_tokens"] = response.get("eval_count", None)
+    response_data["completion_tokens"] = response.get("eval_count", None)  # type: ignore
+    if response_data["completion_tokens"] is None:
+        response_data["completion_tokens"] = num_tokens_in_string(str(response_data["message"]), tokenizer)
 
     # Get the latency of the response
-
-    response_data["response_duration"] = _convert_duration(response["total_duration"])
-    else:
-        response_data["response_duration"] = None
+    response_data["response_duration"] = response_duration
 
     return response_data
not_again_ai-0.10.0/src/not_again_ai/local_llm/ollama/model_mapping.py
@@ -0,0 +1,15 @@
+"""Hardcoded mapping from ollama model names to their associated HuggingFace tokenizer.
+
+Given the way that Ollama models are tagged, we can against the first part of the model name,
+i.e. all phi3 models will start with "phi3".
+"""
+
+OLLAMA_MODEL_MAPPING = {
+    "phi3": "microsoft/Phi-3-mini-4k-instruct",
+    "llama3:": "nvidia/Llama3-ChatQA-1.5-8B",  # Using this version to get around needed to accept an agreement to get access to the tokenizer
+    "gemma": "google/gemma-1.1-7b-it",  # Requires HF_TOKEN set and accepting the agreement on the HF model page
+    "qwen2": "Qwen/Qwen2-7B-Instruct",
+    "granite-code": "ibm-granite/granite-34b-code-instruct",
+    "llama3-gradient": "nvidia/Llama3-ChatQA-1.5-8B",
+    "command-r": "CohereForAI/c4ai-command-r-v01",
+}
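The mapping is keyed by tag prefix, so any variant of a model family resolves to one HuggingFace tokenizer. A tiny illustration of that lookup, mirroring the `startswith` logic used in `local_llm/ollama/tokens.py` below (the specific tag is just an example):

```python
# Prefix lookup sketch; mirrors the matching done by load_tokenizer in ollama/tokens.py.
from not_again_ai.local_llm.ollama.model_mapping import OLLAMA_MODEL_MAPPING

model = "phi3:latest"
hf_repo = next((repo for key, repo in OLLAMA_MODEL_MAPPING.items() if model.startswith(key)), None)
print(hf_repo)  # microsoft/Phi-3-mini-4k-instruct
```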
not_again_ai-0.10.0/src/not_again_ai/local_llm/ollama/tokens.py
@@ -0,0 +1,110 @@
+"""By default use the associated huggingface transformer tokenizer.
+If it does not exist in the mapping, default to tiktoken with some buffer (const + percentage)"""
+
+import os
+
+from loguru import logger
+import tiktoken
+
+from not_again_ai.llm.openai_api.tokens import num_tokens_from_messages as openai_num_tokens_from_messages
+from not_again_ai.local_llm.ollama.model_mapping import OLLAMA_MODEL_MAPPING
+
+# Prevents the transformers library from printing advisories that are not relevant to this code like not having torch installed.
+os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "1"
+
+from transformers import AutoTokenizer  # noqa: E402
+
+TIKTOKEN_NUM_TOKENS_BUFFER = 10
+TIKTOKEN_PERCENT_TOKENS_BUFFER = 1.1
+
+
+def load_tokenizer(model: str) -> AutoTokenizer | tiktoken.Encoding:
+    """Use the model mapping to load the appropriate tokenizer
+
+    Args:
+        model: The name of the language model to load the tokenizer for
+
+    Returns:
+        Either a HuggingFace tokenizer or a tiktoken encoding object
+    """
+
+    # Loop over the keys in the model mapping checking if the model starts with the key
+    for key in OLLAMA_MODEL_MAPPING:
+        if model.startswith(key):
+            return AutoTokenizer.from_pretrained(OLLAMA_MODEL_MAPPING[key], use_fast=True)
+
+    # If the model does not start with any key in the model mapping, default to tiktoken
+    logger.warning(
+        f'Model "{model}" not found in OLLAMA_MODEL_MAPPING. Using tiktoken - token counts will have an added buffer of \
+{TIKTOKEN_PERCENT_TOKENS_BUFFER * 100}% plus {TIKTOKEN_NUM_TOKENS_BUFFER} tokens.'
+    )
+    tokenizer = tiktoken.get_encoding("o200k_base")
+    return tokenizer
+
+
+def truncate_str(text: str, max_len: int, tokenizer: AutoTokenizer | tiktoken.Encoding) -> str:
+    """Truncates a string to a maximum token length.
+
+    Args:
+        text: The string to truncate.
+        max_len: The maximum number of tokens to keep.
+        tokenizer: Either a HuggingFace tokenizer or a tiktoken encoding object
+
+    Returns:
+        str: The truncated string.
+    """
+    if isinstance(tokenizer, tiktoken.Encoding):
+        tokens = tokenizer.encode(text)
+        if len(tokens) > max_len:
+            tokens = tokens[:max_len]
+            truncated_text = tokenizer.decode(tokens)
+            return truncated_text
+    else:
+        tokens = tokenizer(text, return_tensors=None)["input_ids"]
+        if len(tokens) > max_len:
+            tokens = tokens[:max_len]
+            truncated_text = tokenizer.decode(tokens)
+            return truncated_text
+
+    return text
+
+
+def num_tokens_in_string(text: str, tokenizer: AutoTokenizer | tiktoken.Encoding) -> int:
+    """Return the number of tokens in a string.
+
+    Args:
+        text: The string to count the tokens.
+        tokenizer: Either a HuggingFace tokenizer or a tiktoken encoding object
+
+    Returns:
+        int: The number of tokens in the string.
+    """
+    if isinstance(tokenizer, tiktoken.Encoding):
+        num_tokens = (len(tokenizer.encode(text)) * TIKTOKEN_PERCENT_TOKENS_BUFFER) + TIKTOKEN_NUM_TOKENS_BUFFER
+        return int(num_tokens)
+    else:
+        tokens = tokenizer(text, return_tensors=None)["input_ids"]
+        return len(tokens)
+
+
+def num_tokens_from_messages(messages: list[dict[str, str]], tokenizer: AutoTokenizer | tiktoken.Encoding) -> int:
+    """Return the number of tokens used by a list of messages.
+    For models with HuggingFace tokenizers, uses
+
+    Args:
+        messages: A list of messages to count the tokens
+            should ideally be the result after calling llm.prompts.chat_prompt.
+        tokenizer: Either a HuggingFace tokenizer or a tiktoken encoding object
+
+    Returns:
+        int: The number of tokens used by the messages.
+    """
+    if isinstance(tokenizer, tiktoken.Encoding):
+        num_tokens = (
+            openai_num_tokens_from_messages(messages, tokenizer=tokenizer, model="gpt-4o")
+            * TIKTOKEN_PERCENT_TOKENS_BUFFER
+        ) + TIKTOKEN_NUM_TOKENS_BUFFER
+        return int(num_tokens)
+    else:
+        tokens = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors=None)
+        return len(tokens)
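A usage sketch of these helpers: mapped models get their HuggingFace tokenizer (downloaded from the Hub on first use), while anything unmapped falls back to tiktoken with the padded estimate defined by the buffer constants above (roughly exact count × 1.1 plus 10 tokens).

```python
# Sketch based on the signatures in this new module.
from not_again_ai.local_llm.ollama.tokens import load_tokenizer, num_tokens_from_messages

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "How many tokens is this?"},
]

# "phi3" is in OLLAMA_MODEL_MAPPING, so this loads microsoft/Phi-3-mini-4k-instruct.
tokenizer = load_tokenizer("phi3")
print(num_tokens_from_messages(messages, tokenizer))

# An unmapped name logs a warning and uses the o200k_base tiktoken encoding with the buffer applied.
fallback_tokenizer = load_tokenizer("some-unmapped-model")
print(num_tokens_from_messages(messages, fallback_tokenizer))
```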
not_again_ai-0.10.0/src/not_again_ai/local_llm/prompts.py
@@ -0,0 +1,38 @@
+from copy import deepcopy
+
+from liquid import Template
+
+
+def chat_prompt(messages_unformatted: list[dict[str, str]], variables: dict[str, str]) -> list[dict[str, str]]:
+    """Formats a list of messages for chat completion models using Liquid templating.
+
+    Args:
+        messages_unformatted: A list of dictionaries where each dictionary
+            represents a message. Each message must have 'role' and 'content'
+            keys with string values, where content is a Liquid template.
+        variables: A dictionary where each key-value pair represents a variable
+            name and its value for template rendering.
+
+    Returns:
+        A list of dictionaries with the same structure as `messages_unformatted`,
+        but with the 'content' of each message with the provided `variables`.
+
+    Examples:
+        >>> messages = [
+        ...     {"role": "system", "content": "You are a helpful assistant."},
+        ...     {"role": "user", "content": "Help me {{task}}"}
+        ... ]
+        >>> vars = {"task": "write Python code for the fibonnaci sequence"}
+        >>> chat_prompt(messages, vars)
+        [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": "Help me write Python code for the fibonnaci sequence"}
+        ]
+    """
+
+    messages_formatted = deepcopy(messages_unformatted)
+    for message in messages_formatted:
+        liquid_template = Template(message["content"])
+        message["content"] = liquid_template.render(**variables)
+
+    return messages_formatted
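A short sketch tying `chat_prompt` to the new token helpers elsewhere in this release; both modules are shown in this diff, and combining them this way is simply one plausible workflow.

```python
# Render a Liquid template, then count the tokens of the rendered messages.
from not_again_ai.local_llm.ollama.tokens import load_tokenizer, num_tokens_from_messages
from not_again_ai.local_llm.prompts import chat_prompt

template = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Explain {{topic}} in one sentence."},
]
messages = chat_prompt(template, {"topic": "tokenizers"})

tokenizer = load_tokenizer("phi3")
print(num_tokens_from_messages(messages, tokenizer))
```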
not_again_ai-0.10.0/src/not_again_ai/local_llm/tokens.py
@@ -0,0 +1,90 @@
+import tiktoken
+from transformers import AutoTokenizer
+
+from not_again_ai.llm.openai_api.tokens import load_tokenizer as openai_load_tokenizer
+from not_again_ai.llm.openai_api.tokens import num_tokens_from_messages as openai_num_tokens_from_messages
+from not_again_ai.llm.openai_api.tokens import num_tokens_in_string as openai_num_tokens_in_string
+from not_again_ai.llm.openai_api.tokens import truncate_str as openai_truncate_str
+from not_again_ai.local_llm.ollama.tokens import load_tokenizer as ollama_load_tokenizer
+from not_again_ai.local_llm.ollama.tokens import num_tokens_from_messages as ollama_num_tokens_from_messages
+from not_again_ai.local_llm.ollama.tokens import num_tokens_in_string as ollama_num_tokens_in_string
+from not_again_ai.local_llm.ollama.tokens import truncate_str as ollama_truncate_str
+
+
+def load_tokenizer(model: str, provider: str) -> AutoTokenizer | tiktoken.Encoding:
+    """Load the tokenizer for the given model and providers
+
+    Args:
+        model (str): The name of the language model to load the tokenizer for
+        provider (str): Either "openai_api" or "ollama"
+
+    Returns:
+        Either a HuggingFace tokenizer or a tiktoken encoding object
+    """
+    if provider == "openai_api":
+        return openai_load_tokenizer(model)
+    elif provider == "ollama":
+        return ollama_load_tokenizer(model)
+    else:
+        raise ValueError(f"Unknown tokenizer provider {provider}")
+
+
+def truncate_str(text: str, max_len: int, tokenizer: AutoTokenizer | tiktoken.Encoding, provider: str) -> str:
+    """Truncates a string to a maximum token length.
+
+    Args:
+        text: The string to truncate.
+        max_len: The maximum number of tokens to keep.
+        tokenizer: Either a HuggingFace tokenizer or a tiktoken encoding object
+        provider (str): Either "openai_api" or "ollama"
+
+    Returns:
+        str: The truncated string.
+    """
+    if provider == "openai_api":
+        return openai_truncate_str(text, max_len, tokenizer)
+    elif provider == "ollama":
+        return ollama_truncate_str(text, max_len, tokenizer)
+    else:
+        raise ValueError(f'Unknown tokenizer provider "{provider}"')
+
+
+def num_tokens_in_string(text: str, tokenizer: AutoTokenizer | tiktoken.Encoding, provider: str) -> int:
+    """Return the number of tokens in a string.
+
+    Args:
+        text: The string to count the tokens.
+        tokenizer: Either a HuggingFace tokenizer or a tiktoken encoding object
+        provider (str): Either "openai_api" or "ollama"
+
+    Returns:
+        int: The number of tokens in the string.
+    """
+    if provider == "openai_api":
+        return openai_num_tokens_in_string(text, tokenizer)
+    elif provider == "ollama":
+        return ollama_num_tokens_in_string(text, tokenizer)
+    else:
+        raise ValueError(f'Unknown tokenizer provider "{provider}"')
+
+
+def num_tokens_from_messages(
+    messages: list[dict[str, str]], tokenizer: AutoTokenizer | tiktoken.Encoding, provider: str
+) -> int:
+    """Return the number of tokens used by a list of messages.
+
+    Args:
+        messages: A list of messages to count the tokens
+            should ideally be the result after calling llm.prompts.chat_prompt.
+        tokenizer: Either a HuggingFace tokenizer or a tiktoken encoding object
+        provider (str): Either "openai_api" or "ollama"
+
+    Returns:
+        int: The number of tokens used by the messages.
+    """
+    if provider == "openai_api":
+        return openai_num_tokens_from_messages(messages, tokenizer)
+    elif provider == "ollama":
+        return ollama_num_tokens_from_messages(messages, tokenizer)
+    else:
+        raise ValueError(f'Unknown tokenizer provider "{provider}"')
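A sketch of the provider-dispatching layer: the `provider` string selects between the OpenAI (tiktoken) and Ollama (HuggingFace or tiktoken fallback) implementations shown earlier in this diff.

```python
# Usage sketch of the provider-dispatching token helpers.
from not_again_ai.local_llm.tokens import load_tokenizer, num_tokens_from_messages

messages = [{"role": "user", "content": "Hello!"}]

openai_tokenizer = load_tokenizer("gpt-4o-2024-05-13", provider="openai_api")
print(num_tokens_from_messages(messages, openai_tokenizer, provider="openai_api"))

ollama_tokenizer = load_tokenizer("phi3", provider="ollama")
print(num_tokens_from_messages(messages, ollama_tokenizer, provider="ollama"))
```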
{not_again_ai-0.9.0 → not_again_ai-0.10.0}/src/not_again_ai/statistics/dependence.py
@@ -8,8 +8,8 @@ import sklearn.tree as sktree
 
 
 def _process_variable(
-    x: npt.NDArray[np.int_] | (npt.NDArray[np.
-) -> npt.NDArray[np.int_] | (npt.NDArray[np.
+    x: npt.NDArray[np.int_] | (npt.NDArray[np.float64] | npt.NDArray[np.str_]),
+) -> npt.NDArray[np.int_] | (npt.NDArray[np.float64] | npt.NDArray[np.str_]):
     """Process variable by encoding it as a numeric array."""
     le = skpreprocessing.LabelEncoder()
     x = le.fit_transform(x)
@@ -18,9 +18,9 @@ def _process_variable(
 
 def pearson_correlation(
     x: list[int]
-    | (list[float] | (list[str] | (npt.NDArray[np.int_] | (npt.NDArray[np.
+    | (list[float] | (list[str] | (npt.NDArray[np.int_] | (npt.NDArray[np.float64] | npt.NDArray[np.str_])))),
     y: list[int]
-    | (list[float] | (list[str] | (npt.NDArray[np.int_] | (npt.NDArray[np.
+    | (list[float] | (list[str] | (npt.NDArray[np.int_] | (npt.NDArray[np.float64] | npt.NDArray[np.str_])))),
     is_x_categorical: bool = False,
     is_y_categorical: bool = False,
     print_diagnostics: bool = False,
@@ -60,7 +60,7 @@ def pearson_correlation(
 
 def pred_power_score_classification(
     x: list[int]
-    | (list[float] | (list[str] | (npt.NDArray[np.int_] | (npt.NDArray[np.
+    | (list[float] | (list[str] | (npt.NDArray[np.int_] | (npt.NDArray[np.float64] | npt.NDArray[np.str_])))),
     y: list[int] | (list[str] | npt.NDArray[np.int_]),
     cv_splits: int = 5,
     print_diagnostics: bool = False,
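The change here is the NumPy 2.0 type-hint update (`np.float_` no longer exists, so the annotations now use `np.float64`). A small sketch exercising the updated signature; treating the return value as a single correlation score is an assumption.

```python
# Hypothetical check that the statistics helpers accept NumPy 2.0 float64 arrays.
import numpy as np

from not_again_ai.statistics.dependence import pearson_correlation

rng = np.random.default_rng(0)
x = rng.normal(size=100).astype(np.float64)
y = (2.0 * x + rng.normal(scale=0.1, size=100)).astype(np.float64)
print(pearson_correlation(x, y))
```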
{not_again_ai-0.9.0 → not_again_ai-0.10.0}/src/not_again_ai/viz/barplots.py
@@ -8,8 +8,8 @@ from not_again_ai.viz.utils import reset_plot_libs
 
 
 def simple_barplot(
-    x: list[str] | (list[float] | (npt.NDArray[np.int_] | npt.NDArray[np.
-    y: list[str] | (list[float] | (npt.NDArray[np.int_] | npt.NDArray[np.
+    x: list[str] | (list[float] | (npt.NDArray[np.int_] | npt.NDArray[np.float64])),
+    y: list[str] | (list[float] | (npt.NDArray[np.int_] | npt.NDArray[np.float64])),
     save_pathname: str,
     order: str | None = None,
     orient_bars_vertically: bool = True,
{not_again_ai-0.9.0 → not_again_ai-0.10.0}/src/not_again_ai/viz/scatterplot.py
@@ -9,8 +9,8 @@ from not_again_ai.viz.utils import reset_plot_libs
 
 
 def scatterplot_basic(
-    x: list[float] | (npt.NDArray[np.int_] | npt.NDArray[np.
-    y: list[float] | (npt.NDArray[np.int_] | npt.NDArray[np.
+    x: list[float] | (npt.NDArray[np.int_] | npt.NDArray[np.float64]),
+    y: list[float] | (npt.NDArray[np.int_] | npt.NDArray[np.float64]),
     save_pathname: str,
     title: str | None = None,
     xlim: tuple[float, float] | None = None,
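The viz modules get the same `np.float_` → `np.float64` annotation update. A minimal sketch using only the parameters visible in this diff:

```python
# Hypothetical call to the updated scatterplot signature with float64 inputs.
import numpy as np

from not_again_ai.viz.scatterplot import scatterplot_basic

rng = np.random.default_rng(0)
x = rng.uniform(0, 10, size=50).astype(np.float64)
y = (x + rng.normal(size=50)).astype(np.float64)
scatterplot_basic(x, y, save_pathname="scatter.png", title="np.float64 inputs")
```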
{not_again_ai-0.9.0 → not_again_ai-0.10.0}/src/not_again_ai/local_llm/huggingface/chat_completion.py
RENAMED
File without changes
{not_again_ai-0.9.0 → not_again_ai-0.10.0}/src/not_again_ai/local_llm/huggingface/helpers.py
RENAMED
File without changes