not-again-ai 0.14.0__py3-none-any.whl → 0.16.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- not_again_ai/llm/chat_completion/__init__.py +4 -0
- not_again_ai/llm/chat_completion/interface.py +32 -0
- not_again_ai/llm/chat_completion/providers/ollama_api.py +227 -0
- not_again_ai/llm/chat_completion/providers/openai_api.py +290 -0
- not_again_ai/llm/chat_completion/types.py +145 -0
- not_again_ai/llm/embedding/__init__.py +4 -0
- not_again_ai/llm/embedding/interface.py +28 -0
- not_again_ai/llm/embedding/providers/ollama_api.py +87 -0
- not_again_ai/llm/embedding/providers/openai_api.py +126 -0
- not_again_ai/llm/embedding/types.py +23 -0
- not_again_ai/llm/prompting/__init__.py +3 -0
- not_again_ai/llm/prompting/compile_prompt.py +125 -0
- not_again_ai/llm/prompting/interface.py +46 -0
- not_again_ai/llm/prompting/providers/openai_tiktoken.py +122 -0
- not_again_ai/llm/prompting/types.py +43 -0
- {not_again_ai-0.14.0.dist-info → not_again_ai-0.16.0.dist-info}/METADATA +24 -40
- not_again_ai-0.16.0.dist-info/RECORD +38 -0
- {not_again_ai-0.14.0.dist-info → not_again_ai-0.16.0.dist-info}/WHEEL +1 -1
- not_again_ai/llm/gh_models/azure_ai_client.py +0 -20
- not_again_ai/llm/gh_models/chat_completion.py +0 -81
- not_again_ai/llm/openai_api/chat_completion.py +0 -339
- not_again_ai/llm/openai_api/context_management.py +0 -70
- not_again_ai/llm/openai_api/embeddings.py +0 -62
- not_again_ai/llm/openai_api/openai_client.py +0 -78
- not_again_ai/llm/openai_api/prompts.py +0 -191
- not_again_ai/llm/openai_api/tokens.py +0 -184
- not_again_ai/local_llm/__init__.py +0 -27
- not_again_ai/local_llm/chat_completion.py +0 -105
- not_again_ai/local_llm/huggingface/chat_completion.py +0 -59
- not_again_ai/local_llm/huggingface/helpers.py +0 -23
- not_again_ai/local_llm/ollama/__init__.py +0 -0
- not_again_ai/local_llm/ollama/chat_completion.py +0 -111
- not_again_ai/local_llm/ollama/model_mapping.py +0 -17
- not_again_ai/local_llm/ollama/ollama_client.py +0 -24
- not_again_ai/local_llm/ollama/service.py +0 -81
- not_again_ai/local_llm/ollama/tokens.py +0 -104
- not_again_ai/local_llm/prompts.py +0 -38
- not_again_ai/local_llm/tokens.py +0 -90
- not_again_ai-0.14.0.dist-info/RECORD +0 -44
- not_again_ai-0.14.0.dist-info/entry_points.txt +0 -3
- /not_again_ai/llm/{gh_models → chat_completion/providers}/__init__.py +0 -0
- /not_again_ai/llm/{openai_api → embedding/providers}/__init__.py +0 -0
- /not_again_ai/{local_llm/huggingface → llm/prompting/providers}/__init__.py +0 -0
- {not_again_ai-0.14.0.dist-info → not_again_ai-0.16.0.dist-info}/LICENSE +0 -0
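The removed `openai_api`, `gh_models`, and `local_llm` helpers are superseded in 0.16.0 by provider-agnostic `chat_completion`, `embedding`, and `prompting` packages. A minimal sketch of the new import layout follows, using only module paths confirmed by the file list above; the call-level API (function and class names) is not visible in this diff and is therefore not shown.

# New 0.16.0 package layout (module paths taken from the added files listed above).
# Only the structure is illustrated; exported names must be checked against the 0.16.0 source.
from not_again_ai.llm.chat_completion import interface, types
from not_again_ai.llm.chat_completion.providers import ollama_api, openai_api
from not_again_ai.llm.embedding import interface as embedding_interface
from not_again_ai.llm.prompting import compile_prompt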
not_again_ai/local_llm/ollama/chat_completion.py
DELETED
@@ -1,111 +0,0 @@
-import contextlib
-import json
-import re
-import time
-from typing import Any
-
-from ollama import Client, ResponseError
-
-from not_again_ai.local_llm.ollama.tokens import load_tokenizer, num_tokens_from_messages, num_tokens_in_string
-
-
-def chat_completion(
-    messages: list[dict[str, Any]],
-    model: str,
-    client: Client,
-    tools: list[dict[str, Any]] | None = None,
-    max_tokens: int | None = None,
-    context_window: int | None = None,
-    temperature: float = 0.8,
-    json_mode: bool = False,
-    seed: int | None = None,
-    **kwargs: Any,
-) -> dict[str, Any]:
-    """Gets a Ollama chat completion response, see https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-chat-completion
-    For a full list of valid parameters: https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values
-
-    Args:
-        messages (list[dict[str, Any]]): A list of messages to send to the model.
-        model (str): The model to use.
-        client (Client): The Ollama client.
-        tools (list[dict[str, Any]], optional):A list of tools the model may call.
-            Use this to provide a list of functions the model may generate JSON inputs for. Defaults to None.
-        max_tokens (int, optional): The maximum number of tokens to generate. Ollama calls this `num_predict`.
-        context_window (int, optional): The number of tokens to consider as context. Ollama calls this `num_ctx`.
-        temperature (float, optional): The temperature of the model. Increasing the temperature will make the model answer more creatively.
-        json_mode (bool, optional): This will structure the response as a valid JSON object.
-            It is important to instruct the model to use JSON in the prompt. Otherwise, the model may generate large amounts whitespace.
-        seed (int, optional): The seed to use for the model for reproducible outputs. Defaults to None.
-
-    Returns:
-        dict[str, Any]: A dictionary with the following keys
-            message (str | dict): The content of the generated assistant message.
-                If json_mode is True, this will be a dictionary.
-            tool_names (list[str], optional): The names of the tools called by the model.
-                If the model does not support tools, a ResponseError is raised.
-            tool_args_list (list[dict], optional): The arguments of the tools called by the model.
-            prompt_tokens (int): The number of tokens in the messages sent to the model.
-            completion_tokens (int): The number of tokens used by the model to generate the completion.
-            response_duration (float): The time, in seconds, taken to generate the response by using the model.
-    """
-
-    options = {
-        "num_predict": max_tokens,
-        "num_ctx": context_window,
-        "temperature": temperature,
-    }
-    if seed is not None:
-        options["seed"] = seed
-    options.update(kwargs)
-
-    all_args = {
-        "model": model,
-        "messages": messages,
-        "options": options,
-    }
-    if json_mode:
-        all_args["format"] = "json"
-    if tools:
-        all_args["tools"] = tools
-
-    try:
-        start_time = time.time()
-        response = client.chat(**all_args)  # type: ignore
-        end_time = time.time()
-        response_duration = end_time - start_time
-    except ResponseError as e:
-        # If the error says "model 'model' not found" use regex then raise a more specific error
-        expected_pattern = f"model '{model}' not found"
-        if re.search(expected_pattern, e.error):
-            raise ResponseError(
-                f"Model '{model}' not found. Please use not_again_ai.llm.ollama.service.pull() first."
-            ) from e
-        else:
-            raise ResponseError(e.error) from e
-
-    response_data: dict[str, Any] = {}
-
-    message = response["message"].get("content", "")
-    if message and json_mode:
-        with contextlib.suppress(json.JSONDecodeError):
-            message = json.loads(message)
-    response_data["message"] = message
-
-    if response["message"].get("tool_calls"):
-        tool_calls = response["message"]["tool_calls"]
-        tool_names = [tool_call["function"]["name"] for tool_call in tool_calls]
-        tool_args_list = [tool_call["function"]["arguments"] for tool_call in tool_calls]
-        response_data["tool_names"] = tool_names
-        response_data["tool_args_list"] = tool_args_list
-
-    tokenizer = load_tokenizer(model)
-    prompt_tokens = num_tokens_from_messages(messages, tokenizer)
-    response_data["prompt_tokens"] = prompt_tokens
-
-    response_data["completion_tokens"] = response.get("eval_count", None)
-    if response_data["completion_tokens"] is None:
-        response_data["completion_tokens"] = num_tokens_in_string(str(response_data["message"]), tokenizer)
-
-    response_data["response_duration"] = round(response_duration, 4)
-
-    return response_data
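For reference, a minimal usage sketch (not part of the package) of how this now-removed helper was invoked in 0.14.0, reconstructed from the signature and docstring above; the host URL and model tag are placeholders.

from not_again_ai.local_llm.ollama.chat_completion import chat_completion
from not_again_ai.local_llm.ollama.ollama_client import ollama_client

client = ollama_client(host="http://localhost:11434")  # placeholder host
messages = [
    {"role": "system", "content": "You are a helpful assistant. Respond in JSON."},
    {"role": "user", "content": "Return a JSON object with a single 'greeting' key."},
]
response = chat_completion(
    messages,
    model="phi3",  # placeholder: any locally pulled Ollama model tag
    client=client,
    max_tokens=200,
    json_mode=True,
)
# Keys documented above: message, prompt_tokens, completion_tokens, response_duration
print(response["message"])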
not_again_ai/local_llm/ollama/model_mapping.py
DELETED
@@ -1,17 +0,0 @@
-"""Hardcoded mapping from ollama model names to their associated HuggingFace tokenizer.
-
-Given the way that Ollama models are tagged, we can against the first part of the model name,
-i.e. all phi3 models will start with "phi3".
-"""
-
-OLLAMA_MODEL_MAPPING = {
-    "phi3": "microsoft/Phi-3-mini-4k-instruct",
-    "llama3:": "nvidia/Llama3-ChatQA-1.5-8B",  # Using this version to get around needed to accept an agreement to get access to the tokenizer
-    "llama3.1": "unsloth/Meta-Llama-3.1-8B-Instruct",
-    "gemma": "google/gemma-1.1-7b-it",  # Requires HF_TOKEN set and accepting the agreement on the HF model page
-    "qwen2": "Qwen/Qwen2-7B-Instruct",
-    "granite-code": "ibm-granite/granite-34b-code-instruct",
-    "llama3-gradient": "nvidia/Llama3-ChatQA-1.5-8B",
-    "command-r": "CohereForAI/c4ai-command-r-v01",
-    "deepseek-coder-v2": "deepseek-ai/DeepSeek-Coder-V2-Lite-Base",
-}
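A small sketch (illustrative only) of the prefix match this mapping was built for; the removed load_tokenizer in ollama/tokens.py below performs the same lookup. The Ollama tag is a placeholder.

from not_again_ai.local_llm.ollama.model_mapping import OLLAMA_MODEL_MAPPING

tag = "phi3:3.8b"  # placeholder Ollama tag
hf_repo = next(
    (repo for prefix, repo in OLLAMA_MODEL_MAPPING.items() if tag.startswith(prefix)),
    None,  # no match: the removed tokens.py fell back to tiktoken in this case
)
# hf_repo == "microsoft/Phi-3-mini-4k-instruct"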
not_again_ai/local_llm/ollama/ollama_client.py
DELETED
@@ -1,24 +0,0 @@
-import os
-
-from ollama import Client
-
-
-def ollama_client(host: str | None = None, timeout: float | None = None) -> Client:
-    """Create an Ollama client instance based on the specified host or will read from the OLLAMA_HOST environment variable.
-
-    Args:
-        host (str, optional): The host URL of the Ollama server.
-        timeout (float, optional): The timeout for requests
-
-    Returns:
-        Client: An instance of the Ollama client.
-
-    Examples:
-        >>> client = client(host="http://localhost:11434")
-    """
-    if host is None:
-        host = os.getenv("OLLAMA_HOST")
-    if host is None:
-        raise ValueError("Host must be provided or OLLAMA_HOST environment variable must be set.")
-
-    return Client(host=host, timeout=timeout)
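A usage sketch of the removed factory, based on its docstring above; the host URL and timeout are placeholders.

import os

from not_again_ai.local_llm.ollama.ollama_client import ollama_client

client = ollama_client(host="http://localhost:11434", timeout=30.0)  # explicit host

os.environ["OLLAMA_HOST"] = "http://localhost:11434"
client = ollama_client()  # falls back to OLLAMA_HOST; raises ValueError if neither is set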
not_again_ai/local_llm/ollama/service.py
DELETED
@@ -1,81 +0,0 @@
-from typing import Any
-
-from ollama import Client
-
-from not_again_ai.base.file_system import readable_size
-
-
-def list_models(client: Client) -> list[dict[str, Any]]:
-    """List models that are available locally.
-
-    Args:
-        client (Client): The Ollama client.
-
-    Returns:
-        list[dict[str, Any]]: A list of dictionaries (each corresponding to an available model) with the following keys:
-            name (str): Name of the model
-            model (str): Name of the model. This should be the same as the name.
-            modified_at (str): The date and time the model was last modified.
-            size (int): The size of the model in bytes.
-            size_readable (str): The size of the model in a human-readable format.
-            details (dict[str, Any]): Additional details about the model.
-    """
-    response = client.list().get("models", [])
-
-    response_data = []
-    for model_data in response:
-        curr_model_data = {}
-        curr_model_data["name"] = model_data["name"]
-        curr_model_data["model"] = model_data["model"]
-        curr_model_data["modified_at"] = model_data["modified_at"]
-        curr_model_data["size"] = model_data["size"]
-        curr_model_data["size_readable"] = readable_size(model_data["size"])
-        curr_model_data["details"] = model_data["details"]
-
-        response_data.append(curr_model_data)
-
-    return response_data
-
-
-def is_model_available(model_name: str, client: Client) -> bool:
-    """Check if a model is available locally.
-
-    Args:
-        model_name (str): The name of the model.
-        client (Client): The Ollama client.
-
-    Returns:
-        bool: True if the model is available locally, False otherwise.
-    """
-    # If model_name does not have a ":", append ":latest"
-    if ":" not in model_name:
-        model_name = f"{model_name}:latest"
-    models = list_models(client)
-    return any(model["name"] == model_name for model in models)
-
-
-def show(model_name: str, client: Client) -> dict[str, Any]:
-    """Show information about a model including the modelfile, available parameters, template, and additional details.
-
-    Args:
-        model_name (str): The name of the model.
-        client (Client): The Ollama client.
-    """
-    response = client.show(model_name)
-
-    response_data = {}
-    response_data["modelfile"] = response["modelfile"]
-    response_data["parameters"] = response["parameters"]
-    response_data["template"] = response["template"]
-    response_data["details"] = response["details"]
-    return response_data
-
-
-def pull(model_name: str, client: Client) -> Any:
-    """Pull a model from the Ollama server and returns the status of the pull operation."""
-    return client.pull(model_name)
-
-
-def delete(model_name: str, client: Client) -> Any:
-    """Delete a model from the local filesystem and returns the status of the delete operation."""
-    return client.delete(model_name)
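A sketch of how these removed service helpers composed in 0.14.0; the host URL and model tag are placeholders.

from not_again_ai.local_llm.ollama.ollama_client import ollama_client
from not_again_ai.local_llm.ollama.service import is_model_available, list_models, pull

client = ollama_client(host="http://localhost:11434")  # placeholder host
if not is_model_available("phi3", client):  # ":latest" is appended for untagged names
    pull("phi3", client)
for model in list_models(client):
    print(model["name"], model["size_readable"])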
not_again_ai/local_llm/ollama/tokens.py
DELETED
@@ -1,104 +0,0 @@
-"""By default use the associated huggingface transformer tokenizer.
-If it does not exist in the mapping, default to tiktoken with some buffer (const + percentage)"""
-
-from loguru import logger
-import tiktoken
-from transformers import AutoTokenizer
-
-from not_again_ai.llm.openai_api.tokens import num_tokens_from_messages as openai_num_tokens_from_messages
-from not_again_ai.local_llm.ollama.model_mapping import OLLAMA_MODEL_MAPPING
-
-TIKTOKEN_NUM_TOKENS_BUFFER = 10
-TIKTOKEN_PERCENT_TOKENS_BUFFER = 1.1
-
-
-def load_tokenizer(model: str) -> AutoTokenizer | tiktoken.Encoding:
-    """Use the model mapping to load the appropriate tokenizer
-
-    Args:
-        model: The name of the language model to load the tokenizer for
-
-    Returns:
-        Either a HuggingFace tokenizer or a tiktoken encoding object
-    """
-
-    # Loop over the keys in the model mapping checking if the model starts with the key
-    for key in OLLAMA_MODEL_MAPPING:
-        if model.startswith(key):
-            return AutoTokenizer.from_pretrained(OLLAMA_MODEL_MAPPING[key], use_fast=True)
-
-    # If the model does not start with any key in the model mapping, default to tiktoken
-    logger.warning(
-        f'Model "{model}" not found in OLLAMA_MODEL_MAPPING. Using tiktoken - token counts will have an added buffer of \
-{TIKTOKEN_PERCENT_TOKENS_BUFFER * 100}% plus {TIKTOKEN_NUM_TOKENS_BUFFER} tokens.'
-    )
-    tokenizer = tiktoken.get_encoding("o200k_base")
-    return tokenizer
-
-
-def truncate_str(text: str, max_len: int, tokenizer: AutoTokenizer | tiktoken.Encoding) -> str:
-    """Truncates a string to a maximum token length.
-
-    Args:
-        text: The string to truncate.
-        max_len: The maximum number of tokens to keep.
-        tokenizer: Either a HuggingFace tokenizer or a tiktoken encoding object
-
-    Returns:
-        str: The truncated string.
-    """
-    if isinstance(tokenizer, tiktoken.Encoding):
-        tokens = tokenizer.encode(text)
-        if len(tokens) > max_len:
-            tokens = tokens[:max_len]
-            truncated_text = tokenizer.decode(tokens)
-            return truncated_text
-    else:
-        tokens = tokenizer(text, return_tensors=None)["input_ids"]
-        if len(tokens) > max_len:
-            tokens = tokens[:max_len]
-            truncated_text = tokenizer.decode(tokens)
-            return truncated_text
-
-    return text
-
-
-def num_tokens_in_string(text: str, tokenizer: AutoTokenizer | tiktoken.Encoding) -> int:
-    """Return the number of tokens in a string.
-
-    Args:
-        text: The string to count the tokens.
-        tokenizer: Either a HuggingFace tokenizer or a tiktoken encoding object
-
-    Returns:
-        int: The number of tokens in the string.
-    """
-    if isinstance(tokenizer, tiktoken.Encoding):
-        num_tokens = (len(tokenizer.encode(text)) * TIKTOKEN_PERCENT_TOKENS_BUFFER) + TIKTOKEN_NUM_TOKENS_BUFFER
-        return int(num_tokens)
-    else:
-        tokens = tokenizer(text, return_tensors=None)["input_ids"]
-        return len(tokens)
-
-
-def num_tokens_from_messages(messages: list[dict[str, str]], tokenizer: AutoTokenizer | tiktoken.Encoding) -> int:
-    """Return the number of tokens used by a list of messages.
-    For models with HuggingFace tokenizers, uses
-
-    Args:
-        messages: A list of messages to count the tokens
-            should ideally be the result after calling llm.prompts.chat_prompt.
-        tokenizer: Either a HuggingFace tokenizer or a tiktoken encoding object
-
-    Returns:
-        int: The number of tokens used by the messages.
-    """
-    if isinstance(tokenizer, tiktoken.Encoding):
-        num_tokens = (
-            openai_num_tokens_from_messages(messages, tokenizer=tokenizer, model="gpt-4o")
-            * TIKTOKEN_PERCENT_TOKENS_BUFFER
-        ) + TIKTOKEN_NUM_TOKENS_BUFFER
-        return int(num_tokens)
-    else:
-        tokens = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors=None)
-        return len(tokens)
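A counting sketch using the removed helpers above; "phi3" resolves to a HuggingFace tokenizer via OLLAMA_MODEL_MAPPING, while unmapped models fall back to tiktoken with the stated buffer. Message text is placeholder content.

from not_again_ai.local_llm.ollama.tokens import load_tokenizer, num_tokens_from_messages, truncate_str

tokenizer = load_tokenizer("phi3")  # mapped to microsoft/Phi-3-mini-4k-instruct
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Summarize this changelog."},
]
prompt_tokens = num_tokens_from_messages(messages, tokenizer)
snippet = truncate_str("a long prompt that should be shortened", max_len=8, tokenizer=tokenizer)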
not_again_ai/local_llm/prompts.py
DELETED
@@ -1,38 +0,0 @@
-from copy import deepcopy
-
-from liquid import Template
-
-
-def chat_prompt(messages_unformatted: list[dict[str, str]], variables: dict[str, str]) -> list[dict[str, str]]:
-    """Formats a list of messages for chat completion models using Liquid templating.
-
-    Args:
-        messages_unformatted: A list of dictionaries where each dictionary
-            represents a message. Each message must have 'role' and 'content'
-            keys with string values, where content is a Liquid template.
-        variables: A dictionary where each key-value pair represents a variable
-            name and its value for template rendering.
-
-    Returns:
-        A list of dictionaries with the same structure as `messages_unformatted`,
-        but with the 'content' of each message with the provided `variables`.
-
-    Examples:
-        >>> messages = [
-        ...     {"role": "system", "content": "You are a helpful assistant."},
-        ...     {"role": "user", "content": "Help me {{task}}"}
-        ... ]
-        >>> vars = {"task": "write Python code for the fibonnaci sequence"}
-        >>> chat_prompt(messages, vars)
-        [
-            {"role": "system", "content": "You are a helpful assistant."},
-            {"role": "user", "content": "Help me write Python code for the fibonnaci sequence"}
-        ]
-    """
-
-    messages_formatted = deepcopy(messages_unformatted)
-    for message in messages_formatted:
-        liquid_template = Template(message["content"])
-        message["content"] = liquid_template.render(**variables)
-
-    return messages_formatted
not_again_ai/local_llm/tokens.py
DELETED
@@ -1,90 +0,0 @@
-import tiktoken
-from transformers import AutoTokenizer
-
-from not_again_ai.llm.openai_api.tokens import load_tokenizer as openai_load_tokenizer
-from not_again_ai.llm.openai_api.tokens import num_tokens_from_messages as openai_num_tokens_from_messages
-from not_again_ai.llm.openai_api.tokens import num_tokens_in_string as openai_num_tokens_in_string
-from not_again_ai.llm.openai_api.tokens import truncate_str as openai_truncate_str
-from not_again_ai.local_llm.ollama.tokens import load_tokenizer as ollama_load_tokenizer
-from not_again_ai.local_llm.ollama.tokens import num_tokens_from_messages as ollama_num_tokens_from_messages
-from not_again_ai.local_llm.ollama.tokens import num_tokens_in_string as ollama_num_tokens_in_string
-from not_again_ai.local_llm.ollama.tokens import truncate_str as ollama_truncate_str
-
-
-def load_tokenizer(model: str, provider: str) -> AutoTokenizer | tiktoken.Encoding:
-    """Load the tokenizer for the given model and providers
-
-    Args:
-        model (str): The name of the language model to load the tokenizer for
-        provider (str): Either "openai_api" or "ollama"
-
-    Returns:
-        Either a HuggingFace tokenizer or a tiktoken encoding object
-    """
-    if provider == "openai_api":
-        return openai_load_tokenizer(model)
-    elif provider == "ollama":
-        return ollama_load_tokenizer(model)
-    else:
-        raise ValueError(f"Unknown tokenizer provider {provider}")
-
-
-def truncate_str(text: str, max_len: int, tokenizer: AutoTokenizer | tiktoken.Encoding, provider: str) -> str:
-    """Truncates a string to a maximum token length.
-
-    Args:
-        text: The string to truncate.
-        max_len: The maximum number of tokens to keep.
-        tokenizer: Either a HuggingFace tokenizer or a tiktoken encoding object
-        provider (str): Either "openai_api" or "ollama"
-
-    Returns:
-        str: The truncated string.
-    """
-    if provider == "openai_api":
-        return openai_truncate_str(text, max_len, tokenizer)
-    elif provider == "ollama":
-        return ollama_truncate_str(text, max_len, tokenizer)
-    else:
-        raise ValueError(f'Unknown tokenizer provider "{provider}"')
-
-
-def num_tokens_in_string(text: str, tokenizer: AutoTokenizer | tiktoken.Encoding, provider: str) -> int:
-    """Return the number of tokens in a string.
-
-    Args:
-        text: The string to count the tokens.
-        tokenizer: Either a HuggingFace tokenizer or a tiktoken encoding object
-        provider (str): Either "openai_api" or "ollama"
-
-    Returns:
-        int: The number of tokens in the string.
-    """
-    if provider == "openai_api":
-        return openai_num_tokens_in_string(text, tokenizer)
-    elif provider == "ollama":
-        return ollama_num_tokens_in_string(text, tokenizer)
-    else:
-        raise ValueError(f'Unknown tokenizer provider "{provider}"')
-
-
-def num_tokens_from_messages(
-    messages: list[dict[str, str]], tokenizer: AutoTokenizer | tiktoken.Encoding, provider: str
-) -> int:
-    """Return the number of tokens used by a list of messages.
-
-    Args:
-        messages: A list of messages to count the tokens
-            should ideally be the result after calling llm.prompts.chat_prompt.
-        tokenizer: Either a HuggingFace tokenizer or a tiktoken encoding object
-        provider (str): Either "openai_api" or "ollama"
-
-    Returns:
-        int: The number of tokens used by the messages.
-    """
-    if provider == "openai_api":
-        return openai_num_tokens_from_messages(messages, tokenizer)
-    elif provider == "ollama":
-        return ollama_num_tokens_from_messages(messages, tokenizer)
-    else:
-        raise ValueError(f'Unknown tokenizer provider "{provider}"')
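The removed dispatcher simply routed on the provider string; a sketch of its 0.14.0 usage, with a placeholder model and message.

from not_again_ai.local_llm.tokens import load_tokenizer, num_tokens_from_messages

tokenizer = load_tokenizer("gpt-4o", provider="openai_api")  # routes to the openai_api helpers
messages = [{"role": "user", "content": "Hello!"}]
n = num_tokens_from_messages(messages, tokenizer, provider="openai_api")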
not_again_ai-0.14.0.dist-info/RECORD
DELETED
@@ -1,44 +0,0 @@
-not_again_ai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-not_again_ai/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-not_again_ai/base/file_system.py,sha256=KNQmacO4Q__CQuq2oPzWrg3rQO48n3evglc9bNiP7KM,949
-not_again_ai/base/parallel.py,sha256=fcYhKBYBWvob84iKp3O93wvFFdXeidljZsShgBLTNGA,3448
-not_again_ai/data/__init__.py,sha256=1jF6mwvtB2PT7IEc3xpbRtZm3g3Lyf8zUqH4AEE4qlQ,244
-not_again_ai/data/web.py,sha256=wjx9cc33jcoJBGonYCIpwygPBFOwz7F-dx_ominmbnI,1838
-not_again_ai/llm/__init__.py,sha256=_wNUL6FDaT369Z8W48FsaC_NkcOZ-ib2MMUvnaLOS-0,451
-not_again_ai/llm/gh_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-not_again_ai/llm/gh_models/azure_ai_client.py,sha256=GkVn9ZwYbsLm3X0A3pGKKHuoqrxc-BZnZ4n9ExelRUQ,580
-not_again_ai/llm/gh_models/chat_completion.py,sha256=zI6Kfqb9AW0t_Yd1ecaXy7q70gygJ_XKcFbtYrKIbn4,3599
-not_again_ai/llm/openai_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-not_again_ai/llm/openai_api/chat_completion.py,sha256=5jO-J97zcKPJrzHn6V4NZB3nZa1RZwbIeMC3gbXlWWQ,17100
-not_again_ai/llm/openai_api/context_management.py,sha256=BJSG100_qw9MeTCZGztDV5CBXjVOxU4x7gyoRlLxWnI,3561
-not_again_ai/llm/openai_api/embeddings.py,sha256=4OBnxZicrY6q4dQhuPqMdAnifyjwrsKMTDj-kVre0yc,2500
-not_again_ai/llm/openai_api/openai_client.py,sha256=AK9SDBkpP94u5Q73-Q5i5HRPQh_D8cF8Dfl0IgPsJDQ,3816
-not_again_ai/llm/openai_api/prompts.py,sha256=lZYxgzoM2VqXWKUDToKWKR6w49KNYKu5TnqKLxG3TsM,8034
-not_again_ai/llm/openai_api/tokens.py,sha256=Q4xdCEPrmgDCNjmcB4rg6ipvo4_McwSjc-b9gAHjUJs,8024
-not_again_ai/local_llm/__init__.py,sha256=BsUn39U3QQaw6yomQHfp_HIPHRIBoMAgjcP3CDADx04,882
-not_again_ai/local_llm/chat_completion.py,sha256=PmICXrGZJXIuqY00ULBGi2bKnPG8ticqTXZHSTzZK9o,4828
-not_again_ai/local_llm/huggingface/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-not_again_ai/local_llm/huggingface/chat_completion.py,sha256=Y6uMbxLG8TaMVi3hJGrMl_G9Y1N_0dld5Kv1iqYnoao,2300
-not_again_ai/local_llm/huggingface/helpers.py,sha256=YPr8KbQ8Ac_Mn_nBcrFuL3bCl-IuDCdaRvYVCocy8Gk,734
-not_again_ai/local_llm/ollama/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-not_again_ai/local_llm/ollama/chat_completion.py,sha256=WNnR-fe50wSDsoAdSZSKyjoqLCCPb00jIWSIFWo_Bbg,4890
-not_again_ai/local_llm/ollama/model_mapping.py,sha256=sJqPg97OO68O0k5MFkTjRLIn9gB7gCRAcyUEjxMfizo,891
-not_again_ai/local_llm/ollama/ollama_client.py,sha256=dktyw7aKFq4EA3dU7Le5UpfsSq3Oh_POmYSrAI4qLi8,765
-not_again_ai/local_llm/ollama/service.py,sha256=XczbxISTAp4KHnIkqRZaMsfBohH-TAHrjZ8T9x3cRAY,2900
-not_again_ai/local_llm/ollama/tokens.py,sha256=k7K7enOSuCJRHteDG0x-CbuivQ2uAtKK3e0Jr7-BUd4,3952
-not_again_ai/local_llm/prompts.py,sha256=mUpt8LMNnY9TVj-2tPE3lc81ZwWnBBrmBou0jxEPGL0,1560
-not_again_ai/local_llm/tokens.py,sha256=Tf1uH5zjyiejOPVr6eVYFj1m-jQpeDUcG7gZK-xMOAE,3679
-not_again_ai/py.typed,sha256=UaCuPFa3H8UAakbt-5G8SPacldTOGvJv18pPjUJ5gDY,93
-not_again_ai/statistics/__init__.py,sha256=gA8r9JQFbFSN0ykrHy4G1IQgcky4f2eM5Oo24oVI5Ik,466
-not_again_ai/statistics/dependence.py,sha256=4xaniMkLlTjdXcNVXdwepEAiZ-WaaGYfR9haJC1lU2Q,4434
-not_again_ai/viz/__init__.py,sha256=MeaWae_QRbDEHJ4MWYoY1-Ad6S0FhSDaRhQncS2cpSc,447
-not_again_ai/viz/barplots.py,sha256=rr_2phZgDaqcF5Ve7mBZrVvNXVzEt84RQPIyyeJxsMo,3384
-not_again_ai/viz/distributions.py,sha256=OyWwJaNI6lMRm_iSrhq-CORLNvXfeuLSgDtVo3umnzU,4354
-not_again_ai/viz/scatterplot.py,sha256=5CUOWeknbBOaZPeX9oPin5sBkRKEwk8qeFH45R-9LlY,2292
-not_again_ai/viz/time_series.py,sha256=pOGZqXp_2nd6nKo-PUQNCtmMh__69jxQ6bQibTGLwZA,5212
-not_again_ai/viz/utils.py,sha256=hN7gwxtBt3U6jQni2K8j5m5pCXpaJDoNzGhBBikEU28,238
-not_again_ai-0.14.0.dist-info/LICENSE,sha256=btjOgNGpp-ux5xOo1Gx1MddxeWtT9sof3s3Nui29QfA,1071
-not_again_ai-0.14.0.dist-info/METADATA,sha256=kNL0KybcNVoN7fcCMNO1CohIWZAxc74gnV68zzoEDfI,16475
-not_again_ai-0.14.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-not_again_ai-0.14.0.dist-info/entry_points.txt,sha256=EMJegugnmJUd-jMUA_qIRMIPAasbei8gP6O4-ER0BxQ,61
-not_again_ai-0.14.0.dist-info/RECORD,,
/not_again_ai/llm/{gh_models → chat_completion/providers}/__init__.py
File without changes
/not_again_ai/llm/{openai_api → embedding/providers}/__init__.py
File without changes
/not_again_ai/{local_llm/huggingface → llm/prompting/providers}/__init__.py
File without changes
{not_again_ai-0.14.0.dist-info → not_again_ai-0.16.0.dist-info}/LICENSE
File without changes