not-again-ai 0.10.3__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- not_again_ai/llm/openai_api/chat_completion.py +12 -12
- not_again_ai/local_llm/chat_completion.py +11 -0
- not_again_ai/local_llm/ollama/chat_completion.py +19 -4
- not_again_ai/local_llm/ollama/model_mapping.py +1 -0
- {not_again_ai-0.10.3.dist-info → not_again_ai-0.11.0.dist-info}/METADATA +5 -5
- {not_again_ai-0.10.3.dist-info → not_again_ai-0.11.0.dist-info}/RECORD +9 -9
- {not_again_ai-0.10.3.dist-info → not_again_ai-0.11.0.dist-info}/LICENSE +0 -0
- {not_again_ai-0.10.3.dist-info → not_again_ai-0.11.0.dist-info}/WHEEL +0 -0
- {not_again_ai-0.10.3.dist-info → not_again_ai-0.11.0.dist-info}/entry_points.txt +0 -0
```diff
--- a/not_again_ai/llm/openai_api/chat_completion.py
+++ b/not_again_ai/llm/openai_api/chat_completion.py
@@ -32,8 +32,8 @@ def chat_completion(
         https://platform.openai.com/docs/models/model-endpoint-compatibility
             for details on which models work with the Chat API.
         client (OpenAI): An instance of the OpenAI client.
-        tools (list[dict[str, Any]], optional):
-            Defaults to None.
+        tools (list[dict[str, Any]], optional): A list of tools the model may call.
+            Use this to provide a list of functions the model may generate JSON inputs for. Defaults to None.
         tool_choice (str, optional): The tool choice to use. Can be "auto", "required", "none", or a specific function name.
             Note the function name cannot be any of "auto", "required", or "none". Defaults to "auto".
         max_tokens (int, optional): The maximum number of tokens to generate in the chat completion.
@@ -58,21 +58,21 @@ def chat_completion(
 
     Returns:
         dict[str, Any]: A dictionary with the following keys:
-
+            finish_reason (str): The reason the model stopped generating further tokens.
                 Can be 'stop', 'length', or 'tool_calls'.
-
-
-
+            tool_names (list[str], optional): The names of the tools called by the model.
+            tool_args_list (list[dict], optional): The arguments of the tools called by the model.
+            message (str | dict): The content of the generated assistant message.
                 If json_mode is True, this will be a dictionary.
-
+            logprobs (list[dict[str, Any] | list[dict[str, Any]]]): If logprobs[1] is between 1 and 5, each element in the list
                 will be a list of dictionaries containing the token, logprob, and bytes for the top `logprobs[1]` logprobs. Otherwise,
                 this will be a list of dictionaries containing the token, logprob, and bytes for each token in the message.
-
-
+            choices (list[dict], optional): A list of chat completion choices if n > 1 where each dict contains the above fields.
+            completion_tokens (int): The number of tokens used by the model to generate the completion.
                 NOTE: If n > 1 this is the sum of all completions.
-
-
-
+            prompt_tokens (int): The number of tokens in the messages sent to the model.
+            system_fingerprint (str, optional): If seed is set, a unique identifier for the model used to generate the response.
+            response_duration (float): The time, in seconds, taken to generate the response from the API.
     """
     response_format = {"type": "json_object"} if json_mode else None
 
```
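Taken together, these two hunks document the tool-calling surface of the OpenAI-backed `chat_completion`: pass `tools` (and optionally `tool_choice`), then read any resulting calls back from the `tool_names` and `tool_args_list` keys. The following is a hypothetical usage sketch rather than code from the package; the model name, the example weather tool, and the `openai_client()` helper (the module appears in the RECORD below, but the helper's name and signature are assumed) are illustrative only.

```python
from not_again_ai.llm.openai_api.chat_completion import chat_completion
from not_again_ai.llm.openai_api.openai_client import openai_client  # assumed helper name

# One function tool in the OpenAI function-calling schema (illustrative).
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather for a location.",
            "parameters": {
                "type": "object",
                "properties": {"location": {"type": "string"}},
                "required": ["location"],
            },
        },
    }
]

client = openai_client()
response = chat_completion(
    messages=[{"role": "user", "content": "What is the weather in Boston?"}],
    model="gpt-4o-mini",  # assumed model name
    client=client,
    tools=tools,
    tool_choice="auto",
    max_tokens=200,
)

# Per the docstring above, tool calls surface as parallel lists.
if response.get("finish_reason") == "tool_calls":
    for name, args in zip(response["tool_names"], response["tool_args_list"]):
        print(name, args)
```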
```diff
--- a/not_again_ai/local_llm/chat_completion.py
+++ b/not_again_ai/local_llm/chat_completion.py
@@ -11,6 +11,7 @@ def chat_completion(
     messages: list[dict[str, Any]],
     model: str,
     client: OpenAI | Client,
+    tools: list[dict[str, Any]] | None = None,
     max_tokens: int | None = None,
     temperature: float = 0.7,
     json_mode: bool = False,
@@ -25,6 +26,8 @@ def chat_completion(
         messages (list[dict[str, Any]]): A list of messages to send to the model.
         model (str): The model name to use.
         client (OpenAI | Client): The client object to use for chat completion.
+        tools (list[dict[str, Any]], optional): A list of tools the model may call.
+            Use this to provide a list of functions the model may generate JSON inputs for. Defaults to None.
         max_tokens (int, optional): The maximum number of tokens to generate.
         temperature (float, optional): The temperature of the model. Increasing the temperature will make the model answer more creatively.
         json_mode (bool, optional): This will structure the response as a valid JSON object.
@@ -34,6 +37,9 @@ def chat_completion(
         dict[str, Any]: A dictionary with the following keys
             message (str | dict): The content of the generated assistant message.
                 If json_mode is True, this will be a dictionary.
+            tool_names (list[str], optional): The names of the tools called by the model.
+                If the model does not support tools, a ResponseError is raised.
+            tool_args_list (list[dict], optional): The arguments of the tools called by the model.
             prompt_tokens (int): The number of tokens in the messages sent to the model.
             completion_tokens (int): The number of tokens used by the model to generate the completion.
             response_duration (float): The time, in seconds, taken to generate the response by using the model.
@@ -45,6 +51,7 @@ def chat_completion(
             messages=messages,
             model=model,
             client=client,
+            tools=tools,
             max_tokens=max_tokens,
             temperature=temperature,
             json_mode=json_mode,
@@ -56,6 +63,7 @@ def chat_completion(
             messages=messages,
             model=model,
             client=client,
+            tools=tools,
             max_tokens=max_tokens,
             temperature=temperature,
             json_mode=json_mode,
@@ -68,6 +76,9 @@ def chat_completion(
     # Parse the responses to be consistent
     response_data = {}
     response_data["message"] = response.get("message")
+    if response.get("tool_names") and response.get("tool_args_list"):
+        response_data["tool_names"] = response.get("tool_names")
+        response_data["tool_args_list"] = response.get("tool_args_list")
     response_data["completion_tokens"] = response.get("completion_tokens")
     response_data["prompt_tokens"] = response.get("prompt_tokens")
     response_data["response_duration"] = response.get("response_duration")
```
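Because this wrapper only forwards `tools` to whichever backend matches the client type and then copies `tool_names`/`tool_args_list` into its normalized response, a caller can switch between the OpenAI and Ollama backends without changing the call site. Below is a small, hypothetical sketch using an Ollama `Client`; the model tag and the tool definition are assumptions for illustration, not values taken from the package.

```python
from ollama import Client

from not_again_ai.local_llm.chat_completion import chat_completion

# One function tool in the same JSON-schema style shown above (illustrative).
tools = [
    {
        "type": "function",
        "function": {
            "name": "add_numbers",
            "description": "Add two integers.",
            "parameters": {
                "type": "object",
                "properties": {"a": {"type": "integer"}, "b": {"type": "integer"}},
                "required": ["a", "b"],
            },
        },
    }
]

response = chat_completion(
    messages=[{"role": "user", "content": "What is 2 + 2? Use the tool."}],
    model="llama3.1",  # assumed local Ollama model tag
    client=Client(),   # defaults to the local Ollama server
    tools=tools,
    max_tokens=100,
)

# tool_names / tool_args_list are only present when the model actually called a tool.
print(response["message"])
print(response.get("tool_names"), response.get("tool_args_list"))
```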
```diff
--- a/not_again_ai/local_llm/ollama/chat_completion.py
+++ b/not_again_ai/local_llm/ollama/chat_completion.py
@@ -13,6 +13,7 @@ def chat_completion(
     messages: list[dict[str, Any]],
     model: str,
     client: Client,
+    tools: list[dict[str, Any]] | None = None,
     max_tokens: int | None = None,
     context_window: int | None = None,
     temperature: float = 0.8,
@@ -27,6 +28,8 @@ def chat_completion(
         messages (list[dict[str, Any]]): A list of messages to send to the model.
         model (str): The model to use.
         client (Client): The Ollama client.
+        tools (list[dict[str, Any]], optional): A list of tools the model may call.
+            Use this to provide a list of functions the model may generate JSON inputs for. Defaults to None.
         max_tokens (int, optional): The maximum number of tokens to generate. Ollama calls this `num_predict`.
         context_window (int, optional): The number of tokens to consider as context. Ollama calls this `num_ctx`.
         temperature (float, optional): The temperature of the model. Increasing the temperature will make the model answer more creatively.
@@ -38,6 +41,9 @@ def chat_completion(
         dict[str, Any]: A dictionary with the following keys
             message (str | dict): The content of the generated assistant message.
                 If json_mode is True, this will be a dictionary.
+            tool_names (list[str], optional): The names of the tools called by the model.
+                If the model does not support tools, a ResponseError is raised.
+            tool_args_list (list[dict], optional): The arguments of the tools called by the model.
             prompt_tokens (int): The number of tokens in the messages sent to the model.
             completion_tokens (int): The number of tokens used by the model to generate the completion.
             response_duration (float): The time, in seconds, taken to generate the response by using the model.
@@ -59,6 +65,8 @@ def chat_completion(
     }
     if json_mode:
         all_args["format"] = "json"
+    if tools:
+        all_args["tools"] = tools
 
     try:
         start_time = time.time()
@@ -78,19 +86,26 @@ def chat_completion(
     response_data: dict[str, Any] = {}
 
     # Handle getting the message returned by the model
-    message = response["message"].get("content",
+    message = response["message"].get("content", "")
     if message and json_mode:
         with contextlib.suppress(json.JSONDecodeError):
            message = json.loads(message)
-
-
+    response_data["message"] = message
+
+    # Try getting tool calls
+    if response["message"].get("tool_calls"):
+        tool_calls = response["message"]["tool_calls"]
+        tool_names = [tool_call["function"]["name"] for tool_call in tool_calls]
+        tool_args_list = [tool_call["function"]["arguments"] for tool_call in tool_calls]
+        response_data["tool_names"] = tool_names
+        response_data["tool_args_list"] = tool_args_list
 
     tokenizer = load_tokenizer(model)
     prompt_tokens = num_tokens_from_messages(messages, tokenizer)
     response_data["prompt_tokens"] = prompt_tokens
 
     # Get the number of tokens generated
-    response_data["completion_tokens"] = response.get("eval_count", None)
+    response_data["completion_tokens"] = response.get("eval_count", None)
     if response_data["completion_tokens"] is None:
         response_data["completion_tokens"] = num_tokens_in_string(str(response_data["message"]), tokenizer)
 
```
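To make the new parsing branch concrete, here is a standalone sketch that runs the same extraction logic against a hand-written response dict (no live Ollama call), showing the shape of the `tool_names` and `tool_args_list` keys the function returns. The example response content is invented purely for illustration.

```python
# Hand-written stand-in for an Ollama chat response that contains one tool call.
response = {
    "message": {
        "content": "",
        "tool_calls": [
            {"function": {"name": "get_current_weather", "arguments": {"location": "Boston"}}},
        ],
    },
    "eval_count": 12,
}

response_data: dict = {}
# Same steps as the added lines above: grab the message, then the tool calls.
response_data["message"] = response["message"].get("content", "")

if response["message"].get("tool_calls"):
    tool_calls = response["message"]["tool_calls"]
    response_data["tool_names"] = [t["function"]["name"] for t in tool_calls]
    response_data["tool_args_list"] = [t["function"]["arguments"] for t in tool_calls]

print(response_data)
# {'message': '', 'tool_names': ['get_current_weather'], 'tool_args_list': [{'location': 'Boston'}]}
```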
```diff
--- a/not_again_ai/local_llm/ollama/model_mapping.py
+++ b/not_again_ai/local_llm/ollama/model_mapping.py
@@ -7,6 +7,7 @@ i.e. all phi3 models will start with "phi3".
 OLLAMA_MODEL_MAPPING = {
     "phi3": "microsoft/Phi-3-mini-4k-instruct",
     "llama3:": "nvidia/Llama3-ChatQA-1.5-8B",  # Using this version to get around needed to accept an agreement to get access to the tokenizer
+    "llama3.1": "unsloth/Meta-Llama-3.1-8B-Instruct",
     "gemma": "google/gemma-1.1-7b-it",  # Requires HF_TOKEN set and accepting the agreement on the HF model page
     "qwen2": "Qwen/Qwen2-7B-Instruct",
     "granite-code": "ibm-granite/granite-34b-code-instruct",
```
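The mapping's module docstring (quoted in the hunk header) indicates that the keys are treated as prefixes of Ollama model names, presumably consumed by the `load_tokenizer` call seen in the Ollama chat completion diff above. The function below is only a hypothetical illustration of how such a prefix table resolves the new `llama3.1` entry; it is not the package's actual lookup code.

```python
# Illustrative prefix lookup over a trimmed copy of the mapping (assumption:
# the real helper does something equivalent when choosing a HF tokenizer repo).
OLLAMA_MODEL_MAPPING = {
    "phi3": "microsoft/Phi-3-mini-4k-instruct",
    "llama3:": "nvidia/Llama3-ChatQA-1.5-8B",
    "llama3.1": "unsloth/Meta-Llama-3.1-8B-Instruct",  # new in 0.11.0
}


def resolve_hf_repo(ollama_model: str) -> str | None:
    """Return the Hugging Face repo whose key prefixes the Ollama model tag."""
    for prefix, hf_repo in OLLAMA_MODEL_MAPPING.items():
        if ollama_model.startswith(prefix):
            return hf_repo
    return None


print(resolve_hf_repo("llama3.1:8b-instruct-q4_0"))  # unsloth/Meta-Llama-3.1-8B-Instruct
```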
````diff
--- not_again_ai-0.10.3.dist-info/METADATA
+++ not_again_ai-0.11.0.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: not-again-ai
-Version: 0.10.3
+Version: 0.11.0
 Summary: Designed to once and for all collect all the little things that come up over and over again in AI projects and put them in one place.
 Home-page: https://github.com/DaveCoDev/not-again-ai
 License: MIT
@@ -24,15 +24,15 @@ Provides-Extra: viz
 Requires-Dist: jinja2 (>=3.1,<4.0) ; extra == "local-llm"
 Requires-Dist: loguru (==0.7.2)
 Requires-Dist: numpy (>=1.26,<2.0) ; extra == "statistics" or extra == "viz"
-Requires-Dist: ollama (>=0.
-Requires-Dist: openai (>=1.
+Requires-Dist: ollama (>=0.3,<0.4) ; extra == "local-llm"
+Requires-Dist: openai (>=1.37,<2.0) ; extra == "llm"
 Requires-Dist: pandas (>=2.2,<3.0) ; extra == "viz"
 Requires-Dist: python-liquid (>=1.12,<2.0) ; extra == "llm"
 Requires-Dist: scikit-learn (>=1.5,<2.0) ; extra == "statistics"
 Requires-Dist: scipy (>=1.14,<2.0) ; extra == "statistics"
 Requires-Dist: seaborn (>=0.13,<0.14) ; extra == "viz"
 Requires-Dist: tiktoken (>=0.7,<0.8) ; extra == "llm"
-Requires-Dist: transformers (>=4.
+Requires-Dist: transformers (>=4.43,<5.0) ; extra == "local-llm"
 Project-URL: Documentation, https://github.com/DaveCoDev/not-again-ai
 Project-URL: Repository, https://github.com/DaveCoDev/not-again-ai
 Description-Content-Type: text/markdown
@@ -295,7 +295,7 @@ installed package to indicate that inline type annotations should be checked.
 
 ## Typos
 
-
+Check for typos using [typos](https://github.com/crate-ci/typos)
 
 ```bash
 (.venv) $ nox -s typos
````
```diff
--- not_again_ai-0.10.3.dist-info/RECORD
+++ not_again_ai-0.11.0.dist-info/RECORD
@@ -4,20 +4,20 @@ not_again_ai/base/file_system.py,sha256=KNQmacO4Q__CQuq2oPzWrg3rQO48n3evglc9bNiP
 not_again_ai/base/parallel.py,sha256=fcYhKBYBWvob84iKp3O93wvFFdXeidljZsShgBLTNGA,3448
 not_again_ai/llm/__init__.py,sha256=_wNUL6FDaT369Z8W48FsaC_NkcOZ-ib2MMUvnaLOS-0,451
 not_again_ai/llm/openai_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-not_again_ai/llm/openai_api/chat_completion.py,sha256=
+not_again_ai/llm/openai_api/chat_completion.py,sha256=PRFi5Sl1K5GOgfWDYygHlmS-Ks1ZE6ETBzinZsz5GCc,8954
 not_again_ai/llm/openai_api/context_management.py,sha256=BJSG100_qw9MeTCZGztDV5CBXjVOxU4x7gyoRlLxWnI,3561
 not_again_ai/llm/openai_api/embeddings.py,sha256=4OBnxZicrY6q4dQhuPqMdAnifyjwrsKMTDj-kVre0yc,2500
 not_again_ai/llm/openai_api/openai_client.py,sha256=6pZw2xw9X-ceV22rhApwFJ2tAKCxi-SxkjxBsTBZ2Nw,2470
 not_again_ai/llm/openai_api/prompts.py,sha256=7cDfvIKCTYM0t5lK34FLLqYf-SR_cynDXIXw3zWDizA,7094
 not_again_ai/llm/openai_api/tokens.py,sha256=31neIrY66ejJQ10VB3EWnkN00wuw9vMpCS8tsw2WtFg,4392
 not_again_ai/local_llm/__init__.py,sha256=BsUn39U3QQaw6yomQHfp_HIPHRIBoMAgjcP3CDADx04,882
-not_again_ai/local_llm/chat_completion.py,sha256=
+not_again_ai/local_llm/chat_completion.py,sha256=buZQGV2sChaSi5cgiAcOd9gi4lAEzFIGGIqV-1qazAc,4174
 not_again_ai/local_llm/huggingface/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 not_again_ai/local_llm/huggingface/chat_completion.py,sha256=Y6uMbxLG8TaMVi3hJGrMl_G9Y1N_0dld5Kv1iqYnoao,2300
 not_again_ai/local_llm/huggingface/helpers.py,sha256=YPr8KbQ8Ac_Mn_nBcrFuL3bCl-IuDCdaRvYVCocy8Gk,734
 not_again_ai/local_llm/ollama/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-not_again_ai/local_llm/ollama/chat_completion.py,sha256=
-not_again_ai/local_llm/ollama/model_mapping.py,sha256=
+not_again_ai/local_llm/ollama/chat_completion.py,sha256=C8uU-yq7FL9OLdflZVjbNkEOofmD2A3Hcsd8k-59iS4,5053
+not_again_ai/local_llm/ollama/model_mapping.py,sha256=sJqPg97OO68O0k5MFkTjRLIn9gB7gCRAcyUEjxMfizo,891
 not_again_ai/local_llm/ollama/ollama_client.py,sha256=dktyw7aKFq4EA3dU7Le5UpfsSq3Oh_POmYSrAI4qLi8,765
 not_again_ai/local_llm/ollama/service.py,sha256=XczbxISTAp4KHnIkqRZaMsfBohH-TAHrjZ8T9x3cRAY,2900
 not_again_ai/local_llm/ollama/tokens.py,sha256=k7K7enOSuCJRHteDG0x-CbuivQ2uAtKK3e0Jr7-BUd4,3952
@@ -32,8 +32,8 @@ not_again_ai/viz/distributions.py,sha256=OyWwJaNI6lMRm_iSrhq-CORLNvXfeuLSgDtVo3u
 not_again_ai/viz/scatterplot.py,sha256=5CUOWeknbBOaZPeX9oPin5sBkRKEwk8qeFH45R-9LlY,2292
 not_again_ai/viz/time_series.py,sha256=pOGZqXp_2nd6nKo-PUQNCtmMh__69jxQ6bQibTGLwZA,5212
 not_again_ai/viz/utils.py,sha256=hN7gwxtBt3U6jQni2K8j5m5pCXpaJDoNzGhBBikEU28,238
-not_again_ai-0.
-not_again_ai-0.
-not_again_ai-0.
-not_again_ai-0.
-not_again_ai-0.
+not_again_ai-0.11.0.dist-info/LICENSE,sha256=btjOgNGpp-ux5xOo1Gx1MddxeWtT9sof3s3Nui29QfA,1071
+not_again_ai-0.11.0.dist-info/METADATA,sha256=S7IbgUSScGSxAeKWmyxk4hpy4qS-eLqpmfDfFEZq4Ok,15517
+not_again_ai-0.11.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+not_again_ai-0.11.0.dist-info/entry_points.txt,sha256=EMJegugnmJUd-jMUA_qIRMIPAasbei8gP6O4-ER0BxQ,61
+not_again_ai-0.11.0.dist-info/RECORD,,
```
File without changes
|
File without changes
|
File without changes
|