not-again-ai 0.13.0__py3-none-any.whl → 0.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- not_again_ai/data/__init__.py +7 -0
- not_again_ai/data/web.py +56 -0
- not_again_ai/llm/chat_completion/__init__.py +4 -0
- not_again_ai/llm/chat_completion/interface.py +32 -0
- not_again_ai/llm/chat_completion/providers/ollama_api.py +227 -0
- not_again_ai/llm/chat_completion/providers/openai_api.py +290 -0
- not_again_ai/llm/chat_completion/types.py +145 -0
- not_again_ai/llm/prompting/__init__.py +3 -0
- not_again_ai/llm/prompting/compile_messages.py +98 -0
- not_again_ai/llm/prompting/interface.py +46 -0
- not_again_ai/llm/prompting/providers/openai_tiktoken.py +122 -0
- not_again_ai/llm/prompting/types.py +43 -0
- {not_again_ai-0.13.0.dist-info → not_again_ai-0.15.0.dist-info}/METADATA +63 -58
- not_again_ai-0.15.0.dist-info/RECORD +32 -0
- {not_again_ai-0.13.0.dist-info → not_again_ai-0.15.0.dist-info}/WHEEL +1 -1
- not_again_ai/llm/gh_models/azure_ai_client.py +0 -20
- not_again_ai/llm/gh_models/chat_completion.py +0 -81
- not_again_ai/llm/openai_api/chat_completion.py +0 -200
- not_again_ai/llm/openai_api/context_management.py +0 -70
- not_again_ai/llm/openai_api/embeddings.py +0 -62
- not_again_ai/llm/openai_api/openai_client.py +0 -78
- not_again_ai/llm/openai_api/prompts.py +0 -191
- not_again_ai/llm/openai_api/tokens.py +0 -184
- not_again_ai/local_llm/__init__.py +0 -27
- not_again_ai/local_llm/chat_completion.py +0 -105
- not_again_ai/local_llm/huggingface/__init__.py +0 -0
- not_again_ai/local_llm/huggingface/chat_completion.py +0 -59
- not_again_ai/local_llm/huggingface/helpers.py +0 -23
- not_again_ai/local_llm/ollama/__init__.py +0 -0
- not_again_ai/local_llm/ollama/chat_completion.py +0 -111
- not_again_ai/local_llm/ollama/model_mapping.py +0 -17
- not_again_ai/local_llm/ollama/ollama_client.py +0 -24
- not_again_ai/local_llm/ollama/service.py +0 -81
- not_again_ai/local_llm/ollama/tokens.py +0 -104
- not_again_ai/local_llm/prompts.py +0 -38
- not_again_ai/local_llm/tokens.py +0 -90
- not_again_ai-0.13.0.dist-info/RECORD +0 -42
- not_again_ai-0.13.0.dist-info/entry_points.txt +0 -3
- /not_again_ai/llm/{gh_models → chat_completion/providers}/__init__.py +0 -0
- /not_again_ai/llm/{openai_api → prompting/providers}/__init__.py +0 -0
- {not_again_ai-0.13.0.dist-info → not_again_ai-0.15.0.dist-info}/LICENSE +0 -0
not_again_ai/llm/chat_completion/types.py (new file):

```diff
@@ -0,0 +1,145 @@
+from enum import Enum
+from typing import Any, Generic, Literal, TypeVar
+
+from pydantic import BaseModel, Field
+
+
+class Role(str, Enum):
+    ASSISTANT = "assistant"
+    DEVELOPER = "developer"
+    SYSTEM = "system"
+    TOOL = "tool"
+    USER = "user"
+
+
+class ContentPartType(str, Enum):
+    TEXT = "text"
+    IMAGE = "image_url"
+
+
+class TextContent(BaseModel):
+    type: Literal[ContentPartType.TEXT] = ContentPartType.TEXT
+    text: str
+
+
+class ImageDetail(str, Enum):
+    AUTO = "auto"
+    LOW = "low"
+    HIGH = "high"
+
+
+class ImageUrl(BaseModel):
+    url: str
+    detail: ImageDetail = ImageDetail.AUTO
+
+
+class ImageContent(BaseModel):
+    type: Literal[ContentPartType.IMAGE] = ContentPartType.IMAGE
+    image_url: ImageUrl
+
+
+ContentT = TypeVar("ContentT", bound=str | list[TextContent | ImageContent])
+
+
+class BaseMessage(BaseModel, Generic[ContentT]):
+    content: ContentT
+    role: Role
+    name: str | None = None
+
+
+class Function(BaseModel):
+    name: str
+    arguments: dict[str, Any]
+
+
+class ToolCall(BaseModel):
+    id: str
+    function: Function
+    type: Literal["function"] = "function"
+
+
+class DeveloperMessage(BaseMessage[str]):
+    role: Literal[Role.DEVELOPER] = Role.DEVELOPER
+
+
+class SystemMessage(BaseMessage[str]):
+    role: Literal[Role.SYSTEM] = Role.SYSTEM
+
+
+class UserMessage(BaseMessage[str | list[TextContent | ImageContent]]):
+    role: Literal[Role.USER] = Role.USER
+
+
+class AssistantMessage(BaseMessage[str]):
+    role: Literal[Role.ASSISTANT] = Role.ASSISTANT
+    refusal: str | None = None
+    tool_calls: list[ToolCall] | None = None
+
+
+class ToolMessage(BaseMessage[str]):
+    # A tool message's name field will be interpreted as "tool_call_id"
+    role: Literal[Role.TOOL] = Role.TOOL
+
+
+MessageT = AssistantMessage | DeveloperMessage | SystemMessage | ToolMessage | UserMessage
+
+
+class ChatCompletionRequest(BaseModel):
+    messages: list[MessageT]
+    model: str
+
+    max_completion_tokens: int | None = Field(default=None)
+    context_window: int | None = Field(default=None)
+    logprobs: bool | None = Field(default=None)
+    n: int | None = Field(default=None)
+
+    tools: list[dict[str, Any]] | None = Field(default=None)
+    tool_choice: str | None = Field(default=None)
+    parallel_tool_calls: bool | None = Field(default=None)
+    json_mode: bool | None = Field(default=None)
+    structured_outputs: dict[str, Any] | None = Field(default=None)
+
+    temperature: float | None = Field(default=None)
+    reasoning_effort: Literal["low", "medium", "high"] | None = Field(default=None)
+    top_p: float | None = Field(default=None)
+    logit_bias: dict[str, float] | None = Field(default=None)
+    top_logprobs: int | None = Field(default=None)
+    frequency_penalty: float | None = Field(default=None)
+    presence_penalty: float | None = Field(default=None)
+    stop: str | list[str] | None = Field(default=None)
+
+    seed: int | None = Field(default=None)
+
+    mirostat: int | None = Field(default=None)
+    mirostat_eta: float | None = Field(default=None)
+    mirostat_tau: float | None = Field(default=None)
+    repeat_last_n: int | None = Field(default=None)
+    tfs_z: float | None = Field(default=None)
+    top_k: int | None = Field(default=None)
+    min_p: float | None = Field(default=None)
+
+
+class ChatCompletionChoice(BaseModel):
+    message: AssistantMessage
+    finish_reason: Literal["stop", "length", "tool_calls", "content_filter"]
+
+    json_message: dict[str, Any] | None = Field(default=None)
+    logprobs: list[dict[str, Any] | list[dict[str, Any]]] | None = Field(default=None)
+
+    extras: Any | None = Field(default=None)
+
+
+class ChatCompletionResponse(BaseModel):
+    choices: list[ChatCompletionChoice]
+
+    errors: str = Field(default="")
+
+    completion_tokens: int
+    prompt_tokens: int
+    completion_detailed_tokens: dict[str, int] | None = Field(default=None)
+    prompt_detailed_tokens: dict[str, int] | None = Field(default=None)
+    response_duration: float
+
+    system_fingerprint: str | None = Field(default=None)
+
+    extras: Any | None = Field(default=None)
```
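For orientation, here is a minimal sketch of how these models compose into a request, assuming the `llm` extra is installed; the model name and image data URL are placeholders, not values from this package:

```python
from not_again_ai.llm.chat_completion.types import (
    ChatCompletionRequest,
    ImageContent,
    ImageUrl,
    SystemMessage,
    TextContent,
    UserMessage,
)

# Build a two-turn conversation with a multimodal user message.
request = ChatCompletionRequest(
    model="gpt-4o",  # placeholder model name
    messages=[
        SystemMessage(content="You are a concise assistant."),
        UserMessage(
            content=[
                TextContent(text="What is in this image?"),
                # Placeholder data URL; see create_image_url below for building real ones.
                ImageContent(image_url=ImageUrl(url="data:image/png;base64,...")),
            ]
        ),
    ],
    temperature=0.2,
)

# Pydantic validates roles and content part types at construction time.
print(request.model_dump(exclude_none=True))
```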
not_again_ai/llm/prompting/compile_messages.py (new file):

```diff
@@ -0,0 +1,98 @@
+import base64
+from copy import deepcopy
+import mimetypes
+from pathlib import Path
+from typing import Any
+
+from liquid import Template
+from openai.lib._pydantic import to_strict_json_schema
+from pydantic import BaseModel
+
+from not_again_ai.llm.chat_completion.types import MessageT, TextContent
+
+
+def compile_messages(messages: list[MessageT], variables: dict[str, str]) -> list[MessageT]:
+    """Compiles messages using Liquid templating and the provided variables.
+    Calls Template(content_part).render(**variables) on each text content part.
+
+    Args:
+        messages: List of MessageT where content can contain Liquid templates.
+        variables: The variables to inject into the templates.
+
+    Returns:
+        The same list of messages with the content parts injected with the variables.
+    """
+    messages_formatted = deepcopy(messages)
+    for message in messages_formatted:
+        if isinstance(message.content, str):
+            # For simple string content, apply template directly
+            message.content = Template(message.content).render(**variables)
+        elif isinstance(message.content, list):
+            # For UserMessage with content parts
+            for content_part in message.content:
+                if isinstance(content_part, TextContent):
+                    content_part.text = Template(content_part.text).render(**variables)
+                # ImageContent parts are left unchanged
+    return messages_formatted
+
+
+def encode_image(image_path: Path) -> str:
+    """Encodes an image file at the given Path to base64.
+
+    Args:
+        image_path: The path to the image file to encode.
+
+    Returns:
+        The base64 encoded image as a string.
+    """
+    with Path.open(image_path, "rb") as image_file:
+        return base64.b64encode(image_file.read()).decode("utf-8")
+
+
+def create_image_url(image_path: Path) -> str:
+    """Creates a data URL for an image file at the given Path.
+
+    Args:
+        image_path: The path to the image file to encode.
+
+    Returns:
+        The data URL for the image.
+    """
+    image_data = encode_image(image_path)
+
+    valid_mime_types = ["image/jpeg", "image/png", "image/webp", "image/gif"]
+
+    # Get the MIME type from the image file extension
+    mime_type = mimetypes.guess_type(image_path)[0]
+
+    # Check if the MIME type is valid
+    # List of valid types is here: https://platform.openai.com/docs/guides/vision/what-type-of-files-can-i-upload
+    if mime_type not in valid_mime_types:
+        raise ValueError(f"Invalid MIME type for image: {mime_type}")
+
+    return f"data:{mime_type};base64,{image_data}"
+
+
+def pydantic_to_json_schema(
+    pydantic_model: type[BaseModel], schema_name: str, description: str | None = None
+) -> dict[str, Any]:
+    """Converts a Pydantic model to a JSON schema expected by Structured Outputs.
+    Must adhere to the supported schemas: https://platform.openai.com/docs/guides/structured-outputs/supported-schemas
+
+    Args:
+        pydantic_model: The Pydantic model to convert.
+        schema_name: The name of the schema.
+        description: An optional description of the schema.
+
+    Returns:
+        A JSON schema dictionary representing the Pydantic model.
+    """
+    converted_pydantic = to_strict_json_schema(pydantic_model)
+    schema = {
+        "name": schema_name,
+        "strict": True,
+        "schema": converted_pydantic,
+    }
+    if description:
+        schema["description"] = description
+    return schema
```
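Two quick usage sketches (the variable values and the `Answer` model are illustrative only). `compile_messages` renders Liquid placeholders into a deep copy, leaving the originals untouched, and `pydantic_to_json_schema` produces the wrapper dict that Structured Outputs expects:

```python
from pydantic import BaseModel

from not_again_ai.llm.chat_completion.types import SystemMessage, UserMessage
from not_again_ai.llm.prompting.compile_messages import compile_messages, pydantic_to_json_schema

messages = [
    SystemMessage(content="You are helping {{ name }}."),
    UserMessage(content="Summarize {{ topic }} in one sentence."),
]
compiled = compile_messages(messages, {"name": "Dave", "topic": "tokenizers"})
# compiled[1].content is now "Summarize tokenizers in one sentence.";
# `messages` itself is unchanged because the function deep-copies its input.


class Answer(BaseModel):  # illustrative response model
    answer: str
    confidence: float


schema = pydantic_to_json_schema(Answer, schema_name="answer")
# schema == {"name": "answer", "strict": True, "schema": {...}}
```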
not_again_ai/llm/prompting/interface.py (new file):

```diff
@@ -0,0 +1,46 @@
+from collections.abc import Collection, Set
+from typing import Literal
+
+from loguru import logger
+
+from not_again_ai.llm.chat_completion.types import MessageT
+from not_again_ai.llm.prompting.providers.openai_tiktoken import TokenizerOpenAI
+from not_again_ai.llm.prompting.types import BaseTokenizer
+
+
+class Tokenizer(BaseTokenizer):
+    def __init__(
+        self,
+        model: str,
+        provider: str,
+        allowed_special: Literal["all"] | Set[str] | None = None,
+        disallowed_special: Literal["all"] | Collection[str] | None = None,
+    ):
+        self.model = model
+        self.provider = provider
+        self.allowed_special = allowed_special
+        self.disallowed_special = disallowed_special
+
+        self.init_tokenizer(model, provider, allowed_special, disallowed_special)
+
+    def init_tokenizer(
+        self,
+        model: str,
+        provider: str,
+        allowed_special: Literal["all"] | Set[str] | None = None,
+        disallowed_special: Literal["all"] | Collection[str] | None = None,
+    ) -> None:
+        if provider == "openai" or provider == "azure_openai":
+            self.tokenizer = TokenizerOpenAI(model, provider, allowed_special, disallowed_special)
+        else:
+            logger.warning(f"Provider {provider} not supported. Initializing using tiktoken and gpt-4o.")
+            self.tokenizer = TokenizerOpenAI("gpt-4o", "openai", allowed_special, disallowed_special)
+
+    def truncate_str(self, text: str, max_len: int) -> str:
+        return self.tokenizer.truncate_str(text, max_len)
+
+    def num_tokens_in_str(self, text: str) -> int:
+        return self.tokenizer.num_tokens_in_str(text)
+
+    def num_tokens_in_messages(self, messages: list[MessageT]) -> int:
+        return self.tokenizer.num_tokens_in_messages(messages)
```
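A minimal sketch of this facade in use, assuming the `llm` extra is installed; an unknown provider falls back to tiktoken with gpt-4o, as the warning above says:

```python
from not_again_ai.llm.chat_completion.types import UserMessage
from not_again_ai.llm.prompting.interface import Tokenizer

tokenizer = Tokenizer(model="gpt-4o", provider="openai")

print(tokenizer.num_tokens_in_str("Hello, world!"))          # token count of a string
print(tokenizer.truncate_str("one two three four five", 3))  # keeps only the first 3 tokens
print(tokenizer.num_tokens_in_messages([UserMessage(content="Hi!")]))
```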
not_again_ai/llm/prompting/providers/openai_tiktoken.py (new file):

```diff
@@ -0,0 +1,122 @@
+from collections.abc import Collection, Set
+from typing import Literal
+
+from loguru import logger
+import tiktoken
+
+from not_again_ai.llm.chat_completion.types import MessageT
+from not_again_ai.llm.prompting.types import BaseTokenizer
+
+
+class TokenizerOpenAI(BaseTokenizer):
+    def __init__(
+        self,
+        model: str,
+        provider: str = "openai",
+        allowed_special: Literal["all"] | Set[str] | None = None,
+        disallowed_special: Literal["all"] | Collection[str] | None = None,
+    ):
+        self.model = model
+        self.provider = provider
+        self.allowed_special = allowed_special
+        self.disallowed_special = disallowed_special
+
+        self.init_tokenizer(model, provider, allowed_special, disallowed_special)
+
+    def init_tokenizer(
+        self,
+        model: str,
+        provider: str = "openai",
+        allowed_special: Literal["all"] | Set[str] | None = None,
+        disallowed_special: Literal["all"] | Collection[str] | None = None,
+    ) -> None:
+        try:
+            self.encoding = tiktoken.encoding_for_model(model)
+        except KeyError:
+            default_encoding = "o200k_base"
+            logger.warning(f"Model {model} not found. Using {default_encoding} encoding.")
+            self.encoding = tiktoken.get_encoding(default_encoding)
+
+        # Set defaults if not provided
+        if not allowed_special:
+            self.allowed_special = set()
+        if not disallowed_special:
+            self.disallowed_special = ()
+
+    def truncate_str(self, text: str, max_len: int) -> str:
+        tokens = self.encoding.encode(
+            text,
+            allowed_special=self.allowed_special if self.allowed_special is not None else set(),
+            disallowed_special=self.disallowed_special if self.disallowed_special is not None else (),
+        )
+        if len(tokens) > max_len:
+            tokens = tokens[:max_len]
+            truncated_text = self.encoding.decode(tokens)
+            return truncated_text
+        else:
+            return text
+
+    def num_tokens_in_str(self, text: str) -> int:
+        return len(
+            self.encoding.encode(
+                text,
+                allowed_special=self.allowed_special if self.allowed_special is not None else set(),
+                disallowed_special=self.disallowed_special if self.disallowed_special is not None else (),
+            )
+        )
+
+    def num_tokens_in_messages(self, messages: list[MessageT]) -> int:
+        if self.model in {
+            "gpt-3.5-turbo-0613",
+            "gpt-3.5-turbo-16k-0613",
+            "gpt-3.5-turbo-1106",
+            "gpt-3.5-turbo-0125",
+            "gpt-4-0314",
+            "gpt-4-32k-0314",
+            "gpt-4-0613",
+            "gpt-4-32k-0613",
+            "gpt-4-1106-preview",
+            "gpt-4-turbo-preview",
+            "gpt-4-0125-preview",
+            "gpt-4-turbo",
+            "gpt-4-turbo-2024-04-09",
+            "gpt-4o",
+            "gpt-4o-2024-05-13",
+            "gpt-4o-2024-08-06",
+            "gpt-4o-2024-11-20",
+            "gpt-4o-mini",
+            "gpt-4o-mini-2024-07-18",
+            "o1",
+            "o1-2024-12-17",
+            "o1-mini",
+            "o1-mini-2024-09-12",
+            "o1-preview",
+            "o1-preview-2024-09-12",
+        }:
+            tokens_per_message = 3  # every message follows <|start|>{role/name}\n{content}<|end|>\n
+            tokens_per_name = 1  # if there's a name, the role is omitted
+        elif self.model == "gpt-3.5-turbo-0301":
+            tokens_per_message = 4
+            tokens_per_name = -1
+        else:
+            logger.warning(f"Model {self.model} not supported. Assuming gpt-4o encoding.")
+            tokens_per_message = 3
+            tokens_per_name = 1
+
+        num_tokens = 0
+        for message in messages:
+            num_tokens += tokens_per_message
+            message_dict = message.model_dump(exclude_none=True)
+            for key, value in message_dict.items():
+                if isinstance(value, str):
+                    num_tokens += len(
+                        self.encoding.encode(
+                            value,
+                            allowed_special=self.allowed_special if self.allowed_special is not None else set(),
+                            disallowed_special=self.disallowed_special if self.disallowed_special is not None else (),
+                        )
+                    )
+                if key == "name":
+                    num_tokens += tokens_per_name
+        num_tokens += 3
+        return num_tokens
```
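The message accounting above mirrors OpenAI's cookbook heuristic: a fixed per-message framing cost, plus the encoded string fields of each message, plus 3 tokens priming the assistant's reply. A back-of-the-envelope check using tiktoken directly (counts are estimates and will vary with the encoding):

```python
import tiktoken

enc = tiktoken.get_encoding("o200k_base")  # the fallback encoding used above

role = "user"
content = "Hello!"
# 3 framing tokens for the message + its encoded string fields + 3 reply-priming tokens.
estimate = 3 + len(enc.encode(role)) + len(enc.encode(content)) + 3
print(estimate)
```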
not_again_ai/llm/prompting/types.py (new file):

```diff
@@ -0,0 +1,43 @@
+from abc import ABC, abstractmethod
+from collections.abc import Collection, Set
+from typing import Literal
+
+from not_again_ai.llm.chat_completion.types import MessageT
+
+
+class BaseTokenizer(ABC):
+    def __init__(
+        self,
+        model: str,
+        provider: str,
+        allowed_special: Literal["all"] | Set[str] | None = None,
+        disallowed_special: Literal["all"] | Collection[str] | None = None,
+    ):
+        self.model = model
+        self.provider = provider
+        self.allowed_special = allowed_special
+        self.disallowed_special = disallowed_special
+
+        self.init_tokenizer(model, provider, allowed_special, disallowed_special)
+
+    @abstractmethod
+    def init_tokenizer(
+        self,
+        model: str,
+        provider: str,
+        allowed_special: Literal["all"] | Set[str] | None = None,
+        disallowed_special: Literal["all"] | Collection[str] | None = None,
+    ) -> None:
+        pass
+
+    @abstractmethod
+    def truncate_str(self, text: str, max_len: int) -> str:
+        pass
+
+    @abstractmethod
+    def num_tokens_in_str(self, text: str) -> int:
+        pass
+
+    @abstractmethod
+    def num_tokens_in_messages(self, messages: list[MessageT]) -> int:
+        pass
```
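Because `BaseTokenizer` is an ABC whose `__init__` immediately calls `init_tokenizer`, a new provider only needs to implement the four abstract methods. A toy, purely illustrative subclass that treats whitespace-separated words as tokens:

```python
from collections.abc import Collection, Set
from typing import Literal

from not_again_ai.llm.chat_completion.types import MessageT
from not_again_ai.llm.prompting.types import BaseTokenizer


class WhitespaceTokenizer(BaseTokenizer):
    """Toy tokenizer: counts whitespace-separated words instead of real tokens."""

    def init_tokenizer(
        self,
        model: str,
        provider: str,
        allowed_special: Literal["all"] | Set[str] | None = None,
        disallowed_special: Literal["all"] | Collection[str] | None = None,
    ) -> None:
        pass  # nothing to load for this toy tokenizer

    def truncate_str(self, text: str, max_len: int) -> str:
        return " ".join(text.split()[:max_len])

    def num_tokens_in_str(self, text: str) -> int:
        return len(text.split())

    def num_tokens_in_messages(self, messages: list[MessageT]) -> int:
        return sum(self.num_tokens_in_str(str(m.content)) for m in messages)


tok = WhitespaceTokenizer(model="toy", provider="none")
assert tok.num_tokens_in_str("a b c") == 3
```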
{not_again_ai-0.13.0.dist-info → not_again_ai-0.15.0.dist-info}/METADATA (removed lines are truncated in this diff view and are kept as rendered):

````diff
@@ -1,12 +1,11 @@
-Metadata-Version: 2.
+Metadata-Version: 2.3
 Name: not-again-ai
-Version: 0.13.0
+Version: 0.15.0
 Summary: Designed to once and for all collect all the little things that come up over and over again in AI projects and put them in one place.
-Home-page: https://github.com/DaveCoDev/not-again-ai
 License: MIT
 Author: DaveCoDev
 Author-email: dave.co.dev@gmail.com
-Requires-Python: >=3.11
+Requires-Python: >=3.11, <3.13
 Classifier: Development Status :: 3 - Alpha
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Science/Research
@@ -17,26 +16,27 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Typing :: Typed
+Provides-Extra: data
 Provides-Extra: llm
-Provides-Extra: local-llm
 Provides-Extra: statistics
 Provides-Extra: viz
-Requires-Dist: azure-
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist: pydantic (>=2.
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Project-URL: Documentation, https://github.
+Requires-Dist: azure-identity (>=1.19) ; extra == "llm"
+Requires-Dist: loguru (>=0.7)
+Requires-Dist: numpy (>=2.2) ; extra == "statistics"
+Requires-Dist: numpy (>=2.2) ; extra == "viz"
+Requires-Dist: ollama (>=0.4) ; extra == "llm"
+Requires-Dist: openai (>=1.60) ; extra == "llm"
+Requires-Dist: pandas (>=2.2) ; extra == "viz"
+Requires-Dist: playwright (>=1.49) ; extra == "data"
+Requires-Dist: pydantic (>=2.10)
+Requires-Dist: pytest-playwright (>=0.6) ; extra == "data"
+Requires-Dist: python-liquid (>=1.12) ; extra == "llm"
+Requires-Dist: scikit-learn (>=1.6) ; extra == "statistics"
+Requires-Dist: scipy (>=1.15) ; extra == "statistics"
+Requires-Dist: seaborn (>=0.13) ; extra == "viz"
+Requires-Dist: tiktoken (>=0.8) ; extra == "llm"
+Project-URL: Documentation, https://davecodev.github.io/not-again-ai/
+Project-URL: Homepage, https://github.com/DaveCoDev/not-again-ai
 Project-URL: Repository, https://github.com/DaveCoDev/not-again-ai
 Description-Content-Type: text/markdown
 
@@ -66,40 +66,47 @@ Requires: Python 3.11, or 3.12
 Install the entire package from [PyPI](https://pypi.org/project/not-again-ai/) with:
 
 ```bash
-$ pip install not_again_ai[llm,
+$ pip install not_again_ai[data,llm,statistics,viz]
 ```
 
-Note that local LLM requires separate installations and will not work out of the box due to how hardware dependent it is. Be sure to check the [notebooks](notebooks/local_llm/) for more details.
-
 The package is split into subpackages, so you can install only the parts you need.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+### Base
+1. `pip install not_again_ai`
+
+
+### Data
+1. `pip install not_again_ai[data]`
+1. `playwright install` to download the browser binaries.
+
+
+### LLM
+1. `pip install not_again_ai[llm]`
+1. Setup OpenAI API
+    1. Go to https://platform.openai.com/settings/profile?tab=api-keys to get your API key.
+    1. (Optional) Set the `OPENAI_API_KEY` and the `OPENAI_ORG_ID` environment variables.
+1. Setup Azure OpenAI (AOAI)
+    1. Using AOAI requires using Entra ID authentication. See https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/managed-identity for how to set this up for your AOAI deployment.
+        * Requires the correct role assigned to your user account and being signed into the Azure CLI.
+    1. (Optional) Set the `AZURE_OPENAI_ENDPOINT` environment variable.
+1. If you wish to use Ollama:
+    1. Follow the instructions at https://github.com/ollama/ollama to install Ollama for your system.
+    1. (Optional) [Add Ollama as a startup service (recommended)](https://github.com/ollama/ollama/blob/main/docs/linux.md#adding-ollama-as-a-startup-service-recommended)
+    1. (Optional) To make the Ollama service accessible on your local network from a Linux server, add the following to the `/etc/systemd/system/ollama.service` file which will make Ollama available at `http://<local_address>:11434`:
+        ```bash
+        [Service]
+        ...
+        Environment="OLLAMA_HOST=0.0.0.0"
+        ```
+    1. It is recommended to always have the latest version of Ollama. To update Ollama check the [docs](https://github.com/ollama/ollama/blob/main/docs/). The command for Linux is: `curl -fsSL https://ollama.com/install.sh | sh`
+
+
+### Statistics
+1. `pip install not_again_ai[statistics]`
+
+
+### Visualization
+1. `pip install not_again_ai[viz]`
 
 
 # Development Information
@@ -135,10 +142,8 @@ $ poetry update
 
 To install all dependencies (with all extra dependencies) into an isolated virtual environment:
 
-> Append `--sync` to uninstall dependencies that are no longer in use from the virtual environment.
-
 ```bash
-$ poetry
+$ poetry sync --all-extras
 ```
 
 To [activate](https://python-poetry.org/docs/basic-usage#activating-the-virtual-environment) the
@@ -194,7 +199,7 @@ Automated code quality checks are performed using
 environments and run commands based on [`noxfile.py`](./noxfile.py) for unit testing, PEP 8 style
 guide checking, type checking and documentation generation.
 
-> Note: `nox` is installed into the virtual environment automatically by the `poetry
+> Note: `nox` is installed into the virtual environment automatically by the `poetry sync`
 > command above. Run `poetry shell` to activate the virtual environment.
 
 To run all default sessions:
@@ -229,10 +234,10 @@ areas of the project that are currently not tested.
 
 pytest and code coverage are configured in [`pyproject.toml`](./pyproject.toml).
 
-To
+To run selected tests:
 
 ```bash
-(.venv) $ nox -s test -- -k
+(.venv) $ nox -s test -- -k "test_web"
 ```
 
 ## Code Style Checking
````
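As a quick sanity check of the networked Ollama setup described in the README above, the server's root endpoint answers with a plain-text status; the address below is a placeholder for your server's local IP:

```python
import urllib.request

# Placeholder address; substitute your server's <local_address>.
with urllib.request.urlopen("http://192.168.1.10:11434") as resp:
    print(resp.status, resp.read().decode())  # expected: 200 "Ollama is running"
```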
not_again_ai-0.15.0.dist-info/RECORD (new file):

```diff
@@ -0,0 +1,32 @@
+not_again_ai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+not_again_ai/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+not_again_ai/base/file_system.py,sha256=KNQmacO4Q__CQuq2oPzWrg3rQO48n3evglc9bNiP7KM,949
+not_again_ai/base/parallel.py,sha256=fcYhKBYBWvob84iKp3O93wvFFdXeidljZsShgBLTNGA,3448
+not_again_ai/data/__init__.py,sha256=1jF6mwvtB2PT7IEc3xpbRtZm3g3Lyf8zUqH4AEE4qlQ,244
+not_again_ai/data/web.py,sha256=wjx9cc33jcoJBGonYCIpwygPBFOwz7F-dx_ominmbnI,1838
+not_again_ai/llm/__init__.py,sha256=_wNUL6FDaT369Z8W48FsaC_NkcOZ-ib2MMUvnaLOS-0,451
+not_again_ai/llm/chat_completion/__init__.py,sha256=a2qmmmrXjMKyHGZDjt_xdqYbSrEOBea_VvZArzMboe0,200
+not_again_ai/llm/chat_completion/interface.py,sha256=FCyE-1gLdhwuS0Lv8iTbZvraa4iZjnKB8qb31WF53uk,1204
+not_again_ai/llm/chat_completion/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+not_again_ai/llm/chat_completion/providers/ollama_api.py,sha256=iBTMyF8edo8uxxrorNPtShzmCXG7m0RlEBunWLSO4Mo,7999
+not_again_ai/llm/chat_completion/providers/openai_api.py,sha256=S7TZhDIQ_xpp3JakRVcd3Gpw2UjeHCETdA9MfRKUjCU,12294
+not_again_ai/llm/chat_completion/types.py,sha256=q8APUWWzwCKL0Rs_zEFfph9uBcwh5nAT0f0rp4crvk0,4039
+not_again_ai/llm/prompting/__init__.py,sha256=7YnHro1yH01FLGnao27WyqQDFjNYf9npE5UxoR9YrUU,84
+not_again_ai/llm/prompting/compile_messages.py,sha256=HmVCQ-0iVg8vFWZyppxUf9m_ae5c8rK1Zx8ySPD1Bg8,3452
+not_again_ai/llm/prompting/interface.py,sha256=SMKYabmu3zTWbEDukU6aLU_JQ88apeBWWOF_qZ0s3ww,1783
+not_again_ai/llm/prompting/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+not_again_ai/llm/prompting/providers/openai_tiktoken.py,sha256=8YrEiK3ZHyKVGiVsJ_Rd6eVdISIvcub7ooj-HB7Prsc,4536
+not_again_ai/llm/prompting/types.py,sha256=xz70dnawL9rji7Zr1_mOekY-uUlvKJJf7k9nXJsOXc4,1219
+not_again_ai/py.typed,sha256=UaCuPFa3H8UAakbt-5G8SPacldTOGvJv18pPjUJ5gDY,93
+not_again_ai/statistics/__init__.py,sha256=gA8r9JQFbFSN0ykrHy4G1IQgcky4f2eM5Oo24oVI5Ik,466
+not_again_ai/statistics/dependence.py,sha256=4xaniMkLlTjdXcNVXdwepEAiZ-WaaGYfR9haJC1lU2Q,4434
+not_again_ai/viz/__init__.py,sha256=MeaWae_QRbDEHJ4MWYoY1-Ad6S0FhSDaRhQncS2cpSc,447
+not_again_ai/viz/barplots.py,sha256=rr_2phZgDaqcF5Ve7mBZrVvNXVzEt84RQPIyyeJxsMo,3384
+not_again_ai/viz/distributions.py,sha256=OyWwJaNI6lMRm_iSrhq-CORLNvXfeuLSgDtVo3umnzU,4354
+not_again_ai/viz/scatterplot.py,sha256=5CUOWeknbBOaZPeX9oPin5sBkRKEwk8qeFH45R-9LlY,2292
+not_again_ai/viz/time_series.py,sha256=pOGZqXp_2nd6nKo-PUQNCtmMh__69jxQ6bQibTGLwZA,5212
+not_again_ai/viz/utils.py,sha256=hN7gwxtBt3U6jQni2K8j5m5pCXpaJDoNzGhBBikEU28,238
+not_again_ai-0.15.0.dist-info/LICENSE,sha256=btjOgNGpp-ux5xOo1Gx1MddxeWtT9sof3s3Nui29QfA,1071
+not_again_ai-0.15.0.dist-info/METADATA,sha256=_vGJUluFVmoYQrNwLGMh5NWtH6aiJ5BG8G8hlZ5TRpE,15038
+not_again_ai-0.15.0.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
+not_again_ai-0.15.0.dist-info/RECORD,,
```
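Each RECORD row is `path,sha256=<digest>,<size>`, where the digest is the unpadded URL-safe base64 SHA-256 of the file, per the wheel spec (the `RECORD` row itself carries no hash). A small sketch to recompute an entry's digest for verification:

```python
import base64
import hashlib
from pathlib import Path


def record_hash(path: Path) -> str:
    """Return the sha256=<urlsafe-b64, unpadded> digest format used in RECORD."""
    digest = hashlib.sha256(path.read_bytes()).digest()
    return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")


# e.g. record_hash(Path("not_again_ai/data/web.py")) should match the entry above
```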