ai-microcore 3.16.5__tar.gz → 4.0.0.dev2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/PKG-INFO +19 -1
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/README.md +18 -0
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/__init__.py +1 -1
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/_prepare_llm_args.py +1 -1
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/ai_func/__init__.py +25 -4
- ai_microcore-4.0.0.dev2/microcore/ai_func/ai-func.json.j2 +11 -0
- ai_microcore-3.16.5/microcore/ai_func/python_ai_func.j2 → ai_microcore-4.0.0.dev2/microcore/ai_func/ai-func.pythonic.j2 +1 -1
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/configuration.py +4 -0
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/embedding_db/__init__.py +3 -2
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/embedding_db/chromadb.py +17 -4
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/llm/shared.py +2 -1
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/logging.py +15 -0
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/message_types.py +8 -2
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/LICENSE +0 -0
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/_env.py +0 -0
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/_llm_functions.py +0 -0
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/ai_modules.py +0 -0
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/file_storage.py +0 -0
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/json_parsing.py +0 -0
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/llm/__init__.py +0 -0
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/llm/_openai_llm_v0.py +0 -0
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/llm/_openai_llm_v1.py +0 -0
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/llm/anthropic.py +0 -0
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/llm/google_genai.py +0 -0
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/llm/google_vertex_ai.py +0 -0
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/llm/local_llm.py +0 -0
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/llm/local_transformers.py +0 -0
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/llm/openai_llm.py +0 -0
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/metrics.py +0 -0
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/python.py +0 -0
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/templating/__init__.py +0 -0
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/templating/jinja2.py +0 -0
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/text2speech/elevenlabs.py +0 -0
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/tokenizing.py +0 -0
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/types.py +0 -0
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/ui.py +0 -0
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/utils.py +0 -0
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/wrappers/__init__.py +0 -0
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/wrappers/llm_response_wrapper.py +0 -0
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/wrappers/prompt_wrapper.py +0 -0
- {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/pyproject.toml +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ai-microcore
|
|
3
|
-
Version: 3.16.5
|
|
3
|
+
Version: 4.0.0.dev2
|
|
4
4
|
Summary: # Minimalistic Foundation for AI Applications
|
|
5
5
|
Keywords: llm,large language models,ai,similarity search,ai search,gpt,openai
|
|
6
6
|
Author-email: Vitalii Stepanenko <mail@vitalii.in>
|
|
@@ -132,6 +132,24 @@ See [transformers installation](https://huggingface.co/docs/transformers/install
|
|
|
132
132
|
<br>💡 <small>Setting `USE_DOT_ENV` to `false` disables reading configuration files.</small>
|
|
133
133
|
3. OS environment variables have the lowest priority.
|
|
134
134
|
|
|
135
|
+
### Vector Databases
|
|
136
|
+
|
|
137
|
+
Vector database functions are available via `microcore.texts`.
|
|
138
|
+
Default vector database is [Chroma](https://www.trychroma.com/).
|
|
139
|
+
In order to use vector database functions, you need to install the `chromadb` package:
|
|
140
|
+
```bash
|
|
141
|
+
pip install chromadb
|
|
142
|
+
```
|
|
143
|
+
By default, MicroCore will use ChromaDB PersistentClient (if corresponding package is installed).
|
|
144
|
+
Alternatively, you can run Chroma as separate service and configure MicroCore to use HttpClient:
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
from microcore import configure
|
|
148
|
+
configure(
|
|
149
|
+
EMBEDDING_DB_HOST = 'localhost',
|
|
150
|
+
EMBEDDING_DB_PORT = 8000,
|
|
151
|
+
)
|
|
152
|
+
```
|
|
135
153
|
|
|
136
154
|
## 🌟 Core Functions
|
|
137
155
|
|
|
@@ -107,6 +107,24 @@ See [transformers installation](https://huggingface.co/docs/transformers/install
|
|
|
107
107
|
<br>💡 <small>Setting `USE_DOT_ENV` to `false` disables reading configuration files.</small>
|
|
108
108
|
3. OS environment variables have the lowest priority.
|
|
109
109
|
|
|
110
|
+
### Vector Databases
|
|
111
|
+
|
|
112
|
+
Vector database functions are available via `microcore.texts`.
|
|
113
|
+
Default vector database is [Chroma](https://www.trychroma.com/).
|
|
114
|
+
In order to use vector database functions, you need to install the `chromadb` package:
|
|
115
|
+
```bash
|
|
116
|
+
pip install chromadb
|
|
117
|
+
```
|
|
118
|
+
By default, MicroCore will use ChromaDB PersistentClient (if corresponding package is installed).
|
|
119
|
+
Alternatively, you can run Chroma as separate service and configure MicroCore to use HttpClient:
|
|
120
|
+
|
|
121
|
+
```python
|
|
122
|
+
from microcore import configure
|
|
123
|
+
configure(
|
|
124
|
+
EMBEDDING_DB_HOST = 'localhost',
|
|
125
|
+
EMBEDDING_DB_PORT = 8000,
|
|
126
|
+
)
|
|
127
|
+
```
|
|
110
128
|
|
|
111
129
|
## 🌟 Core Functions
|
|
112
130
|
|
|
@@ -22,7 +22,7 @@ def prepare_chat_messages(prompt: TPrompt) -> list[dict]:
|
|
|
22
22
|
dict(role=DEFAULT_MESSAGE_ROLE, content=msg)
|
|
23
23
|
if isinstance(msg, str)
|
|
24
24
|
else (
|
|
25
|
-
asdict(msg, dict_factory=msg.dict_factory)
|
|
25
|
+
asdict(msg, dict_factory=msg.DICT_FACTORY)
|
|
26
26
|
if isinstance(msg, Msg)
|
|
27
27
|
else msg
|
|
28
28
|
)
|
|
@@ -5,13 +5,23 @@ descr: Allows to describe python functions for LLM
|
|
|
5
5
|
|
|
6
6
|
import ast
|
|
7
7
|
import inspect
|
|
8
|
+
from enum import Enum
|
|
8
9
|
from typing import Dict, Any
|
|
9
10
|
import docstring_parser
|
|
10
11
|
from .. import tpl
|
|
12
|
+
from ..utils import dedent
|
|
11
13
|
|
|
12
14
|
|
|
15
|
+
class AiFuncSyntax(str, Enum):
|
|
16
|
+
PYTHONIC: str = "pythonic"
|
|
17
|
+
JSON: str = "json"
|
|
18
|
+
DEFAULT: str = str(JSON)
|
|
19
|
+
|
|
20
|
+
def __str__(self):
|
|
21
|
+
return self.value
|
|
22
|
+
|
|
13
23
|
def func_arg_comments(func):
|
|
14
|
-
func_source = inspect.getsource(func)
|
|
24
|
+
func_source = dedent(inspect.getsource(func))
|
|
15
25
|
module = ast.parse(func_source)
|
|
16
26
|
func_def = module.body[0]
|
|
17
27
|
|
|
@@ -60,7 +70,7 @@ def func_metadata(func) -> Dict[str, Any]:
|
|
|
60
70
|
for name, val in metadata["args"].items():
|
|
61
71
|
val["comment"] = arg_comments[name]
|
|
62
72
|
|
|
63
|
-
|
|
73
|
+
# Parse docstring
|
|
64
74
|
parsed_docstring = docstring_parser.parse(inspect.getdoc(func))
|
|
65
75
|
|
|
66
76
|
# Add descriptions from parsed docstring to parameters
|
|
@@ -71,6 +81,17 @@ def func_metadata(func) -> Dict[str, Any]:
|
|
|
71
81
|
return metadata
|
|
72
82
|
|
|
73
83
|
|
|
74
|
-
def describe_ai_func(func):
|
|
84
|
+
def describe_ai_func(func: callable, syntax: AiFuncSyntax | str = None) -> str:
|
|
85
|
+
"""
|
|
86
|
+
Renders function description for LLM
|
|
87
|
+
Args:
|
|
88
|
+
func: callable: function to describe
|
|
89
|
+
syntax: AiFuncSyntax | str: syntax to use for the description
|
|
90
|
+
- Use AiFuncSyntax enums to use standard templates ("json", "pythonic")
|
|
91
|
+
- Use custom template name to use custom template
|
|
92
|
+
Returns: str: rendered description, part of prompt
|
|
93
|
+
"""
|
|
94
|
+
syntax = syntax or AiFuncSyntax.DEFAULT
|
|
95
|
+
tpl_file = f"ai-func.{syntax}.j2" if syntax in AiFuncSyntax else syntax
|
|
75
96
|
metadata = func_metadata(func)
|
|
76
|
-
return tpl(
|
|
97
|
+
return tpl(tpl_file, **metadata)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# {{ description or name.replace('_', ' ').capitalize() }}
|
|
2
|
+
{
|
|
3
|
+
"call": "{{ name }}"{% if args %}{{ "," }}{% endif %}
|
|
4
|
+
{%- for k,v in args.items() %}
|
|
5
|
+
"{{ k }}":
|
|
6
|
+
{%- if v.type %} <{{ v.type }}>{% endif -%}
|
|
7
|
+
{%- if v.default != 'NOT_SET' %} (default = {{ v.default }}){%endif-%}
|
|
8
|
+
{%- if not loop.last -%},{%- endif -%}
|
|
9
|
+
{%- if v.comment %} {{ v.comment }}{% endif -%}
|
|
10
|
+
{%- endfor -%}
|
|
11
|
+
{{ "\n}" }}
|
|
@@ -369,6 +369,10 @@ class Config(LLMConfig):
|
|
|
369
369
|
|
|
370
370
|
EMBEDDING_DB_ALLOW_DUPLICATES: bool = from_env(dtype=bool, default=False)
|
|
371
371
|
|
|
372
|
+
EMBEDDING_DB_HOST: str = from_env(default=None)
|
|
373
|
+
|
|
374
|
+
EMBEDDING_DB_PORT: str = from_env(default=None)
|
|
375
|
+
|
|
372
376
|
DEFAULT_ENCODING: str = from_env("utf-8")
|
|
373
377
|
"""Used in file system operations, utf-8 by default"""
|
|
374
378
|
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
import sys
|
|
3
2
|
from abc import ABC, abstractmethod
|
|
4
3
|
from dataclasses import dataclass
|
|
5
4
|
|
|
@@ -8,6 +7,8 @@ import tiktoken
|
|
|
8
7
|
from ..utils import ExtendedString
|
|
9
8
|
|
|
10
9
|
|
|
10
|
+
INT32_MAX = 2**31 - 1 # 2147483647
|
|
11
|
+
|
|
11
12
|
class SearchResults(list):
|
|
12
13
|
def fit_to_token_size(
|
|
13
14
|
self,
|
|
@@ -115,7 +116,7 @@ class AbstractEmbeddingDB(ABC):
|
|
|
115
116
|
**kwargs,
|
|
116
117
|
) -> SearchResults | list[str | SearchResult]:
|
|
117
118
|
return self.search(
|
|
118
|
-
collection, query, n_results=
|
|
119
|
+
collection, query, n_results=INT32_MAX, where=where, **kwargs
|
|
119
120
|
)
|
|
120
121
|
|
|
121
122
|
@abstractmethod
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
from dataclasses import dataclass
|
|
2
3
|
import uuid
|
|
3
4
|
|
|
@@ -16,10 +17,22 @@ class ChromaEmbeddingDB(AbstractEmbeddingDB):
|
|
|
16
17
|
client: chromadb.Client = None
|
|
17
18
|
|
|
18
19
|
def __post_init__(self):
|
|
19
|
-
self.
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
20
|
+
if self.config.EMBEDDING_DB_HOST:
|
|
21
|
+
logging.info(
|
|
22
|
+
"Connecting to ChromaDB at %s:%s",
|
|
23
|
+
self.config.EMBEDDING_DB_HOST,
|
|
24
|
+
self.config.EMBEDDING_DB_PORT
|
|
25
|
+
)
|
|
26
|
+
self.client = chromadb.HttpClient(
|
|
27
|
+
host=self.config.EMBEDDING_DB_HOST,
|
|
28
|
+
port=self.config.EMBEDDING_DB_PORT or 8000,
|
|
29
|
+
settings=Settings(anonymized_telemetry=False),
|
|
30
|
+
)
|
|
31
|
+
else:
|
|
32
|
+
self.client = chromadb.PersistentClient(
|
|
33
|
+
path=f"{self.config.STORAGE_PATH}/{self.config.EMBEDDING_DB_FOLDER}",
|
|
34
|
+
settings=Settings(anonymized_telemetry=False),
|
|
35
|
+
)
|
|
23
36
|
self.embedding_function = (
|
|
24
37
|
self.config.EMBEDDING_DB_FUNCTION
|
|
25
38
|
or embedding_functions.DefaultEmbeddingFunction()
|
|
@@ -41,6 +41,20 @@ def _format_request_log_str(prompt, **kwargs) -> str:
|
|
|
41
41
|
)
|
|
42
42
|
if out.endswith("\n"):
|
|
43
43
|
out = out[:-1]
|
|
44
|
+
if LoggingConfig.STRIP_REQUEST_LINES:
|
|
45
|
+
start_lines, end_lines = LoggingConfig.STRIP_REQUEST_LINES
|
|
46
|
+
max_lines = start_lines + end_lines
|
|
47
|
+
lines = out.split("\n")
|
|
48
|
+
if len(lines) > max_lines:
|
|
49
|
+
out = "\n".join(
|
|
50
|
+
lines[:start_lines]
|
|
51
|
+
+ [
|
|
52
|
+
f"{LoggingConfig.INDENT}{Fore.YELLOW}"
|
|
53
|
+
f"...(output was truncated)..."
|
|
54
|
+
f"{LoggingConfig.PROMPT_COLOR}"
|
|
55
|
+
]
|
|
56
|
+
+ (lines[-end_lines:] if end_lines else [])
|
|
57
|
+
)
|
|
44
58
|
return out
|
|
45
59
|
|
|
46
60
|
|
|
@@ -72,6 +86,7 @@ class LoggingConfig:
|
|
|
72
86
|
OUTPUT_METHOD: callable = print
|
|
73
87
|
REQUEST_FORMATTER: callable = _format_request_log_str
|
|
74
88
|
RESPONSE_FORMATTER: callable = _format_response_log_str
|
|
89
|
+
STRIP_REQUEST_LINES: tuple[int, int] | None = [40, 15]
|
|
75
90
|
|
|
76
91
|
|
|
77
92
|
def _log_request(prompt, **kwargs):
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from enum import Enum
|
|
4
4
|
from dataclasses import dataclass, field
|
|
5
|
+
from typing import ClassVar
|
|
5
6
|
|
|
6
7
|
|
|
7
8
|
class Role(str, Enum):
|
|
@@ -9,16 +10,20 @@ class Role(str, Enum):
|
|
|
9
10
|
USER = "user"
|
|
10
11
|
ASSISTANT = "assistant"
|
|
11
12
|
|
|
13
|
+
def __str__(self):
|
|
14
|
+
return self.value
|
|
15
|
+
|
|
12
16
|
|
|
13
17
|
DEFAULT_MESSAGE_ROLE = Role.USER
|
|
14
18
|
|
|
15
19
|
|
|
16
20
|
@dataclass
|
|
17
21
|
class Msg:
|
|
18
|
-
dict_factory = dict
|
|
19
22
|
role: str = field(default=DEFAULT_MESSAGE_ROLE)
|
|
20
23
|
content: str = field(default="")
|
|
21
24
|
|
|
25
|
+
DICT_FACTORY: ClassVar = dict
|
|
26
|
+
|
|
22
27
|
def __str__(self):
|
|
23
28
|
return str(self.content)
|
|
24
29
|
|
|
@@ -49,10 +54,11 @@ class PartialMsg(AssistantMsg):
|
|
|
49
54
|
is_partial = True
|
|
50
55
|
"""Custom dictionary class to handle additional properties"""
|
|
51
56
|
|
|
52
|
-
dict_factory = _PartialMsgDict
|
|
53
57
|
placeholder = "<|placeholder|>"
|
|
54
58
|
variants_splitter = "<|or|>"
|
|
55
59
|
|
|
60
|
+
DICT_FACTORY: ClassVar = _PartialMsgDict
|
|
61
|
+
|
|
56
62
|
@staticmethod
|
|
57
63
|
def split_prefix_and_suffixes(content: str):
|
|
58
64
|
parts = content.split(PartialMsg.placeholder)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|