ai-microcore 4.0.0.dev2__tar.gz → 4.0.0.dev4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/PKG-INFO +1 -1
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/__init__.py +1 -1
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/_env.py +5 -2
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/ai_func/__init__.py +1 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/configuration.py +13 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/embedding_db/__init__.py +1 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/file_storage.py +4 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/json_parsing.py +1 -1
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/llm/_openai_llm_v0.py +1 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/llm/anthropic.py +9 -1
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/llm/local_transformers.py +1 -1
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/llm/shared.py +1 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/tokenizing.py +2 -1
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/utils.py +90 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/LICENSE +0 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/README.md +0 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/_llm_functions.py +0 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/_prepare_llm_args.py +0 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/ai_func/ai-func.json.j2 +0 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/ai_func/ai-func.pythonic.j2 +0 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/ai_modules.py +0 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/embedding_db/chromadb.py +0 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/llm/__init__.py +0 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/llm/_openai_llm_v1.py +0 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/llm/google_genai.py +0 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/llm/google_vertex_ai.py +0 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/llm/local_llm.py +0 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/llm/openai_llm.py +0 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/logging.py +0 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/message_types.py +0 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/metrics.py +0 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/python.py +0 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/templating/__init__.py +0 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/templating/jinja2.py +0 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/text2speech/elevenlabs.py +0 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/types.py +0 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/ui.py +0 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/wrappers/__init__.py +0 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/wrappers/llm_response_wrapper.py +0 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/wrappers/prompt_wrapper.py +0 -0
- {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/pyproject.toml +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ai-microcore
|
|
3
|
-
Version: 4.0.0.dev2
|
|
3
|
+
Version: 4.0.0.dev4
|
|
4
4
|
Summary: # Minimalistic Foundation for AI Applications
|
|
5
5
|
Keywords: llm,large language models,ai,similarity search,ai search,gpt,openai
|
|
6
6
|
Author-email: Vitalii Stepanenko <mail@vitalii.in>
|
|
@@ -6,7 +6,7 @@ from typing import TYPE_CHECKING
|
|
|
6
6
|
import jinja2
|
|
7
7
|
|
|
8
8
|
from .embedding_db import AbstractEmbeddingDB
|
|
9
|
-
from .configuration import Config, ApiType, LLMConfigError
|
|
9
|
+
from .configuration import Config, ApiType, LLMConfigError, EmbeddingDbType
|
|
10
10
|
from .types import TplFunctionType, LLMAsyncFunctionType, LLMFunctionType
|
|
11
11
|
from .templating.jinja2 import make_jinja2_env, make_tpl_function
|
|
12
12
|
from .llm.openai_llm import make_llm_functions as make_openai_llm_functions
|
|
@@ -134,7 +134,10 @@ class Env:
|
|
|
134
134
|
)
|
|
135
135
|
|
|
136
136
|
def init_similarity_search(self):
|
|
137
|
-
if find_spec("chromadb") is not None:
|
|
137
|
+
if (
|
|
138
|
+
self.config.EMBEDDING_DB_TYPE == EmbeddingDbType.CHROMA
|
|
139
|
+
and find_spec("chromadb") is not None
|
|
140
|
+
):
|
|
138
141
|
from .embedding_db.chromadb import ChromaEmbeddingDB
|
|
139
142
|
|
|
140
143
|
self.texts = ChromaEmbeddingDB(self.config)
|
|
@@ -78,6 +78,17 @@ class ApiType(str, Enum):
|
|
|
78
78
|
def is_local(api_type: str) -> bool:
|
|
79
79
|
return api_type in (ApiType.FUNCTION, ApiType.TRANSFORMERS, ApiType.NONE)
|
|
80
80
|
|
|
81
|
+
def __str__(self):
|
|
82
|
+
return self.value
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class EmbeddingDbType(str, Enum):
|
|
86
|
+
CHROMA = "chroma"
|
|
87
|
+
NONE = ""
|
|
88
|
+
|
|
89
|
+
def __str__(self):
|
|
90
|
+
return self.value
|
|
91
|
+
|
|
81
92
|
|
|
82
93
|
_default_dotenv_loaded = False
|
|
83
94
|
|
|
@@ -373,6 +384,8 @@ class Config(LLMConfig):
|
|
|
373
384
|
|
|
374
385
|
EMBEDDING_DB_PORT: str = from_env(default=None)
|
|
375
386
|
|
|
387
|
+
EMBEDDING_DB_TYPE: str = from_env(EmbeddingDbType.CHROMA)
|
|
388
|
+
|
|
376
389
|
DEFAULT_ENCODING: str = from_env("utf-8")
|
|
377
390
|
"""Used in file system operations, utf-8 by default"""
|
|
378
391
|
|
|
@@ -15,11 +15,15 @@ from .utils import file_link, list_files
|
|
|
15
15
|
|
|
16
16
|
_missing = object()
|
|
17
17
|
|
|
18
|
+
|
|
18
19
|
@dataclass
|
|
19
20
|
class Storage:
|
|
20
21
|
|
|
21
22
|
custom_path: str = field(default="")
|
|
22
23
|
|
|
24
|
+
def __call__(self, custom_path: str):
|
|
25
|
+
return Storage(custom_path)
|
|
26
|
+
|
|
23
27
|
@property
|
|
24
28
|
def path(self) -> Path:
|
|
25
29
|
return Path(str(self.custom_path) or config().STORAGE_PATH)
|
|
@@ -9,6 +9,7 @@ from ..wrappers.llm_response_wrapper import LLMResponse
|
|
|
9
9
|
from ..utils import is_chat_model
|
|
10
10
|
from .shared import prepare_callbacks
|
|
11
11
|
|
|
12
|
+
|
|
12
13
|
def _get_chunk_text(chunk, mode_chat_model: bool):
|
|
13
14
|
# Azure API gives first chunk with empty choices
|
|
14
15
|
choice = chunk.choices[0] if len(chunk.choices) else {}
|
|
@@ -9,6 +9,7 @@ from ..types import LLMAsyncFunctionType, LLMFunctionType
|
|
|
9
9
|
from ..wrappers.llm_response_wrapper import LLMResponse
|
|
10
10
|
from .shared import prepare_callbacks
|
|
11
11
|
|
|
12
|
+
|
|
12
13
|
def _get_chunk_text(chunk):
|
|
13
14
|
return isinstance(chunk, ContentBlockDeltaEvent) and chunk.delta.text or ""
|
|
14
15
|
|
|
@@ -36,8 +37,15 @@ def _process_streamed_response(response, callbacks: list[callable]):
|
|
|
36
37
|
|
|
37
38
|
|
|
38
39
|
def _prepare_llm_arguments(config: Config, kwargs: dict):
|
|
39
|
-
args = {
|
|
40
|
+
args = {**config.LLM_DEFAULT_ARGS, **kwargs}
|
|
40
41
|
args["model"] = args.get("model", config.MODEL)
|
|
42
|
+
if "max_tokens" not in args:
|
|
43
|
+
if "claude-3-5-sonnet" in args["model"]:
|
|
44
|
+
args["max_tokens"] = 8192
|
|
45
|
+
elif "claude-3-7-sonnet" in args["model"]:
|
|
46
|
+
args["max_tokens"] = 16384
|
|
47
|
+
else:
|
|
48
|
+
args["max_tokens"] = 4096
|
|
41
49
|
args.pop("seed", None) # Not supported by Anthropic
|
|
42
50
|
callbacks = prepare_callbacks(config, args)
|
|
43
51
|
return args, {"callbacks": callbacks}
|
|
@@ -16,7 +16,7 @@ def inference(prompt: str, model, tokenizer, **kwargs):
|
|
|
16
16
|
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
|
|
17
17
|
outputs = model.generate(**inputs, **kwargs)
|
|
18
18
|
outputs = [
|
|
19
|
-
tokenizer.decode(i[len(inputs[0])
|
|
19
|
+
tokenizer.decode(i[len(inputs[0]):], skip_special_tokens=skip_special_tokens)
|
|
20
20
|
for i in outputs
|
|
21
21
|
]
|
|
22
22
|
return LLMResponse(outputs[0], dict(all=outputs))
|
|
@@ -13,6 +13,7 @@ def make_remove_hidden_output(config: Config) -> callable:
|
|
|
13
13
|
|
|
14
14
|
return remove_hidden_output
|
|
15
15
|
|
|
16
|
+
|
|
16
17
|
def prepare_callbacks(config: Config, args, set_stream: bool = True) -> list[callable]:
|
|
17
18
|
callbacks = args.pop("callbacks", []) or [] + config.CALLBACKS or []
|
|
18
19
|
if "callback" in args:
|
|
@@ -369,3 +369,93 @@ def resolve_callable(
|
|
|
369
369
|
except (ImportError, AttributeError, AssertionError, ValueError) as e:
|
|
370
370
|
raise ValueError(f"Can't resolve callable by name '{fn}', {e}") from e
|
|
371
371
|
return fn
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
def levenshtein(a: str, b: str) -> int:
|
|
375
|
+
"""Compute the Levenshtein edit distance between two strings.
|
|
376
|
+
|
|
377
|
+
The **Levenshtein distance** is the minimum number of single‑character
|
|
378
|
+
edits (insertions, deletions, or substitutions) required to transform one
|
|
379
|
+
string into the other.
|
|
380
|
+
|
|
381
|
+
This implementation uses the classic Wagner–Fischer dynamic‑programming
|
|
382
|
+
algorithm and stores only a single row of the DP matrix at any time,
|
|
383
|
+
reducing memory usage to be linear in the length of the shorter string.
|
|
384
|
+
|
|
385
|
+
Args:
|
|
386
|
+
a (str): First input string.
|
|
387
|
+
b (str): Second input string.
|
|
388
|
+
|
|
389
|
+
Returns:
|
|
390
|
+
int: Non‑negative integer representing the edit distance. A value of
|
|
391
|
+
``0`` means the strings are identical.
|
|
392
|
+
|
|
393
|
+
Complexity:
|
|
394
|
+
* **Time** ``O(ab)``
|
|
395
|
+
* **Space** ``O(min(a, b))``
|
|
396
|
+
|
|
397
|
+
Examples:
|
|
398
|
+
>>> levenshtein("kitten", "sitting")
|
|
399
|
+
3
|
|
400
|
+
>>> levenshtein("graph", "giraffe")
|
|
401
|
+
4
|
|
402
|
+
"""
|
|
403
|
+
if a == b:
|
|
404
|
+
return 0
|
|
405
|
+
# Ensure a is the shorter string to reduce memory
|
|
406
|
+
if len(a) > len(b):
|
|
407
|
+
a, b = b, a
|
|
408
|
+
previous = list(range(len(a) + 1))
|
|
409
|
+
for i, ch_b in enumerate(b, start=1):
|
|
410
|
+
current = [i]
|
|
411
|
+
for j, ch_a in enumerate(a, start=1):
|
|
412
|
+
cost = 0 if ch_a == ch_b else 1
|
|
413
|
+
current.append(
|
|
414
|
+
min(
|
|
415
|
+
current[-1] + 1, # insertion
|
|
416
|
+
previous[j] + 1, # deletion
|
|
417
|
+
previous[j - 1] + cost # substitution
|
|
418
|
+
)
|
|
419
|
+
)
|
|
420
|
+
previous = current
|
|
421
|
+
return previous[-1]
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
def most_similar(
|
|
425
|
+
needle: str,
|
|
426
|
+
haystack: list[str],
|
|
427
|
+
distance_func: callable = levenshtein,
|
|
428
|
+
case_sensitive: bool = False,
|
|
429
|
+
) -> tuple[str, int]:
|
|
430
|
+
"""
|
|
431
|
+
Find the most similar string from a list of strings using the
|
|
432
|
+
specified distance function.
|
|
433
|
+
|
|
434
|
+
Args:
|
|
435
|
+
needle (str): The word to compare against.
|
|
436
|
+
haystack (list[str]): A list of words to compare with.
|
|
437
|
+
distance_func (callable): The distance function to use for comparison.
|
|
438
|
+
Defaults to levenshtein.
|
|
439
|
+
case_sensitive (bool): If True, the comparison is case-sensitive.
|
|
440
|
+
|
|
441
|
+
Returns:
|
|
442
|
+
tuple[str, int]: A tuple containing the most similar word and its distance
|
|
443
|
+
from the given word.
|
|
444
|
+
|
|
445
|
+
Raises:
|
|
446
|
+
ValueError: If haystack is empty.
|
|
447
|
+
"""
|
|
448
|
+
if not haystack:
|
|
449
|
+
raise ValueError("Haystack cannot be empty")
|
|
450
|
+
|
|
451
|
+
min_dist = float('inf')
|
|
452
|
+
most_similar_word = None
|
|
453
|
+
a = needle if case_sensitive else needle.lower()
|
|
454
|
+
for word in haystack:
|
|
455
|
+
b = word if case_sensitive else word.lower()
|
|
456
|
+
dist = distance_func(a, b)
|
|
457
|
+
if dist < min_dist:
|
|
458
|
+
min_dist = dist
|
|
459
|
+
most_similar_word = word
|
|
460
|
+
|
|
461
|
+
return most_similar_word, min_dist
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/wrappers/llm_response_wrapper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|