PyPI - ai-microcore - Versions diffs - 4.0.0.dev2__tar.gz → 4.0.0.dev4__tar.gz - Mend

ai-microcore 4.0.0.dev2.tar.gz → 4.0.0.dev4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

{ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-microcore
-Version: 4.0.0.dev2
+Version: 4.0.0.dev4
 Summary: # Minimalistic Foundation for AI Applications
 Keywords: llm,large language models,ai,similarity search,ai search,gpt,openai
 Author-email: Vitalii Stepanenko <mail@vitalii.in>

{ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/__init__.py RENAMED Viewed

@@ -161,4 +161,4 @@ __all__ = [
     # "wrappers",
 ]
-__version__ = "4.0.0-dev2"
+__version__ = "4.0.0-dev4"

{ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/_env.py RENAMED Viewed

@@ -6,7 +6,7 @@ from typing import TYPE_CHECKING
 import jinja2
 from .embedding_db import AbstractEmbeddingDB
-from .configuration import Config, ApiType, LLMConfigError
+from .configuration import Config, ApiType, LLMConfigError, EmbeddingDbType
 from .types import TplFunctionType, LLMAsyncFunctionType, LLMFunctionType
 from .templating.jinja2 import make_jinja2_env, make_tpl_function
 from .llm.openai_llm import make_llm_functions as make_openai_llm_functions
@@ -134,7 +134,10 @@ class Env:
             )
     def init_similarity_search(self):
-        if find_spec("chromadb") is not None:
+        if (
+            self.config.EMBEDDING_DB_TYPE == EmbeddingDbType.CHROMA
+            and find_spec("chromadb") is not None
+        ):
             from .embedding_db.chromadb import ChromaEmbeddingDB
             self.texts = ChromaEmbeddingDB(self.config)

{ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/ai_func/__init__.py RENAMED Viewed

@@ -20,6 +20,7 @@ class AiFuncSyntax(str, Enum):
     def __str__(self):
         return self.value
 def func_arg_comments(func):
     func_source = dedent(inspect.getsource(func))
     module = ast.parse(func_source)

{ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/configuration.py RENAMED Viewed

@@ -78,6 +78,17 @@ class ApiType(str, Enum):
     def is_local(api_type: str) -> bool:
         return api_type in (ApiType.FUNCTION, ApiType.TRANSFORMERS, ApiType.NONE)
+    def __str__(self):
+        return self.value
+class EmbeddingDbType(str, Enum):
+    """Identifiers for the embedding database backend; members are plain strings."""
+    # Identifier of the ChromaDB backend.
+    CHROMA = "chroma"
+    # Empty value; presumably means "no embedding database" -- confirm against usage.
+    NONE = ""
+    def __str__(self):
+        # Render as the raw value instead of the default "EmbeddingDbType.<NAME>" form.
+        return self.value
 _default_dotenv_loaded = False
@@ -373,6 +384,8 @@ class Config(LLMConfig):
     EMBEDDING_DB_PORT: str = from_env(default=None)
+    EMBEDDING_DB_TYPE: str = from_env(EmbeddingDbType.CHROMA)
     DEFAULT_ENCODING: str = from_env("utf-8")
     """Used in file system operations, utf-8 by default"""

{ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/embedding_db/__init__.py RENAMED Viewed

@@ -9,6 +9,7 @@ from ..utils import ExtendedString
 INT32_MAX = 2**31 - 1  # 2147483647
 class SearchResults(list):
     def fit_to_token_size(
         self,

{ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/file_storage.py RENAMED Viewed

@@ -15,11 +15,15 @@ from .utils import file_link, list_files
 _missing = object()
 @dataclass
 class Storage:
     custom_path: str = field(default="")
+    def __call__(self, custom_path: str):
+        """Return a new ``Storage`` whose ``custom_path`` is the given value.
+        The instance being called is left unchanged.
+        """
+        return Storage(custom_path)
     @property
     def path(self) -> Path:
         return Path(str(self.custom_path) or config().STORAGE_PATH)

{ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/json_parsing.py RENAMED Viewed

@@ -102,7 +102,7 @@ def unwrap_json_substring(
         ...
     return (
-        input_string[start : end + 1]
+        input_string[start: end + 1]
         if brace
         else input_string if return_original_on_fail else ""
     )

{ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/llm/_openai_llm_v0.py RENAMED Viewed

@@ -9,6 +9,7 @@ from ..wrappers.llm_response_wrapper import LLMResponse
 from ..utils import is_chat_model
 from .shared import prepare_callbacks
 def _get_chunk_text(chunk, mode_chat_model: bool):
     # Azure API gives first chunk with empty choices
     choice = chunk.choices[0] if len(chunk.choices) else {}

{ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/llm/anthropic.py RENAMED Viewed

@@ -9,6 +9,7 @@ from ..types import LLMAsyncFunctionType, LLMFunctionType
 from ..wrappers.llm_response_wrapper import LLMResponse
 from .shared import prepare_callbacks
 def _get_chunk_text(chunk):
     return isinstance(chunk, ContentBlockDeltaEvent) and chunk.delta.text or ""
@@ -36,8 +37,15 @@ def _process_streamed_response(response, callbacks: list[callable]):
 def _prepare_llm_arguments(config: Config, kwargs: dict):
-    args = {"max_tokens": 1024, **config.LLM_DEFAULT_ARGS, **kwargs}
+    args = {**config.LLM_DEFAULT_ARGS, **kwargs}
     args["model"] = args.get("model", config.MODEL)
+    if "max_tokens" not in args:
+        if "claude-3-5-sonnet" in args["model"]:
+            args["max_tokens"] = 8192
+        elif "claude-3-7-sonnet" in args["model"]:
+            args["max_tokens"] = 16384
+        else:
+            args["max_tokens"] = 4096
     args.pop("seed", None)  # Not supported by Anthropic
     callbacks = prepare_callbacks(config, args)
     return args, {"callbacks": callbacks}

{ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/llm/local_transformers.py RENAMED Viewed

@@ -16,7 +16,7 @@ def inference(prompt: str, model, tokenizer, **kwargs):
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(**inputs, **kwargs)
     outputs = [
-        tokenizer.decode(i[len(inputs[0]) :], skip_special_tokens=skip_special_tokens)
+        tokenizer.decode(i[len(inputs[0]):], skip_special_tokens=skip_special_tokens)
         for i in outputs
     ]
     return LLMResponse(outputs[0], dict(all=outputs))

{ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/llm/shared.py RENAMED Viewed

@@ -13,6 +13,7 @@ def make_remove_hidden_output(config: Config) -> callable:
     return remove_hidden_output
 def prepare_callbacks(config: Config, args, set_stream: bool = True) -> list[callable]:
     callbacks = args.pop("callbacks", []) or [] + config.CALLBACKS or []
     if "callback" in args:

{ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/tokenizing.py RENAMED Viewed

@@ -5,7 +5,8 @@ import requests.exceptions
 from ._env import env
-class CantLoadTikTokenEncoding(RuntimeError): ...
+class CantLoadTikTokenEncoding(RuntimeError):
+    ...
 def _resolve_tiktoken_encoding(

{ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/utils.py RENAMED Viewed

@@ -369,3 +369,93 @@ def resolve_callable(
     except (ImportError, AttributeError, AssertionError, ValueError) as e:
         raise ValueError(f"Can't resolve callable by name '{fn}', {e}") from e
     return fn
+def levenshtein(a: str, b: str) -> int:
+    """Compute the Levenshtein edit distance between two strings.
+    The **Levenshtein distance** is the minimum number of single-character
+    edits (insertions, deletions, or substitutions) required to transform one
+    string into the other.
+    This implementation uses the classic Wagner–Fischer dynamic-programming
+    algorithm and keeps only two rows of the DP matrix (the previous and the
+    current one) at any time, so memory usage is linear in the length of the
+    shorter string.
+    Args:
+        a (str): First input string.
+        b (str): Second input string.
+    Returns:
+        int: Non-negative integer representing the edit distance. A value of
+        ``0`` means the strings are identical.
+    Complexity:
+        * **Time** ``O(len(a) * len(b))``
+        * **Space** ``O(min(len(a), len(b)))``
+    Examples:
+        >>> levenshtein("kitten", "sitting")
+        3
+        >>> levenshtein("graph", "giraffe")
+        4
+    """
+    # Identical inputs need no DP pass.
+    if a == b:
+        return 0
+    # Ensure a is the shorter string to reduce memory
+    # (the distance is symmetric, so swapping does not change the result).
+    if len(a) > len(b):
+        a, b = b, a
+    # Row 0: distance from the empty prefix of b to each prefix of a.
+    previous = list(range(len(a) + 1))
+    for i, ch_b in enumerate(b, start=1):
+        # Column 0: distance from the first i characters of b to the empty prefix of a.
+        current = [i]
+        for j, ch_a in enumerate(a, start=1):
+            cost = 0 if ch_a == ch_b else 1
+            current.append(
+                min(
+                    current[-1] + 1,        # insertion
+                    previous[j] + 1,        # deletion
+                    previous[j - 1] + cost  # substitution
+                )
+            )
+        previous = current
+    # Last cell of the final row is the distance between the full strings.
+    return previous[-1]
+def most_similar(
+    needle: str,
+    haystack: list[str],
+    distance_func: callable = levenshtein,
+    case_sensitive: bool = False,
+) -> tuple[str, int]:
+    """
+    Find the most similar string from a list of strings using the
+    specified distance function.
+    When several candidates share the smallest distance, the one that appears
+    first in ``haystack`` is returned.
+    Args:
+        needle (str): The word to compare against.
+        haystack (list[str]): A list of words to compare with.
+        distance_func (callable): The distance function to use for comparison,
+            called as ``distance_func(needle, candidate)``; a smaller result
+            means a closer match. Defaults to levenshtein.
+        case_sensitive (bool): If True, the comparison is case-sensitive.
+            If False (the default), both sides are lowercased with
+            ``str.lower()`` before being compared.
+    Returns:
+        tuple[str, int]: A tuple containing the most similar word, exactly as it
+            appears in ``haystack``, and its distance from the given word.
+    Raises:
+        ValueError: If haystack is empty.
+    """
+    if not haystack:
+        raise ValueError("Haystack cannot be empty")
+    # Start above any finite distance so the first candidate always wins.
+    min_dist = float('inf')
+    most_similar_word = None
+    a = needle if case_sensitive else needle.lower()
+    for word in haystack:
+        b = word if case_sensitive else word.lower()
+        dist = distance_func(a, b)
+        # Strict comparison keeps the earliest candidate on ties.
+        if dist < min_dist:
+            min_dist = dist
+            # Keep the original (not lowercased) form for the caller.
+            most_similar_word = word
+    return most_similar_word, min_dist