PyPI - ai-microcore - Versions diffs - 4.0.0.dev3__tar.gz → 4.0.0.dev4__tar.gz - Mend

ai-microcore 4.0.0.dev3.tar.gz → 4.0.0.dev4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-microcore
-Version: 4.0.0.dev3
+Version: 4.0.0.dev4
 Summary: # Minimalistic Foundation for AI Applications
 Keywords: llm,large language models,ai,similarity search,ai search,gpt,openai
 Author-email: Vitalii Stepanenko <mail@vitalii.in>

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/__init__.py RENAMED Viewed

@@ -161,4 +161,4 @@ __all__ = [
     # "wrappers",
 ]
-__version__ = "4.0.0-dev3"
+__version__ = "4.0.0-dev4"

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/_env.py RENAMED Viewed

@@ -6,7 +6,7 @@ from typing import TYPE_CHECKING
 import jinja2
 from .embedding_db import AbstractEmbeddingDB
-from .configuration import Config, ApiType, LLMConfigError
+from .configuration import Config, ApiType, LLMConfigError, EmbeddingDbType
 from .types import TplFunctionType, LLMAsyncFunctionType, LLMFunctionType
 from .templating.jinja2 import make_jinja2_env, make_tpl_function
 from .llm.openai_llm import make_llm_functions as make_openai_llm_functions
@@ -134,7 +134,10 @@ class Env:
             )
     def init_similarity_search(self):
-        if find_spec("chromadb") is not None:
+        if (
+            self.config.EMBEDDING_DB_TYPE == EmbeddingDbType.CHROMA
+            and find_spec("chromadb") is not None
+        ):
             from .embedding_db.chromadb import ChromaEmbeddingDB
             self.texts = ChromaEmbeddingDB(self.config)

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/ai_func/__init__.py RENAMED Viewed

@@ -20,6 +20,7 @@ class AiFuncSyntax(str, Enum):
     def __str__(self):
         return self.value
 def func_arg_comments(func):
     func_source = dedent(inspect.getsource(func))
     module = ast.parse(func_source)

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/configuration.py RENAMED Viewed

@@ -78,6 +78,17 @@ class ApiType(str, Enum):
     def is_local(api_type: str) -> bool:
         return api_type in (ApiType.FUNCTION, ApiType.TRANSFORMERS, ApiType.NONE)
+    def __str__(self):
+        return self.value
+class EmbeddingDbType(str, Enum):
+    CHROMA = "chroma"
+    NONE = ""
+    def __str__(self):
+        return self.value
 _default_dotenv_loaded = False
@@ -373,6 +384,8 @@ class Config(LLMConfig):
     EMBEDDING_DB_PORT: str = from_env(default=None)
+    EMBEDDING_DB_TYPE: str = from_env(EmbeddingDbType.CHROMA)
     DEFAULT_ENCODING: str = from_env("utf-8")
     """Used in file system operations, utf-8 by default"""

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/embedding_db/__init__.py RENAMED Viewed

@@ -9,6 +9,7 @@ from ..utils import ExtendedString
 INT32_MAX = 2**31 - 1  # 2147483647
 class SearchResults(list):
     def fit_to_token_size(
         self,

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/file_storage.py RENAMED Viewed

@@ -15,6 +15,7 @@ from .utils import file_link, list_files
 _missing = object()
 @dataclass
 class Storage:

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/json_parsing.py RENAMED Viewed

@@ -102,7 +102,7 @@ def unwrap_json_substring(
         ...
     return (
-        input_string[start : end + 1]
+        input_string[start: end + 1]
         if brace
         else input_string if return_original_on_fail else ""
     )

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/llm/_openai_llm_v0.py RENAMED Viewed

@@ -9,6 +9,7 @@ from ..wrappers.llm_response_wrapper import LLMResponse
 from ..utils import is_chat_model
 from .shared import prepare_callbacks
 def _get_chunk_text(chunk, mode_chat_model: bool):
     # Azure API gives first chunk with empty choices
     choice = chunk.choices[0] if len(chunk.choices) else {}

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/llm/anthropic.py RENAMED Viewed

@@ -9,6 +9,7 @@ from ..types import LLMAsyncFunctionType, LLMFunctionType
 from ..wrappers.llm_response_wrapper import LLMResponse
 from .shared import prepare_callbacks
 def _get_chunk_text(chunk):
     return isinstance(chunk, ContentBlockDeltaEvent) and chunk.delta.text or ""
@@ -36,8 +37,15 @@ def _process_streamed_response(response, callbacks: list[callable]):
 def _prepare_llm_arguments(config: Config, kwargs: dict):
-    args = {"max_tokens": 1024, **config.LLM_DEFAULT_ARGS, **kwargs}
+    args = {**config.LLM_DEFAULT_ARGS, **kwargs}
     args["model"] = args.get("model", config.MODEL)
+    if "max_tokens" not in args:
+        if "claude-3-5-sonnet" in args["model"]:
+            args["max_tokens"] = 8192
+        elif "claude-3-7-sonnet" in args["model"]:
+            args["max_tokens"] = 16384
+        else:
+            args["max_tokens"] = 4096
     args.pop("seed", None)  # Not supported by Anthropic
     callbacks = prepare_callbacks(config, args)
     return args, {"callbacks": callbacks}

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/llm/local_transformers.py RENAMED Viewed

@@ -16,7 +16,7 @@ def inference(prompt: str, model, tokenizer, **kwargs):
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(**inputs, **kwargs)
     outputs = [
-        tokenizer.decode(i[len(inputs[0]) :], skip_special_tokens=skip_special_tokens)
+        tokenizer.decode(i[len(inputs[0]):], skip_special_tokens=skip_special_tokens)
         for i in outputs
     ]
     return LLMResponse(outputs[0], dict(all=outputs))

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/llm/shared.py RENAMED Viewed

@@ -13,6 +13,7 @@ def make_remove_hidden_output(config: Config) -> callable:
     return remove_hidden_output
 def prepare_callbacks(config: Config, args, set_stream: bool = True) -> list[callable]:
     callbacks = args.pop("callbacks", []) or [] + config.CALLBACKS or []
     if "callback" in args:

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/tokenizing.py RENAMED Viewed

@@ -5,7 +5,8 @@ import requests.exceptions
 from ._env import env
-class CantLoadTikTokenEncoding(RuntimeError): ...
+class CantLoadTikTokenEncoding(RuntimeError):
+    ...
 def _resolve_tiktoken_encoding(

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/utils.py RENAMED Viewed

@@ -412,9 +412,9 @@ def levenshtein(a: str, b: str) -> int:
             cost = 0 if ch_a == ch_b else 1
             current.append(
                 min(
-                    current[-1] + 1,       # insertion
-                    previous[j] + 1,       # deletion
-                    previous[j - 1] + cost # substitution
+                    current[-1] + 1,        # insertion
+                    previous[j] + 1,        # deletion
+                    previous[j - 1] + cost  # substitution
                 )
             )
         previous = current
@@ -458,4 +458,4 @@ def most_similar(
             min_dist = dist
             most_similar_word = word
-    return most_similar_word, min_dist
+    return most_similar_word, min_dist