PyPI - ai-microcore - Versions diffs - 4.0.0.dev3__tar.gz → 4.0.0.dev5__tar.gz - Mend

ai-microcore 4.0.0.dev3__tar.gz → 4.0.0.dev5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev5}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-microcore
-Version: 4.0.0.dev3
+Version: 4.0.0.dev5
 Summary: # Minimalistic Foundation for AI Applications
 Keywords: llm,large language models,ai,similarity search,ai search,gpt,openai
 Author-email: Vitalii Stepanenko <mail@vitalii.in>

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev5}/microcore/__init__.py RENAMED Viewed

@@ -161,4 +161,4 @@ __all__ = [
     # "wrappers",
 ]
-__version__ = "4.0.0-dev3"
+__version__ = "4.0.0-dev5"

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev5}/microcore/_env.py RENAMED Viewed

@@ -6,7 +6,7 @@ from typing import TYPE_CHECKING
 import jinja2
 from .embedding_db import AbstractEmbeddingDB
-from .configuration import Config, ApiType, LLMConfigError
+from .configuration import Config, ApiType, LLMConfigError, EmbeddingDbType
 from .types import TplFunctionType, LLMAsyncFunctionType, LLMFunctionType
 from .templating.jinja2 import make_jinja2_env, make_tpl_function
 from .llm.openai_llm import make_llm_functions as make_openai_llm_functions
@@ -14,6 +14,7 @@ from .llm.local_llm import make_llm_functions as make_local_llm_functions
 if TYPE_CHECKING:
     from .wrappers.llm_response_wrapper import LLMResponse  # noqa: F401
+    from transformers import PreTrainedModel, PreTrainedTokenizer  # noqa: F401
 @dataclass
@@ -26,10 +27,10 @@ class Env:
     llm_before_handlers: list[callable] = field(default_factory=list)
     llm_after_handlers: list[callable] = field(default_factory=list)
     texts: AbstractEmbeddingDB = None
-    model: "transformers.PreTrainedModel" = field(
+    model: "PreTrainedModel" = field(
         default=None, init=False, repr=False
     )  # noqa
-    tokenizer: "transformers.PreTrainedTokenizer" = field(  # noqa
+    tokenizer: "PreTrainedTokenizer" = field(  # noqa
         default=None, init=False, repr=False
     )
@@ -134,7 +135,10 @@ class Env:
             )
     def init_similarity_search(self):
-        if find_spec("chromadb") is not None:
+        if (
+            self.config.EMBEDDING_DB_TYPE == EmbeddingDbType.CHROMA
+            and find_spec("chromadb") is not None
+        ):
             from .embedding_db.chromadb import ChromaEmbeddingDB
             self.texts = ChromaEmbeddingDB(self.config)

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev5}/microcore/_llm_functions.py RENAMED Viewed

@@ -1,17 +1,27 @@
+import logging
 from datetime import datetime
 from .utils import run_parallel
-from .wrappers.llm_response_wrapper import LLMResponse
+from .wrappers.llm_response_wrapper import LLMResponse, DictFromLLMResponse
 from .types import TPrompt
 from ._env import env
-def llm(prompt: TPrompt, **kwargs) -> str | LLMResponse:
+def llm(
+    prompt: TPrompt,
+    retries: int = 0,
+    parse_json: bool | dict = False,
+    **kwargs
+) -> str | LLMResponse:
     """
     Request Large Language Model synchronously
     Args:
         prompt (str | Msg | dict | list[str | Msg | dict]): Text to send to LLM
+        retries (int): Number of retries in case of error
+        parse_json (bool|dict):
+            If True, parses response as JSON,
+            alternatively non-empty dict can be used as parse_json arguments
         **kwargs (dict): Parameters supported by the LLM API
             See parameters supported by the OpenAI:
@@ -40,7 +50,18 @@ def llm(prompt: TPrompt, **kwargs) -> str | LLMResponse:
     """
     [h(prompt, **kwargs) for h in env().llm_before_handlers]
     start = datetime.now()
-    response = env().llm_function(prompt, **kwargs)
+    tries = retries + 1
+    while tries > 0:
+        try:
+            tries -= 1
+            response = env().llm_function(prompt, **kwargs)
+            break
+        except Exception as e:
+            if tries == 0:
+                raise e
+            logging.error(f"LLM error: {e}")
+            logging.info(f"Retrying... {tries} retries left")
+            continue
     try:
         response.gen_duration = (datetime.now() - start).total_seconds()
         if not env().config.SAVE_MEMORY:
@@ -48,15 +69,35 @@ def llm(prompt: TPrompt, **kwargs) -> str | LLMResponse:
     except AttributeError:
         ...
     [h(response) for h in env().llm_after_handlers]
+    if tries > 0:
+        retry_params = dict(**kwargs)
+        retry_params["retries"] = tries - 1
+        setattr(
+            response,
+            "_retry_callback",
+            lambda: llm(prompt, **retry_params)
+        )
+    if parse_json:
+        parsing_params = parse_json if isinstance(parse_json, dict) else {}
+        return response.parse_json(**parsing_params)
     return response
-async def allm(prompt: TPrompt, **kwargs) -> str | LLMResponse:
+async def allm(
+    prompt: TPrompt,
+    retries: int = 0,
+    parse_json: bool | dict = False,
+    **kwargs
+) -> str | LLMResponse | DictFromLLMResponse:
     """
     Request Large Language Model asynchronously
     Args:
         prompt (str | Msg | dict | list[str | Msg | dict]): Text to send to LLM
+        retries (int): Number of retries in case of error
+        parse_json (bool|dict):
+            If True, parses response as JSON,
+            alternatively non-empty dict can be used as parse_json arguments
         **kwargs (dict): Parameters supported by the LLM API
             See parameters supported by the OpenAI:
@@ -87,7 +128,18 @@ async def allm(prompt: TPrompt, **kwargs) -> str | LLMResponse:
     """
     [h(prompt, **kwargs) for h in env().llm_before_handlers]
     start = datetime.now()
-    response = await env().llm_async_function(prompt, **kwargs)
+    tries = retries + 1
+    while tries > 0:
+        try:
+            tries -= 1
+            response = await env().llm_async_function(prompt, **kwargs)
+            break
+        except Exception as e:
+            if tries == 0:
+                raise e
+            logging.error(f"LLM error: {e}")
+            logging.info(f"Retrying... {tries} retries left")
+            continue
     try:
         response.gen_duration = (datetime.now() - start).total_seconds()
         if not env().config.SAVE_MEMORY:
@@ -95,6 +147,15 @@ async def allm(prompt: TPrompt, **kwargs) -> str | LLMResponse:
     except AttributeError:
         ...
     [h(response) for h in env().llm_after_handlers]
+    if parse_json:
+        try:
+            parsing_params = parse_json if isinstance(parse_json, dict) else {}
+            return response.parse_json(**parsing_params)
+        except Exception as e:
+            if tries > 0:
+                logging.error(f"LLM error: {e}")
+                logging.info(f"Retrying... {tries} retries left")
+                return await allm(prompt, retries=tries - 1, parse_json=parse_json, **kwargs)
     return response

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev5}/microcore/ai_func/__init__.py RENAMED Viewed

@@ -20,6 +20,7 @@ class AiFuncSyntax(str, Enum):
     def __str__(self):
         return self.value
 def func_arg_comments(func):
     func_source = dedent(inspect.getsource(func))
     module = ast.parse(func_source)

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev5}/microcore/configuration.py RENAMED Viewed

@@ -78,6 +78,17 @@ class ApiType(str, Enum):
     def is_local(api_type: str) -> bool:
         return api_type in (ApiType.FUNCTION, ApiType.TRANSFORMERS, ApiType.NONE)
+    def __str__(self):
+        return self.value
+class EmbeddingDbType(str, Enum):
+    CHROMA = "chroma"
+    NONE = ""
+    def __str__(self):
+        return self.value
 _default_dotenv_loaded = False
@@ -373,6 +384,8 @@ class Config(LLMConfig):
     EMBEDDING_DB_PORT: str = from_env(default=None)
+    EMBEDDING_DB_TYPE: str = from_env(EmbeddingDbType.CHROMA)
     DEFAULT_ENCODING: str = from_env("utf-8")
     """Used in file system operations, utf-8 by default"""

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev5}/microcore/embedding_db/__init__.py RENAMED Viewed

@@ -9,6 +9,7 @@ from ..utils import ExtendedString
 INT32_MAX = 2**31 - 1  # 2147483647
 class SearchResults(list):
     def fit_to_token_size(
         self,

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev5}/microcore/file_storage.py RENAMED Viewed

@@ -15,6 +15,7 @@ from .utils import file_link, list_files
 _missing = object()
 @dataclass
 class Storage:

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev5}/microcore/json_parsing.py RENAMED Viewed

@@ -102,7 +102,7 @@ def unwrap_json_substring(
         ...
     return (
-        input_string[start : end + 1]
+        input_string[start: end + 1]
         if brace
         else input_string if return_original_on_fail else ""
     )

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev5}/microcore/llm/_openai_llm_v0.py RENAMED Viewed

@@ -9,6 +9,7 @@ from ..wrappers.llm_response_wrapper import LLMResponse
 from ..utils import is_chat_model
 from .shared import prepare_callbacks
 def _get_chunk_text(chunk, mode_chat_model: bool):
     # Azure API gives first chunk with empty choices
     choice = chunk.choices[0] if len(chunk.choices) else {}

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev5}/microcore/llm/anthropic.py RENAMED Viewed

@@ -9,6 +9,7 @@ from ..types import LLMAsyncFunctionType, LLMFunctionType
 from ..wrappers.llm_response_wrapper import LLMResponse
 from .shared import prepare_callbacks
 def _get_chunk_text(chunk):
     return isinstance(chunk, ContentBlockDeltaEvent) and chunk.delta.text or ""
@@ -36,8 +37,15 @@ def _process_streamed_response(response, callbacks: list[callable]):
 def _prepare_llm_arguments(config: Config, kwargs: dict):
-    args = {"max_tokens": 1024, **config.LLM_DEFAULT_ARGS, **kwargs}
+    args = {**config.LLM_DEFAULT_ARGS, **kwargs}
     args["model"] = args.get("model", config.MODEL)
+    if "max_tokens" not in args:
+        if "claude-3-5-sonnet" in args["model"]:
+            args["max_tokens"] = 8192
+        elif "claude-3-7-sonnet" in args["model"]:
+            args["max_tokens"] = 16384
+        else:
+            args["max_tokens"] = 4096
     args.pop("seed", None)  # Not supported by Anthropic
     callbacks = prepare_callbacks(config, args)
     return args, {"callbacks": callbacks}

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev5}/microcore/llm/local_transformers.py RENAMED Viewed

@@ -16,7 +16,7 @@ def inference(prompt: str, model, tokenizer, **kwargs):
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(**inputs, **kwargs)
     outputs = [
-        tokenizer.decode(i[len(inputs[0]) :], skip_special_tokens=skip_special_tokens)
+        tokenizer.decode(i[len(inputs[0]):], skip_special_tokens=skip_special_tokens)
         for i in outputs
     ]
     return LLMResponse(outputs[0], dict(all=outputs))

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev5}/microcore/llm/shared.py RENAMED Viewed

@@ -13,6 +13,7 @@ def make_remove_hidden_output(config: Config) -> callable:
     return remove_hidden_output
 def prepare_callbacks(config: Config, args, set_stream: bool = True) -> list[callable]:
     callbacks = args.pop("callbacks", []) or [] + config.CALLBACKS or []
     if "callback" in args:

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev5}/microcore/text2speech/elevenlabs.py RENAMED Viewed

@@ -1,9 +1,28 @@
 import os
+from dataclasses import dataclass, asdict
 from datetime import datetime
 import aiohttp
 from .._env import env
+@dataclass
+class TTSArgs:
+    text: str
+    out_file: str = None
+    voice: str = "D38z5RcWu1voky8WS1ja"
+    stability: float = 0.29
+    similarity_boost: float = 0.5
+    style: float = 0.0
+    chunk_size: int = 1024
+    speed: float = 1.0
+    use_speaker_boost: bool = False
+    previous_text: str = None
+    next_text: str = None
+    def to_dict(self) -> dict:
+        return asdict(self)
 async def text_to_speech(
     text: str,
     out_file: str = None,
@@ -12,6 +31,10 @@ async def text_to_speech(
     similarity_boost=0.5,
     style=0.0,
     chunk_size=1024,
+    speed=1.0,
+    use_speaker_boost: bool = False,
+    previous_text: str = None,
+    next_text: str = None,
 ) -> str:
     url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice}"
     if not out_file:
@@ -25,8 +48,15 @@ async def text_to_speech(
             "stability": stability,
             "similarity_boost": similarity_boost,
             "style": style,
+            "speed": speed,
         },
     }
+    if use_speaker_boost:
+        data["voice_settings"]["use_speaker_boost"] = use_speaker_boost
+    if previous_text:
+        data["previous_text"] = previous_text
+    if next_text:
+        data["next_text"] = next_text
     headers = {
         "Accept": "audio/mpeg",
         "Content-Type": "application/json",

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev5}/microcore/tokenizing.py RENAMED Viewed

@@ -5,7 +5,8 @@ import requests.exceptions
 from ._env import env
-class CantLoadTikTokenEncoding(RuntimeError): ...
+class CantLoadTikTokenEncoding(RuntimeError):
+    ...
 def _resolve_tiktoken_encoding(

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev5}/microcore/types.py RENAMED Viewed

@@ -5,6 +5,7 @@ from .message_types import Msg
 if TYPE_CHECKING:
     from .wrappers.prompt_wrapper import PromptWrapper  # noqa: F401
+    from .wrappers.llm_response_wrapper import LLMResponse  # noqa: F401
 TPrompt = Union[
     dict, Msg, str, "PromptWrapper", List[Union[dict, Msg, str, "PromptWrapper"]]
@@ -12,9 +13,9 @@ TPrompt = Union[
 """Type for prompt argument in LLM requests"""
 TplFunctionType = Callable[[Union[PathLike[str], str], Any], str]
 """Function type for rendering prompt templates"""
-LLMFunctionType = Callable[[TPrompt, Any], str]
+LLMFunctionType = Callable[[TPrompt, Any], "LLMResponse"]
 """Function type for requesting LLM synchronously"""
-LLMAsyncFunctionType = Callable[[TPrompt, Any], Awaitable[str]]
+LLMAsyncFunctionType = Callable[[TPrompt, Any], Awaitable["LLMResponse"]]
 """Function type for requesting LLM asynchronously"""

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev5}/microcore/utils.py RENAMED Viewed

@@ -412,9 +412,9 @@ def levenshtein(a: str, b: str) -> int:
             cost = 0 if ch_a == ch_b else 1
             current.append(
                 min(
-                    current[-1] + 1,       # insertion
-                    previous[j] + 1,       # deletion
-                    previous[j - 1] + cost # substitution
+                    current[-1] + 1,        # insertion
+                    previous[j] + 1,        # deletion
+                    previous[j - 1] + cost  # substitution
                 )
             )
         previous = current
@@ -458,4 +458,4 @@ def most_similar(
             min_dist = dist
             most_similar_word = word
-    return most_similar_word, min_dist
+    return most_similar_word, min_dist

{ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev5}/microcore/wrappers/llm_response_wrapper.py RENAMED Viewed

@@ -46,9 +46,27 @@ class LLMResponse(ExtendedString, ConvertableToMessage):
         return obj
     def parse_json(
-        self, raise_errors: bool = True, required_fields: list[str] = None
+        self,
+        raise_errors: bool = True,
+        required_fields: list[str] = None,
+        validator: callable = None,
     ) -> list | dict | float | int | str | DictFromLLMResponse:
-        res = parse_json(self.content, raise_errors, required_fields)
+        try:
+            res = parse_json(self.content, True, required_fields)
+            if validator:
+                try:
+                    validator(res)
+                except Exception as e:
+                    raise BadAIAnswer(f"Language model response validation failed: {e}") from None
+        except Exception as e:
+            if hasattr(self, "_retry_callback"):
+                res = self._retry_callback()
+                if isinstance(res, DictFromLLMResponse):
+                    return res
+                return res.parse_json(raise_errors, required_fields, validator)
+            if raise_errors:
+                raise e
+            res = False
         if isinstance(res, dict):
             res = DictFromLLMResponse(res)
             res.llm_response = self