PyPI - ai-microcore - Versions diffs - 3.16.5__tar.gz → 4.0.0.dev2__tar.gz - Mend

ai-microcore 3.16.5 (tar.gz) → 4.0.0.dev2 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41)

{ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-microcore
-Version: 3.16.5
+Version: 4.0.0.dev2
 Summary: # Minimalistic Foundation for AI Applications
 Keywords: llm,large language models,ai,similarity search,ai search,gpt,openai
 Author-email: Vitalii Stepanenko <mail@vitalii.in>
@@ -132,6 +132,24 @@ See [transformers installation](https://huggingface.co/docs/transformers/install
     <br>💡 <small>Setting `USE_DOT_ENV` to `false` disables reading configuration files.</small>
 3.  OS environment variables have the lowest priority.
+### Vector Databases
+Vector database functions are available via `microcore.texts`.
+Default vector database is [Chroma](https://www.trychroma.com/).
+In order to use vector database functions, you need to install the `chromadb` package:
+```bash
+pip install chromadb
+```
+By default, MicroCore will use ChromaDB PersistentClient (if corresponding package is installed).
+Alternatively, you can run Chroma as separate service and configure MicroCore to use HttpClient:
+```python
+from microcore import configure
+configure(
+    EMBEDDING_DB_HOST = 'localhost',
+    EMBEDDING_DB_PORT = 8000,
+)
+```
 ## 🌟 Core Functions

{ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/README.md RENAMED Viewed

@@ -107,6 +107,24 @@ See [transformers installation](https://huggingface.co/docs/transformers/install
     <br>💡 <small>Setting `USE_DOT_ENV` to `false` disables reading configuration files.</small>
 3.  OS environment variables have the lowest priority.
+### Vector Databases
+Vector database functions are available via `microcore.texts`.
+Default vector database is [Chroma](https://www.trychroma.com/).
+In order to use vector database functions, you need to install the `chromadb` package:
+```bash
+pip install chromadb
+```
+By default, MicroCore will use ChromaDB PersistentClient (if corresponding package is installed).
+Alternatively, you can run Chroma as separate service and configure MicroCore to use HttpClient:
+```python
+from microcore import configure
+configure(
+    EMBEDDING_DB_HOST = 'localhost',
+    EMBEDDING_DB_PORT = 8000,
+)
+```
 ## 🌟 Core Functions

{ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/__init__.py RENAMED Viewed

@@ -161,4 +161,4 @@ __all__ = [
     # "wrappers",
 ]
-__version__ = "3.16.5"
+__version__ = "4.0.0-dev2"

{ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/_prepare_llm_args.py RENAMED Viewed

@@ -22,7 +22,7 @@ def prepare_chat_messages(prompt: TPrompt) -> list[dict]:
             dict(role=DEFAULT_MESSAGE_ROLE, content=msg)
             if isinstance(msg, str)
             else (
-                asdict(msg, dict_factory=msg.dict_factory)
+                asdict(msg, dict_factory=msg.DICT_FACTORY)
                 if isinstance(msg, Msg)
                 else msg
             )

{ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/ai_func/__init__.py RENAMED Viewed

@@ -5,13 +5,23 @@ descr: Allows to describe python functions for LLM
 import ast
 import inspect
+from enum import Enum
 from typing import Dict, Any
 import docstring_parser
 from .. import tpl
+from ..utils import dedent
+class AiFuncSyntax(str, Enum):
+    PYTHONIC: str = "pythonic"
+    JSON: str = "json"
+    DEFAULT: str = str(JSON)
+    def __str__(self):
+        return self.value
 def func_arg_comments(func):
-    func_source = inspect.getsource(func)
+    func_source = dedent(inspect.getsource(func))
     module = ast.parse(func_source)
     func_def = module.body[0]
@@ -60,7 +70,7 @@ def func_metadata(func) -> Dict[str, Any]:
     for name, val in metadata["args"].items():
         val["comment"] = arg_comments[name]
-        # Parse docstring
+    # Parse docstring
     parsed_docstring = docstring_parser.parse(inspect.getdoc(func))
     # Add descriptions from parsed docstring to parameters
@@ -71,6 +81,17 @@ def func_metadata(func) -> Dict[str, Any]:
     return metadata
-def describe_ai_func(func):
+def describe_ai_func(func: callable, syntax: AiFuncSyntax | str = None) -> str:
+    """
+    Renders function description for LLM
+    Args:
+        func: callable: function to describe
+        syntax: AiFuncSyntax | str: syntax to use for the description
+                - Use AiFuncSyntax enums to use standard templates (""json", "pythonic")
+                - Use custom template name to use custom template
+    Returns: str: rendered description, part of prompt
+    """
+    syntax = syntax or AiFuncSyntax.DEFAULT
+    tpl_file = f"ai-func.{syntax}.j2" if syntax in AiFuncSyntax else syntax
     metadata = func_metadata(func)
-    return tpl("python_ai_func.j2", **metadata)
+    return tpl(tpl_file, **metadata)

ai_microcore-4.0.0.dev2/microcore/ai_func/ai-func.json.j2 ADDED Viewed

@@ -0,0 +1,11 @@
+# {{ description or name.replace('_', ' ').capitalize() }}
+{
+  "call": "{{ name }}"{% if args %}{{ "," }}{% endif %}
+{%- for k,v in args.items() %}
+  "{{ k }}":
+    {%- if v.type %} <{{ v.type }}>{% endif -%}
+    {%- if v.default != 'NOT_SET' %} (default = {{ v.default }}){%endif-%}
+    {%- if not loop.last -%},{%- endif -%}
+    {%- if v.comment %} {{ v.comment }}{% endif -%}
+{%- endfor -%}
+{{ "\n}" }}

ai_microcore-3.16.5/microcore/ai_func/python_ai_func.j2 → ai_microcore-4.0.0.dev2/microcore/ai_func/ai-func.pythonic.j2 RENAMED Viewed

@@ -1,4 +1,4 @@
-# {{ description }}
+# {{ description or name.replace('_', ' ').capitalize() }}
 {{ name }}(
 {%- for name,v in args.items() -%}
 {{ "\n\t" }}{{ name }}

{ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/configuration.py RENAMED Viewed

@@ -369,6 +369,10 @@ class Config(LLMConfig):
     EMBEDDING_DB_ALLOW_DUPLICATES: bool = from_env(dtype=bool, default=False)
+    EMBEDDING_DB_HOST: str = from_env(default=None)
+    EMBEDDING_DB_PORT: str = from_env(default=None)
     DEFAULT_ENCODING: str = from_env("utf-8")
     """Used in file system operations, utf-8 by default"""

{ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/embedding_db/__init__.py RENAMED Viewed

@@ -1,5 +1,4 @@
 import logging
-import sys
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
@@ -8,6 +7,8 @@ import tiktoken
 from ..utils import ExtendedString
+INT32_MAX = 2**31 - 1  # 2147483647
 class SearchResults(list):
     def fit_to_token_size(
         self,
@@ -115,7 +116,7 @@ class AbstractEmbeddingDB(ABC):
         **kwargs,
     ) -> SearchResults | list[str | SearchResult]:
         return self.search(
-            collection, query, n_results=sys.maxsize - 1, where=where, **kwargs
+            collection, query, n_results=INT32_MAX, where=where, **kwargs
         )
     @abstractmethod

{ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/embedding_db/chromadb.py RENAMED Viewed

@@ -1,3 +1,4 @@
+import logging
 from dataclasses import dataclass
 import uuid
@@ -16,10 +17,22 @@ class ChromaEmbeddingDB(AbstractEmbeddingDB):
     client: chromadb.Client = None
     def __post_init__(self):
-        self.client = chromadb.PersistentClient(
-            path=f"{self.config.STORAGE_PATH}/{self.config.EMBEDDING_DB_FOLDER}",
-            settings=Settings(anonymized_telemetry=False),
-        )
+        if self.config.EMBEDDING_DB_HOST:
+            logging.info(
+                "Connecting to ChromaDB at %s:%s",
+                self.config.EMBEDDING_DB_HOST,
+                self.config.EMBEDDING_DB_PORT
+            )
+            self.client = chromadb.HttpClient(
+                host=self.config.EMBEDDING_DB_HOST,
+                port=self.config.EMBEDDING_DB_PORT or 8000,
+                settings=Settings(anonymized_telemetry=False),
+            )
+        else:
+            self.client = chromadb.PersistentClient(
+                path=f"{self.config.STORAGE_PATH}/{self.config.EMBEDDING_DB_FOLDER}",
+                settings=Settings(anonymized_telemetry=False),
+            )
         self.embedding_function = (
             self.config.EMBEDDING_DB_FUNCTION
             or embedding_functions.DefaultEmbeddingFunction()

{ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/llm/shared.py RENAMED Viewed

@@ -21,4 +21,5 @@ def prepare_callbacks(config: Config, args, set_stream: bool = True) -> list[cal
             callbacks.append(cb)
     if set_stream and "stream" not in args:
         args["stream"] = bool(callbacks)
-    return callbacks
+    return callbacks

{ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/logging.py RENAMED Viewed

@@ -41,6 +41,20 @@ def _format_request_log_str(prompt, **kwargs) -> str:
         )
         if out.endswith("\n"):
             out = out[:-1]
+    if LoggingConfig.STRIP_REQUEST_LINES:
+        start_lines, end_lines = LoggingConfig.STRIP_REQUEST_LINES
+        max_lines = start_lines + end_lines
+        lines = out.split("\n")
+        if len(lines) > max_lines:
+            out = "\n".join(
+                lines[:start_lines]
+                + [
+                    f"{LoggingConfig.INDENT}{Fore.YELLOW}"
+                    f"...(output was truncated)..."
+                    f"{LoggingConfig.PROMPT_COLOR}"
+                ]
+                + (lines[-end_lines:] if end_lines else [])
+            )
     return out
@@ -72,6 +86,7 @@ class LoggingConfig:
     OUTPUT_METHOD: callable = print
     REQUEST_FORMATTER: callable = _format_request_log_str
     RESPONSE_FORMATTER: callable = _format_response_log_str
+    STRIP_REQUEST_LINES: tuple[int, int] | None = [40, 15]
 def _log_request(prompt, **kwargs):

{ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/message_types.py RENAMED Viewed

@@ -2,6 +2,7 @@
 from enum import Enum
 from dataclasses import dataclass, field
+from typing import ClassVar
 class Role(str, Enum):
@@ -9,16 +10,20 @@ class Role(str, Enum):
     USER = "user"
     ASSISTANT = "assistant"
+    def __str__(self):
+        return self.value
 DEFAULT_MESSAGE_ROLE = Role.USER
 @dataclass
 class Msg:
-    dict_factory = dict
     role: str = field(default=DEFAULT_MESSAGE_ROLE)
     content: str = field(default="")
+    DICT_FACTORY: ClassVar = dict
     def __str__(self):
         return str(self.content)
@@ -49,10 +54,11 @@ class PartialMsg(AssistantMsg):
         is_partial = True
         """Custom dictionary class to handle additional properties"""
-    dict_factory = _PartialMsgDict
     placeholder = "<|placeholder|>"
     variants_splitter = "<|or|>"
+    DICT_FACTORY: ClassVar = _PartialMsgDict
     @staticmethod
     def split_prefix_and_suffixes(content: str):
         parts = content.split(PartialMsg.placeholder)