PyPI - ai-microcore - Versions diffs - 4.0.0.dev1__tar.gz → 4.0.0.dev3__tar.gz - Mend

ai-microcore 4.0.0.dev1 tar.gz → 4.0.0.dev3 tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

{ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-microcore
-Version: 4.0.0.dev1
+Version: 4.0.0.dev3
 Summary: # Minimalistic Foundation for AI Applications
 Keywords: llm,large language models,ai,similarity search,ai search,gpt,openai
 Author-email: Vitalii Stepanenko <mail@vitalii.in>
@@ -132,6 +132,24 @@ See [transformers installation](https://huggingface.co/docs/transformers/install
     <br>💡 <small>Setting `USE_DOT_ENV` to `false` disables reading configuration files.</small>
 3.  OS environment variables have the lowest priority.
+### Vector Databases
+Vector database functions are available via `microcore.texts`.
+The default vector database is [Chroma](https://www.trychroma.com/).
+In order to use vector database functions, you need to install the `chromadb` package:
+```bash
+pip install chromadb
+```
+By default, MicroCore will use ChromaDB PersistentClient (if the corresponding package is installed).
+Alternatively, you can run Chroma as a separate service and configure MicroCore to use HttpClient:
+```python
+from microcore import configure
+configure(
+    EMBEDDING_DB_HOST = 'localhost',
+    EMBEDDING_DB_PORT = 8000,
+)
+```
 ## 🌟 Core Functions

{ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/README.md RENAMED Viewed

@@ -107,6 +107,24 @@ See [transformers installation](https://huggingface.co/docs/transformers/install
     <br>💡 <small>Setting `USE_DOT_ENV` to `false` disables reading configuration files.</small>
 3.  OS environment variables have the lowest priority.
+### Vector Databases
+Vector database functions are available via `microcore.texts`.
+The default vector database is [Chroma](https://www.trychroma.com/).
+In order to use vector database functions, you need to install the `chromadb` package:
+```bash
+pip install chromadb
+```
+By default, MicroCore will use ChromaDB PersistentClient (if the corresponding package is installed).
+Alternatively, you can run Chroma as a separate service and configure MicroCore to use HttpClient:
+```python
+from microcore import configure
+configure(
+    EMBEDDING_DB_HOST = 'localhost',
+    EMBEDDING_DB_PORT = 8000,
+)
+```
 ## 🌟 Core Functions

{ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/__init__.py RENAMED Viewed

@@ -161,4 +161,4 @@ __all__ = [
     # "wrappers",
 ]
-__version__ = "4.0.0-dev1"
+__version__ = "4.0.0-dev3"

{ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/embedding_db/chromadb.py RENAMED Viewed

@@ -1,3 +1,4 @@
+import logging
 from dataclasses import dataclass
 import uuid
@@ -17,6 +18,11 @@ class ChromaEmbeddingDB(AbstractEmbeddingDB):
     def __post_init__(self):
         if self.config.EMBEDDING_DB_HOST:
+            logging.info(
+                "Connecting to ChromaDB at %s:%s",
+                self.config.EMBEDDING_DB_HOST,
+                self.config.EMBEDDING_DB_PORT
+            )
             self.client = chromadb.HttpClient(
                 host=self.config.EMBEDDING_DB_HOST,
                 port=self.config.EMBEDDING_DB_PORT or 8000,

{ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/file_storage.py RENAMED Viewed

@@ -20,6 +20,9 @@ class Storage:
     custom_path: str = field(default="")
+    def __call__(self, custom_path: str):
+        return Storage(custom_path)
     @property
     def path(self) -> Path:
         return Path(str(self.custom_path) or config().STORAGE_PATH)

{ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/logging.py RENAMED Viewed

@@ -41,6 +41,20 @@ def _format_request_log_str(prompt, **kwargs) -> str:
         )
         if out.endswith("\n"):
             out = out[:-1]
+    if LoggingConfig.STRIP_REQUEST_LINES:
+        start_lines, end_lines = LoggingConfig.STRIP_REQUEST_LINES
+        max_lines = start_lines + end_lines
+        lines = out.split("\n")
+        if len(lines) > max_lines:
+            out = "\n".join(
+                lines[:start_lines]
+                + [
+                    f"{LoggingConfig.INDENT}{Fore.YELLOW}"
+                    f"...(output was truncated)..."
+                    f"{LoggingConfig.PROMPT_COLOR}"
+                ]
+                + (lines[-end_lines:] if end_lines else [])
+            )
     return out
@@ -72,6 +86,7 @@ class LoggingConfig:
     OUTPUT_METHOD: callable = print
     REQUEST_FORMATTER: callable = _format_request_log_str
     RESPONSE_FORMATTER: callable = _format_response_log_str
+    STRIP_REQUEST_LINES: tuple[int, int] | None = [40, 15]
 def _log_request(prompt, **kwargs):

{ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/utils.py RENAMED Viewed

@@ -369,3 +369,93 @@ def resolve_callable(
     except (ImportError, AttributeError, AssertionError, ValueError) as e:
         raise ValueError(f"Can't resolve callable by name '{fn}', {e}") from e
     return fn
+def levenshtein(a: str, b: str) -> int:
+    """Compute the Levenshtein edit distance between two strings.
+    The **Levenshtein distance** is the minimum number of single‑character
+    edits (insertions, deletions, or substitutions) required to transform one
+    string into the other.
+    This implementation uses the classic Wagner–Fischer dynamic‑programming
+    algorithm and keeps only two rows of the DP matrix (the previous and the
+    current one) at any time, so memory usage is linear in the length of the
+    shorter string.
+    Args:
+        a (str): First input string.
+        b (str): Second input string.
+    Returns:
+        int: Non‑negative integer representing the edit distance. A value of
+        ``0`` means the strings are identical.
+    Complexity:
+        * **Time** ``O(len(a) * len(b))``
+        * **Space** ``O(min(len(a), len(b)))``
+    Examples:
+        >>> levenshtein("kitten", "sitting")
+        3
+        >>> levenshtein("graph", "giraffe")
+        4
+    """
+    # Fast path: identical strings need no edits.
+    if a == b:
+        return 0
+    # Ensure a is the shorter string to reduce memory; the distance is
+    # symmetric, so swapping the arguments does not change the result.
+    if len(a) > len(b):
+        a, b = b, a
+    # Row 0 of the DP matrix: cost of building each prefix of a from "".
+    previous = list(range(len(a) + 1))
+    for i, ch_b in enumerate(b, start=1):
+        # Column 0: the first i characters of b against the empty prefix of a.
+        current = [i]
+        for j, ch_a in enumerate(a, start=1):
+            cost = 0 if ch_a == ch_b else 1
+            current.append(
+                min(
+                    current[-1] + 1,       # insertion
+                    previous[j] + 1,       # deletion
+                    previous[j - 1] + cost # substitution
+                )
+            )
+        # The finished row becomes the reference row for the next character.
+        previous = current
+    # Last cell of the last row: distance between the full strings.
+    return previous[-1]
+def most_similar(
+    needle: str,
+    haystack: list[str],
+    distance_func: callable = levenshtein,
+    case_sensitive: bool = False,
+) -> tuple[str, int]:
+    """
+    Find the most similar string from a list of strings using the
+    specified distance function.
+    Unless ``case_sensitive`` is True, both ``needle`` and every candidate
+    are lowercased with ``str.lower()`` before being passed to
+    ``distance_func``; the word that is returned is always the original,
+    unmodified entry from ``haystack``.
+    Ties are resolved in favour of the earliest entry in ``haystack``: a
+    candidate replaces the current best only when its distance is strictly
+    smaller.
+    Args:
+        needle (str): The word to compare against.
+        haystack (list[str]): A list of words to compare with.
+        distance_func (callable): The distance function to use for comparison.
+            It is called as ``distance_func(needle, candidate)`` and a smaller
+            result means a closer match. Defaults to levenshtein.
+        case_sensitive (bool): If True, the comparison is case-sensitive.
+    Returns:
+        tuple[str, int]: A tuple containing the most similar word and its distance
+            from the given word. If ``distance_func`` never returns a value
+            below infinity, the result is ``(None, float('inf'))``.
+    Raises:
+        ValueError: If haystack is empty.
+    """
+    if not haystack:
+        raise ValueError("Haystack cannot be empty")
+    # Start from "infinitely far" so the first finite distance always wins.
+    min_dist = float('inf')
+    most_similar_word = None
+    # Normalise the needle once, outside the loop.
+    a = needle if case_sensitive else needle.lower()
+    for word in haystack:
+        b = word if case_sensitive else word.lower()
+        dist = distance_func(a, b)
+        # Strict comparison keeps the first of several equally close words.
+        if dist < min_dist:
+            min_dist = dist
+            most_similar_word = word
+    return most_similar_word, min_dist