ai-microcore 4.0.0.dev1__tar.gz → 4.0.0.dev3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/PKG-INFO +19 -1
  2. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/README.md +18 -0
  3. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/__init__.py +1 -1
  4. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/embedding_db/chromadb.py +6 -0
  5. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/file_storage.py +3 -0
  6. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/logging.py +15 -0
  7. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/utils.py +90 -0
  8. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/LICENSE +0 -0
  9. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/_env.py +0 -0
  10. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/_llm_functions.py +0 -0
  11. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/_prepare_llm_args.py +0 -0
  12. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/ai_func/__init__.py +0 -0
  13. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/ai_func/ai-func.json.j2 +0 -0
  14. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/ai_func/ai-func.pythonic.j2 +0 -0
  15. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/ai_modules.py +0 -0
  16. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/configuration.py +0 -0
  17. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/embedding_db/__init__.py +0 -0
  18. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/json_parsing.py +0 -0
  19. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/llm/__init__.py +0 -0
  20. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/llm/_openai_llm_v0.py +0 -0
  21. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/llm/_openai_llm_v1.py +0 -0
  22. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/llm/anthropic.py +0 -0
  23. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/llm/google_genai.py +0 -0
  24. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/llm/google_vertex_ai.py +0 -0
  25. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/llm/local_llm.py +0 -0
  26. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/llm/local_transformers.py +0 -0
  27. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/llm/openai_llm.py +0 -0
  28. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/llm/shared.py +0 -0
  29. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/message_types.py +0 -0
  30. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/metrics.py +0 -0
  31. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/python.py +0 -0
  32. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/templating/__init__.py +0 -0
  33. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/templating/jinja2.py +0 -0
  34. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/text2speech/elevenlabs.py +0 -0
  35. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/tokenizing.py +0 -0
  36. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/types.py +0 -0
  37. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/ui.py +0 -0
  38. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/wrappers/__init__.py +0 -0
  39. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/wrappers/llm_response_wrapper.py +0 -0
  40. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/microcore/wrappers/prompt_wrapper.py +0 -0
  41. {ai_microcore-4.0.0.dev1 → ai_microcore-4.0.0.dev3}/pyproject.toml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ai-microcore
3
- Version: 4.0.0.dev1
3
+ Version: 4.0.0.dev3
4
4
  Summary: # Minimalistic Foundation for AI Applications
5
5
  Keywords: llm,large language models,ai,similarity search,ai search,gpt,openai
6
6
  Author-email: Vitalii Stepanenko <mail@vitalii.in>
@@ -132,6 +132,24 @@ See [transformers installation](https://huggingface.co/docs/transformers/install
132
132
  <br>💡 <small>Setting `USE_DOT_ENV` to `false` disables reading configuration files.</small>
133
133
  3. OS environment variables have the lowest priority.
134
134
 
135
+ ### Vector Databases
136
+
137
+ Vector database functions are available via `microcore.texts`.
138
+ The default vector database is [Chroma](https://www.trychroma.com/).
139
+ In order to use vector database functions, you need to install the `chromadb` package:
140
+ ```bash
141
+ pip install chromadb
142
+ ```
143
+ By default, MicroCore will use the ChromaDB PersistentClient (if the corresponding package is installed).
144
+ Alternatively, you can run Chroma as a separate service and configure MicroCore to use the HttpClient:
145
+
146
+ ```python
147
+ from microcore import configure
148
+ configure(
149
+ EMBEDDING_DB_HOST = 'localhost',
150
+ EMBEDDING_DB_PORT = 8000,
151
+ )
152
+ ```
135
153
 
136
154
  ## 🌟 Core Functions
137
155
 
@@ -107,6 +107,24 @@ See [transformers installation](https://huggingface.co/docs/transformers/install
107
107
  <br>💡 <small>Setting `USE_DOT_ENV` to `false` disables reading configuration files.</small>
108
108
  3. OS environment variables have the lowest priority.
109
109
 
110
+ ### Vector Databases
111
+
112
+ Vector database functions are available via `microcore.texts`.
113
+ The default vector database is [Chroma](https://www.trychroma.com/).
114
+ In order to use vector database functions, you need to install the `chromadb` package:
115
+ ```bash
116
+ pip install chromadb
117
+ ```
118
+ By default, MicroCore will use the ChromaDB PersistentClient (if the corresponding package is installed).
119
+ Alternatively, you can run Chroma as a separate service and configure MicroCore to use the HttpClient:
120
+
121
+ ```python
122
+ from microcore import configure
123
+ configure(
124
+ EMBEDDING_DB_HOST = 'localhost',
125
+ EMBEDDING_DB_PORT = 8000,
126
+ )
127
+ ```
110
128
 
111
129
  ## 🌟 Core Functions
112
130
 
@@ -161,4 +161,4 @@ __all__ = [
161
161
  # "wrappers",
162
162
  ]
163
163
 
164
- __version__ = "4.0.0-dev1"
164
+ __version__ = "4.0.0-dev3"
@@ -1,3 +1,4 @@
1
+ import logging
1
2
  from dataclasses import dataclass
2
3
  import uuid
3
4
 
@@ -17,6 +18,11 @@ class ChromaEmbeddingDB(AbstractEmbeddingDB):
17
18
 
18
19
  def __post_init__(self):
19
20
  if self.config.EMBEDDING_DB_HOST:
21
+ logging.info(
22
+ "Connecting to ChromaDB at %s:%s",
23
+ self.config.EMBEDDING_DB_HOST,
24
+ self.config.EMBEDDING_DB_PORT
25
+ )
20
26
  self.client = chromadb.HttpClient(
21
27
  host=self.config.EMBEDDING_DB_HOST,
22
28
  port=self.config.EMBEDDING_DB_PORT or 8000,
@@ -20,6 +20,9 @@ class Storage:
20
20
 
21
21
  custom_path: str = field(default="")
22
22
 
23
def __call__(self, custom_path: str):
    """Create a new ``Storage`` that uses ``custom_path``.

    Calling a storage object acts as a factory: the instance being
    called is not modified; a fresh ``Storage`` is built from the
    given path and returned.

    Args:
        custom_path: Value passed as the first argument to ``Storage``.

    Returns:
        A new ``Storage`` instance.
    """
    rebound = Storage(custom_path)
    return rebound
25
+
23
26
  @property
24
27
  def path(self) -> Path:
25
28
  return Path(str(self.custom_path) or config().STORAGE_PATH)
@@ -41,6 +41,20 @@ def _format_request_log_str(prompt, **kwargs) -> str:
41
41
  )
42
42
  if out.endswith("\n"):
43
43
  out = out[:-1]
44
+ if LoggingConfig.STRIP_REQUEST_LINES:
45
+ start_lines, end_lines = LoggingConfig.STRIP_REQUEST_LINES
46
+ max_lines = start_lines + end_lines
47
+ lines = out.split("\n")
48
+ if len(lines) > max_lines:
49
+ out = "\n".join(
50
+ lines[:start_lines]
51
+ + [
52
+ f"{LoggingConfig.INDENT}{Fore.YELLOW}"
53
+ f"...(output was truncated)..."
54
+ f"{LoggingConfig.PROMPT_COLOR}"
55
+ ]
56
+ + (lines[-end_lines:] if end_lines else [])
57
+ )
44
58
  return out
45
59
 
46
60
 
@@ -72,6 +86,7 @@ class LoggingConfig:
72
86
  OUTPUT_METHOD: callable = print
73
87
  REQUEST_FORMATTER: callable = _format_request_log_str
74
88
  RESPONSE_FORMATTER: callable = _format_response_log_str
89
+ STRIP_REQUEST_LINES: tuple[int, int] | None = [40, 15]
75
90
 
76
91
 
77
92
  def _log_request(prompt, **kwargs):
@@ -369,3 +369,93 @@ def resolve_callable(
369
369
  except (ImportError, AttributeError, AssertionError, ValueError) as e:
370
370
  raise ValueError(f"Can't resolve callable by name '{fn}', {e}") from e
371
371
  return fn
372
+
373
+
374
def levenshtein(a: str, b: str) -> int:
    """Compute the Levenshtein edit distance between two strings.

    The Levenshtein distance is the minimum number of single-character
    edits (insertions, deletions, or substitutions) required to transform
    one string into the other.

    This implementation uses the Wagner-Fischer dynamic-programming
    algorithm. A single row of the DP matrix is kept and updated in place,
    so memory usage is linear in the length of the shorter string.

    Args:
        a (str): First input string.
        b (str): Second input string.

    Returns:
        int: Non-negative integer representing the edit distance. A value
        of ``0`` means the strings are identical.

    Complexity:
        * Time ``O(len(a) * len(b))``
        * Space ``O(min(len(a), len(b)))``

    Examples:
        >>> levenshtein("kitten", "sitting")
        3
        >>> levenshtein("graph", "giraffe")
        4
    """
    if a == b:
        return 0
    # Keep ``a`` as the shorter string so the DP row is as small as possible.
    if len(a) > len(b):
        a, b = b, a
    # Turning an empty string into ``b`` takes exactly len(b) insertions.
    if not a:
        return len(b)

    # row[j] holds the distance between the processed prefix of ``b``
    # and a[:j]; it starts as the distances from the empty prefix.
    row = list(range(len(a) + 1))
    for i, ch_b in enumerate(b, start=1):
        # ``diagonal`` carries D[i-1][j-1] across the in-place update.
        diagonal = row[0]
        row[0] = i
        for j, ch_a in enumerate(a, start=1):
            above = row[j]  # D[i-1][j], read before it is overwritten
            cost = 0 if ch_a == ch_b else 1
            row[j] = min(
                row[j - 1] + 1,   # insertion
                above + 1,        # deletion
                diagonal + cost,  # substitution (or match)
            )
            diagonal = above
    return row[-1]
422
+
423
+
424
def most_similar(
    needle: str,
    haystack: list[str],
    distance_func: callable = levenshtein,
    case_sensitive: bool = False,
) -> tuple[str, int]:
    """
    Find the most similar string from a collection of strings using the
    specified distance function.

    When several candidates share the smallest distance, the one that
    appears first in ``haystack`` is returned.

    Args:
        needle (str): The word to compare against.
        haystack (list[str]): Words to compare with. Any iterable of
            strings is accepted.
        distance_func (callable): Function called as
            ``distance_func(needle, candidate)`` returning a comparable
            distance. Defaults to levenshtein.
        case_sensitive (bool): If True, the comparison is case-sensitive.
            Otherwise both strings are case-folded before comparison.

    Returns:
        tuple[str, int]: A tuple containing the most similar word (in its
        original casing) and its distance from the given word.

    Raises:
        ValueError: If haystack is empty.
    """
    if not haystack:
        raise ValueError("Haystack cannot be empty")

    def normalize(text: str) -> str:
        # casefold() is the Unicode-aware caseless form; unlike lower(),
        # it also equates pairs such as "ß" and "SS".
        return text if case_sensitive else text.casefold()

    target = normalize(needle)
    best_word = None
    best_dist = None
    for word in haystack:
        dist = distance_func(target, normalize(word))
        # Strict comparison keeps the first candidate on ties.
        if best_dist is None or dist < best_dist:
            best_word, best_dist = word, dist

    # An empty iterator is truthy and slips past the first guard.
    if best_dist is None:
        raise ValueError("Haystack cannot be empty")

    return best_word, best_dist