ai-microcore 4.0.0.dev2__tar.gz → 4.0.0.dev4__tar.gz

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (41)
  1. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/PKG-INFO +1 -1
  2. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/__init__.py +1 -1
  3. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/_env.py +5 -2
  4. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/ai_func/__init__.py +1 -0
  5. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/configuration.py +13 -0
  6. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/embedding_db/__init__.py +1 -0
  7. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/file_storage.py +4 -0
  8. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/json_parsing.py +1 -1
  9. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/llm/_openai_llm_v0.py +1 -0
  10. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/llm/anthropic.py +9 -1
  11. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/llm/local_transformers.py +1 -1
  12. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/llm/shared.py +1 -0
  13. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/tokenizing.py +2 -1
  14. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/utils.py +90 -0
  15. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/LICENSE +0 -0
  16. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/README.md +0 -0
  17. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/_llm_functions.py +0 -0
  18. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/_prepare_llm_args.py +0 -0
  19. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/ai_func/ai-func.json.j2 +0 -0
  20. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/ai_func/ai-func.pythonic.j2 +0 -0
  21. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/ai_modules.py +0 -0
  22. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/embedding_db/chromadb.py +0 -0
  23. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/llm/__init__.py +0 -0
  24. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/llm/_openai_llm_v1.py +0 -0
  25. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/llm/google_genai.py +0 -0
  26. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/llm/google_vertex_ai.py +0 -0
  27. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/llm/local_llm.py +0 -0
  28. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/llm/openai_llm.py +0 -0
  29. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/logging.py +0 -0
  30. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/message_types.py +0 -0
  31. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/metrics.py +0 -0
  32. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/python.py +0 -0
  33. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/templating/__init__.py +0 -0
  34. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/templating/jinja2.py +0 -0
  35. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/text2speech/elevenlabs.py +0 -0
  36. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/types.py +0 -0
  37. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/ui.py +0 -0
  38. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/wrappers/__init__.py +0 -0
  39. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/wrappers/llm_response_wrapper.py +0 -0
  40. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/microcore/wrappers/prompt_wrapper.py +0 -0
  41. {ai_microcore-4.0.0.dev2 → ai_microcore-4.0.0.dev4}/pyproject.toml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ai-microcore
3
- Version: 4.0.0.dev2
3
+ Version: 4.0.0.dev4
4
4
  Summary: # Minimalistic Foundation for AI Applications
5
5
  Keywords: llm,large language models,ai,similarity search,ai search,gpt,openai
6
6
  Author-email: Vitalii Stepanenko <mail@vitalii.in>
@@ -161,4 +161,4 @@ __all__ = [
161
161
  # "wrappers",
162
162
  ]
163
163
 
164
- __version__ = "4.0.0-dev2"
164
+ __version__ = "4.0.0-dev4"
@@ -6,7 +6,7 @@ from typing import TYPE_CHECKING
6
6
  import jinja2
7
7
 
8
8
  from .embedding_db import AbstractEmbeddingDB
9
- from .configuration import Config, ApiType, LLMConfigError
9
+ from .configuration import Config, ApiType, LLMConfigError, EmbeddingDbType
10
10
  from .types import TplFunctionType, LLMAsyncFunctionType, LLMFunctionType
11
11
  from .templating.jinja2 import make_jinja2_env, make_tpl_function
12
12
  from .llm.openai_llm import make_llm_functions as make_openai_llm_functions
@@ -134,7 +134,10 @@ class Env:
134
134
  )
135
135
 
136
136
  def init_similarity_search(self):
137
- if find_spec("chromadb") is not None:
137
+ if (
138
+ self.config.EMBEDDING_DB_TYPE == EmbeddingDbType.CHROMA
139
+ and find_spec("chromadb") is not None
140
+ ):
138
141
  from .embedding_db.chromadb import ChromaEmbeddingDB
139
142
 
140
143
  self.texts = ChromaEmbeddingDB(self.config)
@@ -20,6 +20,7 @@ class AiFuncSyntax(str, Enum):
20
20
  def __str__(self):
21
21
  return self.value
22
22
 
23
+
23
24
  def func_arg_comments(func):
24
25
  func_source = dedent(inspect.getsource(func))
25
26
  module = ast.parse(func_source)
@@ -78,6 +78,17 @@ class ApiType(str, Enum):
78
78
  def is_local(api_type: str) -> bool:
79
79
  return api_type in (ApiType.FUNCTION, ApiType.TRANSFORMERS, ApiType.NONE)
80
80
 
81
+ def __str__(self):
82
+ return self.value
83
+
84
+
85
+ class EmbeddingDbType(str, Enum):
86
+ CHROMA = "chroma"
87
+ NONE = ""
88
+
89
+ def __str__(self):
90
+ return self.value
91
+
81
92
 
82
93
  _default_dotenv_loaded = False
83
94
 
@@ -373,6 +384,8 @@ class Config(LLMConfig):
373
384
 
374
385
  EMBEDDING_DB_PORT: str = from_env(default=None)
375
386
 
387
+ EMBEDDING_DB_TYPE: str = from_env(EmbeddingDbType.CHROMA)
388
+
376
389
  DEFAULT_ENCODING: str = from_env("utf-8")
377
390
  """Used in file system operations, utf-8 by default"""
378
391
 
@@ -9,6 +9,7 @@ from ..utils import ExtendedString
9
9
 
10
10
  INT32_MAX = 2**31 - 1 # 2147483647
11
11
 
12
+
12
13
  class SearchResults(list):
13
14
  def fit_to_token_size(
14
15
  self,
@@ -15,11 +15,15 @@ from .utils import file_link, list_files
15
15
 
16
16
  _missing = object()
17
17
 
18
+
18
19
  @dataclass
19
20
  class Storage:
20
21
 
21
22
  custom_path: str = field(default="")
22
23
 
24
+ def __call__(self, custom_path: str):
25
+ return Storage(custom_path)
26
+
23
27
  @property
24
28
  def path(self) -> Path:
25
29
  return Path(str(self.custom_path) or config().STORAGE_PATH)
@@ -102,7 +102,7 @@ def unwrap_json_substring(
102
102
  ...
103
103
 
104
104
  return (
105
- input_string[start : end + 1]
105
+ input_string[start: end + 1]
106
106
  if brace
107
107
  else input_string if return_original_on_fail else ""
108
108
  )
@@ -9,6 +9,7 @@ from ..wrappers.llm_response_wrapper import LLMResponse
9
9
  from ..utils import is_chat_model
10
10
  from .shared import prepare_callbacks
11
11
 
12
+
12
13
  def _get_chunk_text(chunk, mode_chat_model: bool):
13
14
  # Azure API gives first chunk with empty choices
14
15
  choice = chunk.choices[0] if len(chunk.choices) else {}
@@ -9,6 +9,7 @@ from ..types import LLMAsyncFunctionType, LLMFunctionType
9
9
  from ..wrappers.llm_response_wrapper import LLMResponse
10
10
  from .shared import prepare_callbacks
11
11
 
12
+
12
13
  def _get_chunk_text(chunk):
13
14
  return isinstance(chunk, ContentBlockDeltaEvent) and chunk.delta.text or ""
14
15
 
@@ -36,8 +37,15 @@ def _process_streamed_response(response, callbacks: list[callable]):
36
37
 
37
38
 
38
39
  def _prepare_llm_arguments(config: Config, kwargs: dict):
39
- args = {"max_tokens": 1024, **config.LLM_DEFAULT_ARGS, **kwargs}
40
+ args = {**config.LLM_DEFAULT_ARGS, **kwargs}
40
41
  args["model"] = args.get("model", config.MODEL)
42
+ if "max_tokens" not in args:
43
+ if "claude-3-5-sonnet" in args["model"]:
44
+ args["max_tokens"] = 8192
45
+ elif "claude-3-7-sonnet" in args["model"]:
46
+ args["max_tokens"] = 16384
47
+ else:
48
+ args["max_tokens"] = 4096
41
49
  args.pop("seed", None) # Not supported by Anthropic
42
50
  callbacks = prepare_callbacks(config, args)
43
51
  return args, {"callbacks": callbacks}
@@ -16,7 +16,7 @@ def inference(prompt: str, model, tokenizer, **kwargs):
16
16
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
17
17
  outputs = model.generate(**inputs, **kwargs)
18
18
  outputs = [
19
- tokenizer.decode(i[len(inputs[0]) :], skip_special_tokens=skip_special_tokens)
19
+ tokenizer.decode(i[len(inputs[0]):], skip_special_tokens=skip_special_tokens)
20
20
  for i in outputs
21
21
  ]
22
22
  return LLMResponse(outputs[0], dict(all=outputs))
@@ -13,6 +13,7 @@ def make_remove_hidden_output(config: Config) -> callable:
13
13
 
14
14
  return remove_hidden_output
15
15
 
16
+
16
17
  def prepare_callbacks(config: Config, args, set_stream: bool = True) -> list[callable]:
17
18
  callbacks = args.pop("callbacks", []) or [] + config.CALLBACKS or []
18
19
  if "callback" in args:
@@ -5,7 +5,8 @@ import requests.exceptions
5
5
  from ._env import env
6
6
 
7
7
 
8
- class CantLoadTikTokenEncoding(RuntimeError): ...
8
+ class CantLoadTikTokenEncoding(RuntimeError):
9
+ ...
9
10
 
10
11
 
11
12
  def _resolve_tiktoken_encoding(
@@ -369,3 +369,93 @@ def resolve_callable(
369
369
  except (ImportError, AttributeError, AssertionError, ValueError) as e:
370
370
  raise ValueError(f"Can't resolve callable by name '{fn}', {e}") from e
371
371
  return fn
372
+
373
+
374
+ def levenshtein(a: str, b: str) -> int:
375
+ """Compute the Levenshtein edit distance between two strings.
376
+
377
+ The **Levenshtein distance** is the minimum number of single‑character
378
+ edits (insertions, deletions, or substitutions) required to transform one
379
+ string into the other.
380
+
381
+ This implementation uses the classic Wagner–Fischer dynamic‑programming
382
+ algorithm and stores only a single row of the DP matrix at any time,
383
+ reducing memory usage to be linear in the length of the shorter string.
384
+
385
+ Args:
386
+ a (str): First input string.
387
+ b (str): Second input string.
388
+
389
+ Returns:
390
+ int: Non‑negative integer representing the edit distance. A value of
391
+ ``0`` means the strings are identical.
392
+
393
+ Complexity:
394
+ * **Time** ``O(ab)``
395
+ * **Space** ``O(min(a, b))``
396
+
397
+ Examples:
398
+ >>> levenshtein("kitten", "sitting")
399
+ 3
400
+ >>> levenshtein("graph", "giraffe")
401
+ 4
402
+ """
403
+ if a == b:
404
+ return 0
405
+ # Ensure a is the shorter string to reduce memory
406
+ if len(a) > len(b):
407
+ a, b = b, a
408
+ previous = list(range(len(a) + 1))
409
+ for i, ch_b in enumerate(b, start=1):
410
+ current = [i]
411
+ for j, ch_a in enumerate(a, start=1):
412
+ cost = 0 if ch_a == ch_b else 1
413
+ current.append(
414
+ min(
415
+ current[-1] + 1, # insertion
416
+ previous[j] + 1, # deletion
417
+ previous[j - 1] + cost # substitution
418
+ )
419
+ )
420
+ previous = current
421
+ return previous[-1]
422
+
423
+
424
+ def most_similar(
425
+ needle: str,
426
+ haystack: list[str],
427
+ distance_func: callable = levenshtein,
428
+ case_sensitive: bool = False,
429
+ ) -> tuple[str, int]:
430
+ """
431
+ Find the most similar string from a list of strings using the
432
+ specified distance function.
433
+
434
+ Args:
435
+ needle (str): The word to compare against.
436
+ haystack (list[str]): A list of words to compare with.
437
+ distance_func (callable): The distance function to use for comparison.
438
+ Defaults to levenshtein.
439
+ case_sensitive (bool): If True, the comparison is case-sensitive.
440
+
441
+ Returns:
442
+ tuple[str, int]: A tuple containing the most similar word and its distance
443
+ from the given word.
444
+
445
+ Raises:
446
+ ValueError: If haystack is empty.
447
+ """
448
+ if not haystack:
449
+ raise ValueError("Haystack cannot be empty")
450
+
451
+ min_dist = float('inf')
452
+ most_similar_word = None
453
+ a = needle if case_sensitive else needle.lower()
454
+ for word in haystack:
455
+ b = word if case_sensitive else word.lower()
456
+ dist = distance_func(a, b)
457
+ if dist < min_dist:
458
+ min_dist = dist
459
+ most_similar_word = word
460
+
461
+ return most_similar_word, min_dist