ai-microcore 4.0.0.dev3__tar.gz → 4.0.0.dev4__tar.gz

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (41)
  1. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/PKG-INFO +1 -1
  2. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/__init__.py +1 -1
  3. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/_env.py +5 -2
  4. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/ai_func/__init__.py +1 -0
  5. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/configuration.py +13 -0
  6. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/embedding_db/__init__.py +1 -0
  7. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/file_storage.py +1 -0
  8. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/json_parsing.py +1 -1
  9. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/llm/_openai_llm_v0.py +1 -0
  10. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/llm/anthropic.py +9 -1
  11. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/llm/local_transformers.py +1 -1
  12. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/llm/shared.py +1 -0
  13. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/tokenizing.py +2 -1
  14. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/utils.py +4 -4
  15. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/LICENSE +0 -0
  16. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/README.md +0 -0
  17. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/_llm_functions.py +0 -0
  18. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/_prepare_llm_args.py +0 -0
  19. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/ai_func/ai-func.json.j2 +0 -0
  20. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/ai_func/ai-func.pythonic.j2 +0 -0
  21. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/ai_modules.py +0 -0
  22. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/embedding_db/chromadb.py +0 -0
  23. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/llm/__init__.py +0 -0
  24. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/llm/_openai_llm_v1.py +0 -0
  25. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/llm/google_genai.py +0 -0
  26. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/llm/google_vertex_ai.py +0 -0
  27. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/llm/local_llm.py +0 -0
  28. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/llm/openai_llm.py +0 -0
  29. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/logging.py +0 -0
  30. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/message_types.py +0 -0
  31. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/metrics.py +0 -0
  32. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/python.py +0 -0
  33. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/templating/__init__.py +0 -0
  34. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/templating/jinja2.py +0 -0
  35. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/text2speech/elevenlabs.py +0 -0
  36. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/types.py +0 -0
  37. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/ui.py +0 -0
  38. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/wrappers/__init__.py +0 -0
  39. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/wrappers/llm_response_wrapper.py +0 -0
  40. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/microcore/wrappers/prompt_wrapper.py +0 -0
  41. {ai_microcore-4.0.0.dev3 → ai_microcore-4.0.0.dev4}/pyproject.toml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ai-microcore
3
- Version: 4.0.0.dev3
3
+ Version: 4.0.0.dev4
4
4
  Summary: # Minimalistic Foundation for AI Applications
5
5
  Keywords: llm,large language models,ai,similarity search,ai search,gpt,openai
6
6
  Author-email: Vitalii Stepanenko <mail@vitalii.in>
@@ -161,4 +161,4 @@ __all__ = [
161
161
  # "wrappers",
162
162
  ]
163
163
 
164
- __version__ = "4.0.0-dev3"
164
+ __version__ = "4.0.0-dev4"
@@ -6,7 +6,7 @@ from typing import TYPE_CHECKING
6
6
  import jinja2
7
7
 
8
8
  from .embedding_db import AbstractEmbeddingDB
9
- from .configuration import Config, ApiType, LLMConfigError
9
+ from .configuration import Config, ApiType, LLMConfigError, EmbeddingDbType
10
10
  from .types import TplFunctionType, LLMAsyncFunctionType, LLMFunctionType
11
11
  from .templating.jinja2 import make_jinja2_env, make_tpl_function
12
12
  from .llm.openai_llm import make_llm_functions as make_openai_llm_functions
@@ -134,7 +134,10 @@ class Env:
134
134
  )
135
135
 
136
136
  def init_similarity_search(self):
137
- if find_spec("chromadb") is not None:
137
+ if (
138
+ self.config.EMBEDDING_DB_TYPE == EmbeddingDbType.CHROMA
139
+ and find_spec("chromadb") is not None
140
+ ):
138
141
  from .embedding_db.chromadb import ChromaEmbeddingDB
139
142
 
140
143
  self.texts = ChromaEmbeddingDB(self.config)
@@ -20,6 +20,7 @@ class AiFuncSyntax(str, Enum):
20
20
  def __str__(self):
21
21
  return self.value
22
22
 
23
+
23
24
  def func_arg_comments(func):
24
25
  func_source = dedent(inspect.getsource(func))
25
26
  module = ast.parse(func_source)
@@ -78,6 +78,17 @@ class ApiType(str, Enum):
78
78
  def is_local(api_type: str) -> bool:
79
79
  return api_type in (ApiType.FUNCTION, ApiType.TRANSFORMERS, ApiType.NONE)
80
80
 
81
+ def __str__(self):
82
+ return self.value
83
+
84
+
85
+ class EmbeddingDbType(str, Enum):
86
+ CHROMA = "chroma"
87
+ NONE = ""
88
+
89
+ def __str__(self):
90
+ return self.value
91
+
81
92
 
82
93
  _default_dotenv_loaded = False
83
94
 
@@ -373,6 +384,8 @@ class Config(LLMConfig):
373
384
 
374
385
  EMBEDDING_DB_PORT: str = from_env(default=None)
375
386
 
387
+ EMBEDDING_DB_TYPE: str = from_env(EmbeddingDbType.CHROMA)
388
+
376
389
  DEFAULT_ENCODING: str = from_env("utf-8")
377
390
  """Used in file system operations, utf-8 by default"""
378
391
 
@@ -9,6 +9,7 @@ from ..utils import ExtendedString
9
9
 
10
10
  INT32_MAX = 2**31 - 1 # 2147483647
11
11
 
12
+
12
13
  class SearchResults(list):
13
14
  def fit_to_token_size(
14
15
  self,
@@ -15,6 +15,7 @@ from .utils import file_link, list_files
15
15
 
16
16
  _missing = object()
17
17
 
18
+
18
19
  @dataclass
19
20
  class Storage:
20
21
 
@@ -102,7 +102,7 @@ def unwrap_json_substring(
102
102
  ...
103
103
 
104
104
  return (
105
- input_string[start : end + 1]
105
+ input_string[start: end + 1]
106
106
  if brace
107
107
  else input_string if return_original_on_fail else ""
108
108
  )
@@ -9,6 +9,7 @@ from ..wrappers.llm_response_wrapper import LLMResponse
9
9
  from ..utils import is_chat_model
10
10
  from .shared import prepare_callbacks
11
11
 
12
+
12
13
  def _get_chunk_text(chunk, mode_chat_model: bool):
13
14
  # Azure API gives first chunk with empty choices
14
15
  choice = chunk.choices[0] if len(chunk.choices) else {}
@@ -9,6 +9,7 @@ from ..types import LLMAsyncFunctionType, LLMFunctionType
9
9
  from ..wrappers.llm_response_wrapper import LLMResponse
10
10
  from .shared import prepare_callbacks
11
11
 
12
+
12
13
  def _get_chunk_text(chunk):
13
14
  return isinstance(chunk, ContentBlockDeltaEvent) and chunk.delta.text or ""
14
15
 
@@ -36,8 +37,15 @@ def _process_streamed_response(response, callbacks: list[callable]):
36
37
 
37
38
 
38
39
  def _prepare_llm_arguments(config: Config, kwargs: dict):
39
- args = {"max_tokens": 1024, **config.LLM_DEFAULT_ARGS, **kwargs}
40
+ args = {**config.LLM_DEFAULT_ARGS, **kwargs}
40
41
  args["model"] = args.get("model", config.MODEL)
42
+ if "max_tokens" not in args:
43
+ if "claude-3-5-sonnet" in args["model"]:
44
+ args["max_tokens"] = 8192
45
+ elif "claude-3-7-sonnet" in args["model"]:
46
+ args["max_tokens"] = 16384
47
+ else:
48
+ args["max_tokens"] = 4096
41
49
  args.pop("seed", None) # Not supported by Anthropic
42
50
  callbacks = prepare_callbacks(config, args)
43
51
  return args, {"callbacks": callbacks}
@@ -16,7 +16,7 @@ def inference(prompt: str, model, tokenizer, **kwargs):
16
16
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
17
17
  outputs = model.generate(**inputs, **kwargs)
18
18
  outputs = [
19
- tokenizer.decode(i[len(inputs[0]) :], skip_special_tokens=skip_special_tokens)
19
+ tokenizer.decode(i[len(inputs[0]):], skip_special_tokens=skip_special_tokens)
20
20
  for i in outputs
21
21
  ]
22
22
  return LLMResponse(outputs[0], dict(all=outputs))
@@ -13,6 +13,7 @@ def make_remove_hidden_output(config: Config) -> callable:
13
13
 
14
14
  return remove_hidden_output
15
15
 
16
+
16
17
  def prepare_callbacks(config: Config, args, set_stream: bool = True) -> list[callable]:
17
18
  callbacks = args.pop("callbacks", []) or [] + config.CALLBACKS or []
18
19
  if "callback" in args:
@@ -5,7 +5,8 @@ import requests.exceptions
5
5
  from ._env import env
6
6
 
7
7
 
8
- class CantLoadTikTokenEncoding(RuntimeError): ...
8
+ class CantLoadTikTokenEncoding(RuntimeError):
9
+ ...
9
10
 
10
11
 
11
12
  def _resolve_tiktoken_encoding(
@@ -412,9 +412,9 @@ def levenshtein(a: str, b: str) -> int:
412
412
  cost = 0 if ch_a == ch_b else 1
413
413
  current.append(
414
414
  min(
415
- current[-1] + 1, # insertion
416
- previous[j] + 1, # deletion
417
- previous[j - 1] + cost # substitution
415
+ current[-1] + 1, # insertion
416
+ previous[j] + 1, # deletion
417
+ previous[j - 1] + cost # substitution
418
418
  )
419
419
  )
420
420
  previous = current
@@ -458,4 +458,4 @@ def most_similar(
458
458
  min_dist = dist
459
459
  most_similar_word = word
460
460
 
461
- return most_similar_word, min_dist
461
+ return most_similar_word, min_dist