ai-microcore 3.16.5__tar.gz → 4.0.0.dev2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/PKG-INFO +19 -1
  2. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/README.md +18 -0
  3. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/__init__.py +1 -1
  4. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/_prepare_llm_args.py +1 -1
  5. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/ai_func/__init__.py +25 -4
  6. ai_microcore-4.0.0.dev2/microcore/ai_func/ai-func.json.j2 +11 -0
  7. ai_microcore-3.16.5/microcore/ai_func/python_ai_func.j2 → ai_microcore-4.0.0.dev2/microcore/ai_func/ai-func.pythonic.j2 +1 -1
  8. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/configuration.py +4 -0
  9. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/embedding_db/__init__.py +3 -2
  10. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/embedding_db/chromadb.py +17 -4
  11. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/llm/shared.py +2 -1
  12. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/logging.py +15 -0
  13. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/message_types.py +8 -2
  14. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/LICENSE +0 -0
  15. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/_env.py +0 -0
  16. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/_llm_functions.py +0 -0
  17. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/ai_modules.py +0 -0
  18. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/file_storage.py +0 -0
  19. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/json_parsing.py +0 -0
  20. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/llm/__init__.py +0 -0
  21. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/llm/_openai_llm_v0.py +0 -0
  22. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/llm/_openai_llm_v1.py +0 -0
  23. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/llm/anthropic.py +0 -0
  24. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/llm/google_genai.py +0 -0
  25. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/llm/google_vertex_ai.py +0 -0
  26. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/llm/local_llm.py +0 -0
  27. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/llm/local_transformers.py +0 -0
  28. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/llm/openai_llm.py +0 -0
  29. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/metrics.py +0 -0
  30. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/python.py +0 -0
  31. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/templating/__init__.py +0 -0
  32. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/templating/jinja2.py +0 -0
  33. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/text2speech/elevenlabs.py +0 -0
  34. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/tokenizing.py +0 -0
  35. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/types.py +0 -0
  36. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/ui.py +0 -0
  37. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/utils.py +0 -0
  38. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/wrappers/__init__.py +0 -0
  39. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/wrappers/llm_response_wrapper.py +0 -0
  40. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/microcore/wrappers/prompt_wrapper.py +0 -0
  41. {ai_microcore-3.16.5 → ai_microcore-4.0.0.dev2}/pyproject.toml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ai-microcore
3
- Version: 3.16.5
3
+ Version: 4.0.0.dev2
4
4
  Summary: # Minimalistic Foundation for AI Applications
5
5
  Keywords: llm,large language models,ai,similarity search,ai search,gpt,openai
6
6
  Author-email: Vitalii Stepanenko <mail@vitalii.in>
@@ -132,6 +132,24 @@ See [transformers installation](https://huggingface.co/docs/transformers/install
132
132
  <br>💡 <small>Setting `USE_DOT_ENV` to `false` disables reading configuration files.</small>
133
133
  3. OS environment variables have the lowest priority.
134
134
 
135
+ ### Vector Databases
136
+
137
+ Vector database functions are available via `microcore.texts`.
138
+ The default vector database is [Chroma](https://www.trychroma.com/).
139
+ In order to use vector database functions, you need to install the `chromadb` package:
140
+ ```bash
141
+ pip install chromadb
142
+ ```
143
+ By default, MicroCore will use the ChromaDB PersistentClient (if the corresponding package is installed).
144
+ Alternatively, you can run Chroma as a separate service and configure MicroCore to use the HttpClient:
145
+
146
+ ```python
147
+ from microcore import configure
148
+ configure(
149
+ EMBEDDING_DB_HOST = 'localhost',
150
+ EMBEDDING_DB_PORT = 8000,
151
+ )
152
+ ```
135
153
 
136
154
  ## 🌟 Core Functions
137
155
 
@@ -107,6 +107,24 @@ See [transformers installation](https://huggingface.co/docs/transformers/install
107
107
  <br>💡 <small>Setting `USE_DOT_ENV` to `false` disables reading configuration files.</small>
108
108
  3. OS environment variables have the lowest priority.
109
109
 
110
+ ### Vector Databases
111
+
112
+ Vector database functions are available via `microcore.texts`.
113
+ The default vector database is [Chroma](https://www.trychroma.com/).
114
+ In order to use vector database functions, you need to install the `chromadb` package:
115
+ ```bash
116
+ pip install chromadb
117
+ ```
118
+ By default, MicroCore will use the ChromaDB PersistentClient (if the corresponding package is installed).
119
+ Alternatively, you can run Chroma as a separate service and configure MicroCore to use the HttpClient:
120
+
121
+ ```python
122
+ from microcore import configure
123
+ configure(
124
+ EMBEDDING_DB_HOST = 'localhost',
125
+ EMBEDDING_DB_PORT = 8000,
126
+ )
127
+ ```
110
128
 
111
129
  ## 🌟 Core Functions
112
130
 
@@ -161,4 +161,4 @@ __all__ = [
161
161
  # "wrappers",
162
162
  ]
163
163
 
164
- __version__ = "3.16.5"
164
+ __version__ = "4.0.0-dev2"
@@ -22,7 +22,7 @@ def prepare_chat_messages(prompt: TPrompt) -> list[dict]:
22
22
  dict(role=DEFAULT_MESSAGE_ROLE, content=msg)
23
23
  if isinstance(msg, str)
24
24
  else (
25
- asdict(msg, dict_factory=msg.dict_factory)
25
+ asdict(msg, dict_factory=msg.DICT_FACTORY)
26
26
  if isinstance(msg, Msg)
27
27
  else msg
28
28
  )
@@ -5,13 +5,23 @@ descr: Allows to describe python functions for LLM
5
5
 
6
6
  import ast
7
7
  import inspect
8
+ from enum import Enum
8
9
  from typing import Dict, Any
9
10
  import docstring_parser
10
11
  from .. import tpl
12
+ from ..utils import dedent
11
13
 
12
14
 
15
+ class AiFuncSyntax(str, Enum):
16
+ PYTHONIC: str = "pythonic"
17
+ JSON: str = "json"
18
+ DEFAULT: str = str(JSON)
19
+
20
+ def __str__(self):
21
+ return self.value
22
+
13
23
  def func_arg_comments(func):
14
- func_source = inspect.getsource(func)
24
+ func_source = dedent(inspect.getsource(func))
15
25
  module = ast.parse(func_source)
16
26
  func_def = module.body[0]
17
27
 
@@ -60,7 +70,7 @@ def func_metadata(func) -> Dict[str, Any]:
60
70
  for name, val in metadata["args"].items():
61
71
  val["comment"] = arg_comments[name]
62
72
 
63
- # Parse docstring
73
+ # Parse docstring
64
74
  parsed_docstring = docstring_parser.parse(inspect.getdoc(func))
65
75
 
66
76
  # Add descriptions from parsed docstring to parameters
@@ -71,6 +81,17 @@ def func_metadata(func) -> Dict[str, Any]:
71
81
  return metadata
72
82
 
73
83
 
74
- def describe_ai_func(func):
84
+ def describe_ai_func(func: callable, syntax: AiFuncSyntax | str = None) -> str:
85
+ """
86
+ Renders function description for LLM
87
+ Args:
88
+ func: callable: function to describe
89
+ syntax: AiFuncSyntax | str: syntax to use for the description
90
+ - Use AiFuncSyntax enums to use standard templates ("json", "pythonic")
91
+ - Use custom template name to use custom template
92
+ Returns: str: rendered description, part of prompt
93
+ """
94
+ syntax = syntax or AiFuncSyntax.DEFAULT
95
+ tpl_file = f"ai-func.{syntax}.j2" if syntax in AiFuncSyntax else syntax
75
96
  metadata = func_metadata(func)
76
- return tpl("python_ai_func.j2", **metadata)
97
+ return tpl(tpl_file, **metadata)
@@ -0,0 +1,11 @@
1
+ # {{ description or name.replace('_', ' ').capitalize() }}
2
+ {
3
+ "call": "{{ name }}"{% if args %}{{ "," }}{% endif %}
4
+ {%- for k,v in args.items() %}
5
+ "{{ k }}":
6
+ {%- if v.type %} <{{ v.type }}>{% endif -%}
7
+ {%- if v.default != 'NOT_SET' %} (default = {{ v.default }}){%endif-%}
8
+ {%- if not loop.last -%},{%- endif -%}
9
+ {%- if v.comment %} {{ v.comment }}{% endif -%}
10
+ {%- endfor -%}
11
+ {{ "\n}" }}
@@ -1,4 +1,4 @@
1
- # {{ description }}
1
+ # {{ description or name.replace('_', ' ').capitalize() }}
2
2
  {{ name }}(
3
3
  {%- for name,v in args.items() -%}
4
4
  {{ "\n\t" }}{{ name }}
@@ -369,6 +369,10 @@ class Config(LLMConfig):
369
369
 
370
370
  EMBEDDING_DB_ALLOW_DUPLICATES: bool = from_env(dtype=bool, default=False)
371
371
 
372
+ EMBEDDING_DB_HOST: str = from_env(default=None)
373
+
374
+ EMBEDDING_DB_PORT: str = from_env(default=None)
375
+
372
376
  DEFAULT_ENCODING: str = from_env("utf-8")
373
377
  """Used in file system operations, utf-8 by default"""
374
378
 
@@ -1,5 +1,4 @@
1
1
  import logging
2
- import sys
3
2
  from abc import ABC, abstractmethod
4
3
  from dataclasses import dataclass
5
4
 
@@ -8,6 +7,8 @@ import tiktoken
8
7
  from ..utils import ExtendedString
9
8
 
10
9
 
10
+ INT32_MAX = 2**31 - 1 # 2147483647
11
+
11
12
  class SearchResults(list):
12
13
  def fit_to_token_size(
13
14
  self,
@@ -115,7 +116,7 @@ class AbstractEmbeddingDB(ABC):
115
116
  **kwargs,
116
117
  ) -> SearchResults | list[str | SearchResult]:
117
118
  return self.search(
118
- collection, query, n_results=sys.maxsize - 1, where=where, **kwargs
119
+ collection, query, n_results=INT32_MAX, where=where, **kwargs
119
120
  )
120
121
 
121
122
  @abstractmethod
@@ -1,3 +1,4 @@
1
+ import logging
1
2
  from dataclasses import dataclass
2
3
  import uuid
3
4
 
@@ -16,10 +17,22 @@ class ChromaEmbeddingDB(AbstractEmbeddingDB):
16
17
  client: chromadb.Client = None
17
18
 
18
19
  def __post_init__(self):
19
- self.client = chromadb.PersistentClient(
20
- path=f"{self.config.STORAGE_PATH}/{self.config.EMBEDDING_DB_FOLDER}",
21
- settings=Settings(anonymized_telemetry=False),
22
- )
20
+ if self.config.EMBEDDING_DB_HOST:
21
+ logging.info(
22
+ "Connecting to ChromaDB at %s:%s",
23
+ self.config.EMBEDDING_DB_HOST,
24
+ self.config.EMBEDDING_DB_PORT
25
+ )
26
+ self.client = chromadb.HttpClient(
27
+ host=self.config.EMBEDDING_DB_HOST,
28
+ port=self.config.EMBEDDING_DB_PORT or 8000,
29
+ settings=Settings(anonymized_telemetry=False),
30
+ )
31
+ else:
32
+ self.client = chromadb.PersistentClient(
33
+ path=f"{self.config.STORAGE_PATH}/{self.config.EMBEDDING_DB_FOLDER}",
34
+ settings=Settings(anonymized_telemetry=False),
35
+ )
23
36
  self.embedding_function = (
24
37
  self.config.EMBEDDING_DB_FUNCTION
25
38
  or embedding_functions.DefaultEmbeddingFunction()
@@ -21,4 +21,5 @@ def prepare_callbacks(config: Config, args, set_stream: bool = True) -> list[cal
21
21
  callbacks.append(cb)
22
22
  if set_stream and "stream" not in args:
23
23
  args["stream"] = bool(callbacks)
24
- return callbacks
24
+
25
+ return callbacks
@@ -41,6 +41,20 @@ def _format_request_log_str(prompt, **kwargs) -> str:
41
41
  )
42
42
  if out.endswith("\n"):
43
43
  out = out[:-1]
44
+ if LoggingConfig.STRIP_REQUEST_LINES:
45
+ start_lines, end_lines = LoggingConfig.STRIP_REQUEST_LINES
46
+ max_lines = start_lines + end_lines
47
+ lines = out.split("\n")
48
+ if len(lines) > max_lines:
49
+ out = "\n".join(
50
+ lines[:start_lines]
51
+ + [
52
+ f"{LoggingConfig.INDENT}{Fore.YELLOW}"
53
+ f"...(output was truncated)..."
54
+ f"{LoggingConfig.PROMPT_COLOR}"
55
+ ]
56
+ + (lines[-end_lines:] if end_lines else [])
57
+ )
44
58
  return out
45
59
 
46
60
 
@@ -72,6 +86,7 @@ class LoggingConfig:
72
86
  OUTPUT_METHOD: callable = print
73
87
  REQUEST_FORMATTER: callable = _format_request_log_str
74
88
  RESPONSE_FORMATTER: callable = _format_response_log_str
89
+ STRIP_REQUEST_LINES: tuple[int, int] | None = [40, 15]
75
90
 
76
91
 
77
92
  def _log_request(prompt, **kwargs):
@@ -2,6 +2,7 @@
2
2
 
3
3
  from enum import Enum
4
4
  from dataclasses import dataclass, field
5
+ from typing import ClassVar
5
6
 
6
7
 
7
8
  class Role(str, Enum):
@@ -9,16 +10,20 @@ class Role(str, Enum):
9
10
  USER = "user"
10
11
  ASSISTANT = "assistant"
11
12
 
13
+ def __str__(self):
14
+ return self.value
15
+
12
16
 
13
17
  DEFAULT_MESSAGE_ROLE = Role.USER
14
18
 
15
19
 
16
20
  @dataclass
17
21
  class Msg:
18
- dict_factory = dict
19
22
  role: str = field(default=DEFAULT_MESSAGE_ROLE)
20
23
  content: str = field(default="")
21
24
 
25
+ DICT_FACTORY: ClassVar = dict
26
+
22
27
  def __str__(self):
23
28
  return str(self.content)
24
29
 
@@ -49,10 +54,11 @@ class PartialMsg(AssistantMsg):
49
54
  is_partial = True
50
55
  """Custom dictionary class to handle additional properties"""
51
56
 
52
- dict_factory = _PartialMsgDict
53
57
  placeholder = "<|placeholder|>"
54
58
  variants_splitter = "<|or|>"
55
59
 
60
+ DICT_FACTORY: ClassVar = _PartialMsgDict
61
+
56
62
  @staticmethod
57
63
  def split_prefix_and_suffixes(content: str):
58
64
  parts = content.split(PartialMsg.placeholder)
File without changes