ai-microcore 4.0.0.dev22__tar.gz → 4.0.0.dev23__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/PKG-INFO +1 -1
  2. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/__init__.py +1 -1
  3. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/_env.py +11 -0
  4. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/configuration.py +20 -0
  5. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/embedding_db/chromadb.py +1 -0
  6. ai_microcore-4.0.0.dev23/microcore/embedding_db/qdrant.py +227 -0
  7. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/ui.py +1 -1
  8. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/LICENSE +0 -0
  9. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/README.md +0 -0
  10. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/_llm_functions.py +0 -0
  11. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/_prepare_llm_args.py +0 -0
  12. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/ai_func/__init__.py +0 -0
  13. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/ai_func/ai-func.json.j2 +0 -0
  14. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/ai_func/ai-func.pythonic.j2 +0 -0
  15. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/ai_modules.py +0 -0
  16. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/embedding_db/__init__.py +0 -0
  17. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/file_storage.py +0 -0
  18. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/interactive_setup.py +0 -0
  19. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/json_parsing.py +0 -0
  20. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/llm/__init__.py +0 -0
  21. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/llm/_openai_llm_v0.py +0 -0
  22. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/llm/_openai_llm_v1.py +0 -0
  23. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/llm/anthropic.py +0 -0
  24. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/llm/google_genai.py +0 -0
  25. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/llm/google_vertex_ai.py +0 -0
  26. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/llm/local_llm.py +0 -0
  27. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/llm/local_transformers.py +0 -0
  28. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/llm/openai_llm.py +0 -0
  29. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/llm/shared.py +0 -0
  30. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/logging.py +0 -0
  31. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/mcp.py +0 -0
  32. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/message_types.py +0 -0
  33. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/metrics.py +0 -0
  34. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/python.py +0 -0
  35. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/templating/__init__.py +0 -0
  36. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/templating/jinja2.py +0 -0
  37. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/text2speech/elevenlabs.py +0 -0
  38. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/tokenizing.py +0 -0
  39. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/types.py +0 -0
  40. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/utils.py +0 -0
  41. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/wrappers/__init__.py +0 -0
  42. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/wrappers/llm_response_wrapper.py +0 -0
  43. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/wrappers/prompt_wrapper.py +0 -0
  44. {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/pyproject.toml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ai-microcore
3
- Version: 4.0.0.dev22
3
+ Version: 4.0.0.dev23
4
4
  Summary: # Minimalistic Foundation for AI Applications
5
5
  Keywords: llm,large language models,ai,similarity search,ai search,gpt,openai,framework,adapter
6
6
  Author-email: Vitalii Stepanenko <mail@vitalii.in>
@@ -186,4 +186,4 @@ __all__ = [
186
186
  # "wrappers",
187
187
  ]
188
188
 
189
- __version__ = "4.0.0-dev22"
189
+ __version__ = "4.0.0-dev23"
@@ -151,6 +151,17 @@ class Env:
151
151
  from .embedding_db.chromadb import ChromaEmbeddingDB
152
152
 
153
153
  self.texts = ChromaEmbeddingDB(self.config)
154
+ return
155
+
156
+ if self.config.EMBEDDING_DB_TYPE == EmbeddingDbType.QDRANT:
157
+ if find_spec("qdrant_client") is None:
158
+ raise ModuleNotFoundError(
159
+ "To use Qdrant, install the `qdrant-client` package. "
160
+ "Run `pip install qdrant-client`."
161
+ )
162
+ from .embedding_db.qdrant import QdrantEmbeddingDB
163
+ self.texts = QdrantEmbeddingDB(self.config)
164
+ return
154
165
 
155
166
 
156
167
  @dataclass
@@ -84,6 +84,7 @@ class ApiType(str, Enum):
84
84
 
85
85
  class EmbeddingDbType(str, Enum):
86
86
  CHROMA = "chroma"
87
+ QDRANT = "qdrant"
87
88
  NONE = ""
88
89
 
89
90
  def __str__(self):
@@ -392,6 +393,9 @@ class Config(LLMConfig):
392
393
 
393
394
  EMBEDDING_DB_TYPE: str = from_env(EmbeddingDbType.CHROMA)
394
395
 
396
+ EMBEDDING_DB_SIZE: int = from_env(default=0)
397
+ """Used with Qdrant"""
398
+
395
399
  DEFAULT_ENCODING: str = from_env("utf-8")
396
400
  """Used in file system operations, utf-8 by default"""
397
401
 
@@ -435,3 +439,19 @@ class Config(LLMConfig):
435
439
  raise e
436
440
  if self.TEXT_TO_SPEECH_PATH is None:
437
441
  self.TEXT_TO_SPEECH_PATH = Path(self.STORAGE_PATH) / "voicing"
442
+
443
+ def validate(self):
444
+ super().validate()
445
+ if self.EMBEDDING_DB_TYPE == EmbeddingDbType.QDRANT:
446
+ if not self.EMBEDDING_DB_SIZE:
447
+ raise LLMConfigError(
448
+ "EMBEDDING_DB_SIZE is required configuration parameter for Qdrant"
449
+ )
450
+ if not self.EMBEDDING_DB_HOST:
451
+ raise LLMConfigError(
452
+ "EMBEDDING_DB_HOST is required configuration parameter for Qdrant"
453
+ )
454
+ if not self.EMBEDDING_DB_FUNCTION:
455
+ raise LLMConfigError(
456
+ "EMBEDDING_DB_FUNCTION is required configuration parameter for Qdrant"
457
+ )
@@ -107,6 +107,7 @@ class ChromaEmbeddingDB(AbstractEmbeddingDB):
107
107
  return chroma_collection.count() if chroma_collection else 0
108
108
 
109
109
  def delete(self, collection: str, what: str | list[str] | dict):
110
+ # pylint: disable=R0801, duplicate-code
110
111
  if not self.collection_exists(collection):
111
112
  return
112
113
 
@@ -0,0 +1,227 @@
1
+ import hashlib
2
+ import logging
3
+ import sys
4
+ from dataclasses import dataclass
5
+ import uuid
6
+
7
+ from qdrant_client import QdrantClient
8
+ from qdrant_client.http.models import (
9
+ PointIdsList,
10
+ CollectionInfo,
11
+ Record,
12
+ FieldCondition,
13
+ Filter,
14
+ MatchValue,
15
+ MatchText
16
+ )
17
+ from qdrant_client.models import VectorParams, Distance, PointStruct, ScoredPoint
18
+
19
+ from ..configuration import Config
20
+ from .. import SearchResult, SearchResults, AbstractEmbeddingDB
21
+
22
+
23
+ def is_sentence_transformer(fn):
24
+ return fn.__class__.__name__ == 'SentenceTransformer'
25
+
26
+
27
+ def prepare_embedding_function(fn):
28
+ if is_sentence_transformer(fn):
29
+ return lambda x: fn.encode(x).tolist()
30
+ return fn
31
+
32
+
33
+ @dataclass
34
+ class QdrantEmbeddingDB(AbstractEmbeddingDB):
35
+ config: Config
36
+ embedding_function: callable = None
37
+ client: QdrantClient = None
38
+
39
+ def __post_init__(self):
40
+
41
+ logging.info(
42
+ "Connecting to Qdrant at %s:%s",
43
+ self.config.EMBEDDING_DB_HOST,
44
+ self.config.EMBEDDING_DB_PORT
45
+ )
46
+ self.client = QdrantClient(
47
+ host=self.config.EMBEDDING_DB_HOST,
48
+ port=self.config.EMBEDDING_DB_PORT or 6333
49
+ )
50
+ self.embedding_function = prepare_embedding_function(self.config.EMBEDDING_DB_FUNCTION)
51
+
52
+ @classmethod
53
+ def _wrap_results(cls, points: list[ScoredPoint | Record]) -> list[str | SearchResult]:
54
+ return SearchResults(
55
+ [
56
+ SearchResult(
57
+ i.payload["_text"],
58
+ dict(
59
+ id=i.id,
60
+ distance=i.score if hasattr(i, "score") else 0,
61
+ metadata={k: v for k, v in i.payload.items() if k != "_text"},
62
+ ),
63
+
64
+ )
65
+ for i in points
66
+ ]
67
+ )
68
+
69
+ @classmethod
70
+ def _convert_where(cls, where: dict | None, kwargs=None) -> Filter | None:
71
+ conditions = []
72
+ # ChromaDB format
73
+ if kwargs and "where_document" in kwargs and kwargs["where_document"]:
74
+ conditions.append(
75
+ FieldCondition(
76
+ key="_text",
77
+ match=MatchText(text=kwargs["where_document"]["$contains"])
78
+ )
79
+ )
80
+ if where:
81
+ for k, v in where.items():
82
+ conditions.append(FieldCondition(key=k, match=MatchValue(value=v)))
83
+
84
+ return Filter(must=conditions) if conditions else None
85
+
86
+ def search(
87
+ self,
88
+ collection: str,
89
+ query: str | list,
90
+ n_results: int = 5,
91
+ where: dict = None,
92
+ **kwargs,
93
+ ) -> list[str | SearchResult]:
94
+ if not self.collection_exists(collection):
95
+ return SearchResults([])
96
+
97
+ if not isinstance(query, str):
98
+ raise ValueError("`query` must be a string")
99
+
100
+ query_vector = self.embedding_function(query)
101
+ where = self._convert_where(where, kwargs)
102
+ kwargs.pop("where_document", None)
103
+ hits = self.client.query_points(
104
+ collection_name=collection,
105
+ query=query_vector,
106
+ limit=n_results,
107
+ query_filter=where,
108
+ **kwargs,
109
+ )
110
+ return self._wrap_results(hits.points)
111
+
112
+ def save_many(self, collection: str, items: list[tuple[str, dict] | str]):
113
+ if not self.collection_exists(collection):
114
+ self._create_collection(collection)
115
+ point_structs = []
116
+ ids = set()
117
+ unique = not self.config.EMBEDDING_DB_ALLOW_DUPLICATES
118
+ for i in items:
119
+ if isinstance(i, str):
120
+ text = i
121
+ metadata = dict()
122
+ else:
123
+ text = i[0]
124
+ metadata = i[1] or {}
125
+ metadata["_text"] = text
126
+ if unique:
127
+ new_id = str(uuid.UUID(hashlib.md5(text.encode()).hexdigest()))
128
+ if new_id in ids:
129
+ continue
130
+ ids.add(new_id)
131
+ else:
132
+ new_id = str(uuid.uuid4())
133
+ point_structs.append(
134
+ PointStruct(
135
+ id=new_id,
136
+ vector=self.embedding_function(text),
137
+ payload=metadata
138
+ )
139
+ )
140
+
141
+ operation_info = self.client.upsert(
142
+ collection_name=collection,
143
+ wait=True,
144
+ points=point_structs,
145
+ )
146
+ return operation_info
147
+
148
+ def clear(self, collection: str):
149
+ if self.collection_exists(collection):
150
+ self.client.delete_collection(collection_name=collection)
151
+
152
+ def count(self, collection: str) -> int:
153
+ if self.collection_exists(collection):
154
+ return self._get_collection(collection).points_count
155
+ return 0
156
+
157
+ def delete(self, collection: str, what: str | list[str] | dict):
158
+ # pylint: disable=R0801, duplicate-code
159
+ if not self.collection_exists(collection):
160
+ return
161
+
162
+ if isinstance(what, str):
163
+ ids, where = [what], None
164
+ elif isinstance(what, list):
165
+ ids, where = what, None
166
+ elif isinstance(what, dict):
167
+ ids, where = None, what
168
+ else:
169
+ raise ValueError("Invalid `what` argument")
170
+ if ids is not None:
171
+ points_selector = PointIdsList(points=ids)
172
+ else:
173
+ points_selector = self._convert_where(where)
174
+ self.client.delete(collection_name=collection, points_selector=points_selector)
175
+
176
+ def get(
177
+ self,
178
+ collection: str,
179
+ ids: list[str] | str = None,
180
+ limit: int = None,
181
+ offset: int = None,
182
+ where: dict = None,
183
+ **kwargs,
184
+ ) -> list[str | SearchResult] | str | SearchResult | None:
185
+ if not self.collection_exists(collection):
186
+ return SearchResults([]) if not isinstance(ids, str) else None
187
+ if ids:
188
+ raise NotImplementedError("Getting by ids is not supported for Qdrant")
189
+ where = self._convert_where(where, kwargs)
190
+ kwargs.pop("where_document", None)
191
+ search_results = self._wrap_results(
192
+ self.client.scroll(
193
+ collection,
194
+ limit=limit or sys.maxsize - 1,
195
+ offset=offset or 0,
196
+ scroll_filter=where,
197
+ **kwargs
198
+ )[0]
199
+ )
200
+ if isinstance(ids, str):
201
+ return search_results[0] if search_results else None
202
+ return search_results
203
+
204
+ def get_all(self, collection: str) -> list[str | SearchResult]:
205
+ if not self.collection_exists(collection):
206
+ return SearchResults([])
207
+ return self._wrap_results(self.client.scroll(collection, limit=sys.maxsize - 1)[0])
208
+
209
+ def collection_exists(self, collection: str) -> bool:
210
+ return self.client.collection_exists(collection)
211
+
212
+ def _create_collection(self, name: str):
213
+ assert self.config.EMBEDDING_DB_SIZE > 0
214
+ size = self.config.EMBEDDING_DB_SIZE
215
+ distance = Distance.COSINE
216
+ self.client.create_collection(
217
+ collection_name=name,
218
+ vectors_config=VectorParams(size=size, distance=distance),
219
+ )
220
+
221
+ def _get_collection(self, name: str, create: bool = False) -> CollectionInfo | None:
222
+ if not self.collection_exists(name):
223
+ if create:
224
+ self._create_collection(name)
225
+ else:
226
+ return None
227
+ return self.client.get_collection(name)
@@ -54,7 +54,7 @@ def ask_choose(msg: str, variants: list):
54
54
  continue
55
55
  break
56
56
 
57
- item = variants[int(i)]
57
+ item = variants[i]
58
58
  return item
59
59
 
60
60