ai-microcore 4.0.0.dev22__tar.gz → 4.0.0.dev23__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/PKG-INFO +1 -1
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/__init__.py +1 -1
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/_env.py +11 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/configuration.py +20 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/embedding_db/chromadb.py +1 -0
- ai_microcore-4.0.0.dev23/microcore/embedding_db/qdrant.py +227 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/ui.py +1 -1
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/LICENSE +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/README.md +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/_llm_functions.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/_prepare_llm_args.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/ai_func/__init__.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/ai_func/ai-func.json.j2 +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/ai_func/ai-func.pythonic.j2 +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/ai_modules.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/embedding_db/__init__.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/file_storage.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/interactive_setup.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/json_parsing.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/llm/__init__.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/llm/_openai_llm_v0.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/llm/_openai_llm_v1.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/llm/anthropic.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/llm/google_genai.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/llm/google_vertex_ai.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/llm/local_llm.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/llm/local_transformers.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/llm/openai_llm.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/llm/shared.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/logging.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/mcp.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/message_types.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/metrics.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/python.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/templating/__init__.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/templating/jinja2.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/text2speech/elevenlabs.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/tokenizing.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/types.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/utils.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/wrappers/__init__.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/wrappers/llm_response_wrapper.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/wrappers/prompt_wrapper.py +0 -0
- {ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/pyproject.toml +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ai-microcore
|
|
3
|
-
Version: 4.0.0.dev22
|
|
3
|
+
Version: 4.0.0.dev23
|
|
4
4
|
Summary: # Minimalistic Foundation for AI Applications
|
|
5
5
|
Keywords: llm,large language models,ai,similarity search,ai search,gpt,openai,framework,adapter
|
|
6
6
|
Author-email: Vitalii Stepanenko <mail@vitalii.in>
|
|
@@ -151,6 +151,17 @@ class Env:
|
|
|
151
151
|
from .embedding_db.chromadb import ChromaEmbeddingDB
|
|
152
152
|
|
|
153
153
|
self.texts = ChromaEmbeddingDB(self.config)
|
|
154
|
+
return
|
|
155
|
+
|
|
156
|
+
if self.config.EMBEDDING_DB_TYPE == EmbeddingDbType.QDRANT:
|
|
157
|
+
if find_spec("qdrant_client") is None:
|
|
158
|
+
raise ModuleNotFoundError(
|
|
159
|
+
"To use Qdrant, install the `qdrant-client` package. "
|
|
160
|
+
"Run `pip install qdrant-client`."
|
|
161
|
+
)
|
|
162
|
+
from .embedding_db.qdrant import QdrantEmbeddingDB
|
|
163
|
+
self.texts = QdrantEmbeddingDB(self.config)
|
|
164
|
+
return
|
|
154
165
|
|
|
155
166
|
|
|
156
167
|
@dataclass
|
|
@@ -84,6 +84,7 @@ class ApiType(str, Enum):
|
|
|
84
84
|
|
|
85
85
|
class EmbeddingDbType(str, Enum):
|
|
86
86
|
CHROMA = "chroma"
|
|
87
|
+
QDRANT = "qdrant"
|
|
87
88
|
NONE = ""
|
|
88
89
|
|
|
89
90
|
def __str__(self):
|
|
@@ -392,6 +393,9 @@ class Config(LLMConfig):
|
|
|
392
393
|
|
|
393
394
|
EMBEDDING_DB_TYPE: str = from_env(EmbeddingDbType.CHROMA)
|
|
394
395
|
|
|
396
|
+
EMBEDDING_DB_SIZE: int = from_env(default=0)
|
|
397
|
+
"""Used with Qdrant"""
|
|
398
|
+
|
|
395
399
|
DEFAULT_ENCODING: str = from_env("utf-8")
|
|
396
400
|
"""Used in file system operations, utf-8 by default"""
|
|
397
401
|
|
|
@@ -435,3 +439,19 @@ class Config(LLMConfig):
|
|
|
435
439
|
raise e
|
|
436
440
|
if self.TEXT_TO_SPEECH_PATH is None:
|
|
437
441
|
self.TEXT_TO_SPEECH_PATH = Path(self.STORAGE_PATH) / "voicing"
|
|
442
|
+
|
|
443
|
+
def validate(self):
|
|
444
|
+
super().validate()
|
|
445
|
+
if self.EMBEDDING_DB_TYPE == EmbeddingDbType.QDRANT:
|
|
446
|
+
if not self.EMBEDDING_DB_SIZE:
|
|
447
|
+
raise LLMConfigError(
|
|
448
|
+
"EMBEDDING_DB_SIZE is required configuration parameter for Qdrant"
|
|
449
|
+
)
|
|
450
|
+
if not self.EMBEDDING_DB_HOST:
|
|
451
|
+
raise LLMConfigError(
|
|
452
|
+
"EMBEDDING_DB_HOST is required configuration parameter for Qdrant"
|
|
453
|
+
)
|
|
454
|
+
if not self.EMBEDDING_DB_FUNCTION:
|
|
455
|
+
raise LLMConfigError(
|
|
456
|
+
"EMBEDDING_DB_FUNCTION is required configuration parameter for Qdrant"
|
|
457
|
+
)
|
|
@@ -107,6 +107,7 @@ class ChromaEmbeddingDB(AbstractEmbeddingDB):
|
|
|
107
107
|
return chroma_collection.count() if chroma_collection else 0
|
|
108
108
|
|
|
109
109
|
def delete(self, collection: str, what: str | list[str] | dict):
|
|
110
|
+
# pylint: disable=R0801, duplicate-code
|
|
110
111
|
if not self.collection_exists(collection):
|
|
111
112
|
return
|
|
112
113
|
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import logging
|
|
3
|
+
import sys
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
import uuid
|
|
6
|
+
|
|
7
|
+
from qdrant_client import QdrantClient
|
|
8
|
+
from qdrant_client.http.models import (
|
|
9
|
+
PointIdsList,
|
|
10
|
+
CollectionInfo,
|
|
11
|
+
Record,
|
|
12
|
+
FieldCondition,
|
|
13
|
+
Filter,
|
|
14
|
+
MatchValue,
|
|
15
|
+
MatchText
|
|
16
|
+
)
|
|
17
|
+
from qdrant_client.models import VectorParams, Distance, PointStruct, ScoredPoint
|
|
18
|
+
|
|
19
|
+
from ..configuration import Config
|
|
20
|
+
from .. import SearchResult, SearchResults, AbstractEmbeddingDB
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def is_sentence_transformer(fn):
|
|
24
|
+
return fn.__class__.__name__ == 'SentenceTransformer'
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def prepare_embedding_function(fn):
|
|
28
|
+
if is_sentence_transformer(fn):
|
|
29
|
+
return lambda x: fn.encode(x).tolist()
|
|
30
|
+
return fn
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
|
|
34
|
+
class QdrantEmbeddingDB(AbstractEmbeddingDB):
|
|
35
|
+
config: Config
|
|
36
|
+
embedding_function: callable = None
|
|
37
|
+
client: QdrantClient = None
|
|
38
|
+
|
|
39
|
+
def __post_init__(self):
|
|
40
|
+
|
|
41
|
+
logging.info(
|
|
42
|
+
"Connecting to Qdrant at %s:%s",
|
|
43
|
+
self.config.EMBEDDING_DB_HOST,
|
|
44
|
+
self.config.EMBEDDING_DB_PORT
|
|
45
|
+
)
|
|
46
|
+
self.client = QdrantClient(
|
|
47
|
+
host=self.config.EMBEDDING_DB_HOST,
|
|
48
|
+
port=self.config.EMBEDDING_DB_PORT or 6333
|
|
49
|
+
)
|
|
50
|
+
self.embedding_function = prepare_embedding_function(self.config.EMBEDDING_DB_FUNCTION)
|
|
51
|
+
|
|
52
|
+
@classmethod
|
|
53
|
+
def _wrap_results(cls, points: list[ScoredPoint | Record]) -> list[str | SearchResult]:
|
|
54
|
+
return SearchResults(
|
|
55
|
+
[
|
|
56
|
+
SearchResult(
|
|
57
|
+
i.payload["_text"],
|
|
58
|
+
dict(
|
|
59
|
+
id=i.id,
|
|
60
|
+
distance=i.score if hasattr(i, "score") else 0,
|
|
61
|
+
metadata={k: v for k, v in i.payload.items() if k != "_text"},
|
|
62
|
+
),
|
|
63
|
+
|
|
64
|
+
)
|
|
65
|
+
for i in points
|
|
66
|
+
]
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
@classmethod
|
|
70
|
+
def _convert_where(cls, where: dict | None, kwargs=None) -> Filter | None:
|
|
71
|
+
conditions = []
|
|
72
|
+
# ChromaDB format
|
|
73
|
+
if kwargs and "where_document" in kwargs and kwargs["where_document"]:
|
|
74
|
+
conditions.append(
|
|
75
|
+
FieldCondition(
|
|
76
|
+
key="_text",
|
|
77
|
+
match=MatchText(text=kwargs["where_document"]["$contains"])
|
|
78
|
+
)
|
|
79
|
+
)
|
|
80
|
+
if where:
|
|
81
|
+
for k, v in where.items():
|
|
82
|
+
conditions.append(FieldCondition(key=k, match=MatchValue(value=v)))
|
|
83
|
+
|
|
84
|
+
return Filter(must=conditions) if conditions else None
|
|
85
|
+
|
|
86
|
+
def search(
|
|
87
|
+
self,
|
|
88
|
+
collection: str,
|
|
89
|
+
query: str | list,
|
|
90
|
+
n_results: int = 5,
|
|
91
|
+
where: dict = None,
|
|
92
|
+
**kwargs,
|
|
93
|
+
) -> list[str | SearchResult]:
|
|
94
|
+
if not self.collection_exists(collection):
|
|
95
|
+
return SearchResults([])
|
|
96
|
+
|
|
97
|
+
if not isinstance(query, str):
|
|
98
|
+
raise ValueError("`query` must be a string")
|
|
99
|
+
|
|
100
|
+
query_vector = self.embedding_function(query)
|
|
101
|
+
where = self._convert_where(where, kwargs)
|
|
102
|
+
kwargs.pop("where_document", None)
|
|
103
|
+
hits = self.client.query_points(
|
|
104
|
+
collection_name=collection,
|
|
105
|
+
query=query_vector,
|
|
106
|
+
limit=n_results,
|
|
107
|
+
query_filter=where,
|
|
108
|
+
**kwargs,
|
|
109
|
+
)
|
|
110
|
+
return self._wrap_results(hits.points)
|
|
111
|
+
|
|
112
|
+
def save_many(self, collection: str, items: list[tuple[str, dict] | str]):
|
|
113
|
+
if not self.collection_exists(collection):
|
|
114
|
+
self._create_collection(collection)
|
|
115
|
+
point_structs = []
|
|
116
|
+
ids = set()
|
|
117
|
+
unique = not self.config.EMBEDDING_DB_ALLOW_DUPLICATES
|
|
118
|
+
for i in items:
|
|
119
|
+
if isinstance(i, str):
|
|
120
|
+
text = i
|
|
121
|
+
metadata = dict()
|
|
122
|
+
else:
|
|
123
|
+
text = i[0]
|
|
124
|
+
metadata = i[1] or {}
|
|
125
|
+
metadata["_text"] = text
|
|
126
|
+
if unique:
|
|
127
|
+
new_id = str(uuid.UUID(hashlib.md5(text.encode()).hexdigest()))
|
|
128
|
+
if new_id in ids:
|
|
129
|
+
continue
|
|
130
|
+
ids.add(new_id)
|
|
131
|
+
else:
|
|
132
|
+
new_id = str(uuid.uuid4())
|
|
133
|
+
point_structs.append(
|
|
134
|
+
PointStruct(
|
|
135
|
+
id=new_id,
|
|
136
|
+
vector=self.embedding_function(text),
|
|
137
|
+
payload=metadata
|
|
138
|
+
)
|
|
139
|
+
)
|
|
140
|
+
|
|
141
|
+
operation_info = self.client.upsert(
|
|
142
|
+
collection_name=collection,
|
|
143
|
+
wait=True,
|
|
144
|
+
points=point_structs,
|
|
145
|
+
)
|
|
146
|
+
return operation_info
|
|
147
|
+
|
|
148
|
+
def clear(self, collection: str):
|
|
149
|
+
if self.collection_exists(collection):
|
|
150
|
+
self.client.delete_collection(collection_name=collection)
|
|
151
|
+
|
|
152
|
+
def count(self, collection: str) -> int:
|
|
153
|
+
if self.collection_exists(collection):
|
|
154
|
+
return self._get_collection(collection).points_count
|
|
155
|
+
return 0
|
|
156
|
+
|
|
157
|
+
def delete(self, collection: str, what: str | list[str] | dict):
|
|
158
|
+
# pylint: disable=R0801, duplicate-code
|
|
159
|
+
if not self.collection_exists(collection):
|
|
160
|
+
return
|
|
161
|
+
|
|
162
|
+
if isinstance(what, str):
|
|
163
|
+
ids, where = [what], None
|
|
164
|
+
elif isinstance(what, list):
|
|
165
|
+
ids, where = what, None
|
|
166
|
+
elif isinstance(what, dict):
|
|
167
|
+
ids, where = None, what
|
|
168
|
+
else:
|
|
169
|
+
raise ValueError("Invalid `what` argument")
|
|
170
|
+
if ids is not None:
|
|
171
|
+
points_selector = PointIdsList(points=ids)
|
|
172
|
+
else:
|
|
173
|
+
points_selector = self._convert_where(where)
|
|
174
|
+
self.client.delete(collection_name=collection, points_selector=points_selector)
|
|
175
|
+
|
|
176
|
+
def get(
|
|
177
|
+
self,
|
|
178
|
+
collection: str,
|
|
179
|
+
ids: list[str] | str = None,
|
|
180
|
+
limit: int = None,
|
|
181
|
+
offset: int = None,
|
|
182
|
+
where: dict = None,
|
|
183
|
+
**kwargs,
|
|
184
|
+
) -> list[str | SearchResult] | str | SearchResult | None:
|
|
185
|
+
if not self.collection_exists(collection):
|
|
186
|
+
return SearchResults([]) if not isinstance(ids, str) else None
|
|
187
|
+
if ids:
|
|
188
|
+
raise NotImplementedError("Getting by ids is not supported for Qdrant")
|
|
189
|
+
where = self._convert_where(where, kwargs)
|
|
190
|
+
kwargs.pop("where_document", None)
|
|
191
|
+
search_results = self._wrap_results(
|
|
192
|
+
self.client.scroll(
|
|
193
|
+
collection,
|
|
194
|
+
limit=limit or sys.maxsize - 1,
|
|
195
|
+
offset=offset or 0,
|
|
196
|
+
scroll_filter=where,
|
|
197
|
+
**kwargs
|
|
198
|
+
)[0]
|
|
199
|
+
)
|
|
200
|
+
if isinstance(ids, str):
|
|
201
|
+
return search_results[0] if search_results else None
|
|
202
|
+
return search_results
|
|
203
|
+
|
|
204
|
+
def get_all(self, collection: str) -> list[str | SearchResult]:
|
|
205
|
+
if not self.collection_exists(collection):
|
|
206
|
+
return SearchResults([])
|
|
207
|
+
return self._wrap_results(self.client.scroll(collection, limit=sys.maxsize - 1)[0])
|
|
208
|
+
|
|
209
|
+
def collection_exists(self, collection: str) -> bool:
|
|
210
|
+
return self.client.collection_exists(collection)
|
|
211
|
+
|
|
212
|
+
def _create_collection(self, name: str):
|
|
213
|
+
assert self.config.EMBEDDING_DB_SIZE > 0
|
|
214
|
+
size = self.config.EMBEDDING_DB_SIZE
|
|
215
|
+
distance = Distance.COSINE
|
|
216
|
+
self.client.create_collection(
|
|
217
|
+
collection_name=name,
|
|
218
|
+
vectors_config=VectorParams(size=size, distance=distance),
|
|
219
|
+
)
|
|
220
|
+
|
|
221
|
+
def _get_collection(self, name: str, create: bool = False) -> CollectionInfo | None:
|
|
222
|
+
if not self.collection_exists(name):
|
|
223
|
+
if create:
|
|
224
|
+
self._create_collection(name)
|
|
225
|
+
else:
|
|
226
|
+
return None
|
|
227
|
+
return self.client.get_collection(name)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ai_microcore-4.0.0.dev22 → ai_microcore-4.0.0.dev23}/microcore/wrappers/llm_response_wrapper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|