erioon 0.0.8__tar.gz → 0.0.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {erioon-0.0.8 → erioon-0.0.9}/PKG-INFO +1 -1
- {erioon-0.0.8 → erioon-0.0.9}/erioon/collection.py +66 -2
- {erioon-0.0.8 → erioon-0.0.9}/erioon/create.py +106 -0
- {erioon-0.0.8 → erioon-0.0.9}/erioon/read.py +58 -0
- {erioon-0.0.8 → erioon-0.0.9}/erioon.egg-info/PKG-INFO +1 -1
- {erioon-0.0.8 → erioon-0.0.9}/setup.py +1 -1
- {erioon-0.0.8 → erioon-0.0.9}/LICENSE +0 -0
- {erioon-0.0.8 → erioon-0.0.9}/README.md +0 -0
- {erioon-0.0.8 → erioon-0.0.9}/erioon/auth.py +0 -0
- {erioon-0.0.8 → erioon-0.0.9}/erioon/client.py +0 -0
- {erioon-0.0.8 → erioon-0.0.9}/erioon/database.py +0 -0
- {erioon-0.0.8 → erioon-0.0.9}/erioon/delete.py +0 -0
- {erioon-0.0.8 → erioon-0.0.9}/erioon/functions.py +0 -0
- {erioon-0.0.8 → erioon-0.0.9}/erioon/ping.py +0 -0
- {erioon-0.0.8 → erioon-0.0.9}/erioon/update.py +0 -0
- {erioon-0.0.8 → erioon-0.0.9}/erioon.egg-info/SOURCES.txt +0 -0
- {erioon-0.0.8 → erioon-0.0.9}/erioon.egg-info/dependency_links.txt +0 -0
- {erioon-0.0.8 → erioon-0.0.9}/erioon.egg-info/requires.txt +0 -0
- {erioon-0.0.8 → erioon-0.0.9}/erioon.egg-info/top_level.txt +0 -0
- {erioon-0.0.8 → erioon-0.0.9}/setup.cfg +0 -0
@@ -1,7 +1,7 @@
|
|
1
1
|
import json
|
2
2
|
from urllib.parse import urlparse
|
3
|
-
from erioon.read import handle_get_all, handle_get_data
|
4
|
-
from erioon.create import handle_insert_one, handle_insert_many
|
3
|
+
from erioon.read import handle_get_all, handle_get_data, handle_classify_vector
|
4
|
+
from erioon.create import handle_insert_one, handle_insert_many, handle_vector, handle_insert_many_vectors
|
5
5
|
from erioon.delete import handle_delete_one, handle_delete_many
|
6
6
|
from erioon.update import handle_update_query
|
7
7
|
from erioon.ping import handle_connection_ping
|
@@ -223,6 +223,70 @@ class Collection:
|
|
223
223
|
coll_id=self.coll_id,
|
224
224
|
container_url=self.container_url,
|
225
225
|
)
|
226
|
+
|
227
|
+
def insert_one_vector(self, vector_data, metadata):
    """
    Insert a single vector record (embedding + metadata) into the collection.

    Args:
        vector_data (list): Embedding vector — a list of floats/ints
            (validated downstream by handle_vector).
        metadata (dict): Metadata associated with the vector.

    Returns:
        tuple: (response message, HTTP status code)
    """
    # Writes are rejected when the collection handle is read-only.
    if self._is_read_only():
        return self._read_only_response()
    return handle_vector(
        user_id_cont=self.user_id,
        database=self.db_id,
        collection=self.coll_id,
        vector=vector_data,
        metadata=metadata,
        container_url=self.container_url,
    )
|
247
|
+
|
248
|
+
def insert_many_vectors(self, records):
    """
    Bulk-insert vector records into this collection.

    Args:
        records (list): List of dicts, each with keys 'vector', 'metadata',
            and optional '_id'.

    Returns:
        tuple: (response message, HTTP status code)
    """
    # Read-only handles may not mutate the collection.
    if self._is_read_only():
        return self._read_only_response()

    call_kwargs = dict(
        user_id_cont=self.user_id,
        database=self.db_id,
        collection=self.coll_id,
        records=records,
        container_url=self.container_url,
    )
    return handle_insert_many_vectors(**call_kwargs)
|
268
|
+
|
269
|
+
def classify_vector(self, k=3):
    """
    Fetch every vector record in the collection and classify the unlabeled
    ones using k-NN (delegated to handle_classify_vector).

    Args:
        k (int): Number of neighbors to use for classification.

    Returns:
        tuple: (response message, HTTP status code)
    """
    # NOTE(review): this guard blocks classification on read-only handles even
    # though classification only reads data — preserved as-is from the
    # original; confirm intent with the API owner.
    if self._is_read_only():
        return self._read_only_response()

    classify_kwargs = dict(
        user_id=self.user_id,
        db_id=self.db_id,
        coll_id=self.coll_id,
        container_url=self.container_url,
        k=k,
    )
    return handle_classify_vector(**classify_kwargs)
|
289
|
+
|
226
290
|
|
227
291
|
def __str__(self):
|
228
292
|
"""Pretty print the collection metadata."""
|
@@ -157,3 +157,109 @@ def handle_insert_many(user_id_cont, database, collection, data, container_url):
|
|
157
157
|
general_error_msg = f"Unexpected error during bulk insert: {str(e)}"
|
158
158
|
async_log(user_id_cont, database, collection, "POST", "ERROR", general_error_msg, 1, container_url)
|
159
159
|
return {"status": "KO", "message": general_error_msg}, 500
|
160
|
+
|
161
|
+
def handle_vector(user_id_cont, database, collection, vector, metadata, container_url, _id=None):
    """
    Insert one embedding vector plus its metadata into the blob-based vector DB.

    Args:
        user_id_cont: User identifier or context.
        database: Database name.
        collection: Collection name.
        vector: List of floats representing the embedding.
        metadata: Dictionary of associated metadata.
        container_url: Azure Blob container SAS URL.
        _id: Optional custom record ID; a UUID4 string is generated when None.

    Returns:
        Tuple of (response dict, status code).
    """
    try:
        # A valid vector is a flat list whose components are all numeric.
        vector_ok = isinstance(vector, list) and all(
            isinstance(component, (float, int)) for component in vector
        )
        if not vector_ok:
            return {"status": "KO", "message": "Invalid vector format. Must be a list of floats."}, 400

        record_id = _id or str(uuid.uuid4())
        payload = {
            "_id": record_id,
            "vector": vector,
            "metadata": metadata or {},
        }
        # Reuse the plain single-record insert path for storage.
        return handle_insert_one(user_id_cont, database, collection, payload, container_url)

    except Exception as e:
        error_msg = f"Error in handle_vector: {str(e)}"
        async_log(user_id_cont, database, collection, "POST", "ERROR", error_msg, 1, container_url)
        return {"status": "KO", "message": "Failed to insert vector record.", "error": str(e)}, 500
|
193
|
+
|
194
|
+
|
195
|
+
def handle_insert_many_vectors(user_id_cont, database, collection, records, container_url):
    """
    Bulk insert multiple vector embedding records with metadata into the
    blob-based vector DB.

    Args:
        user_id_cont: User identifier or context.
        database: Database name.
        collection: Collection name.
        records: List of dicts, each with keys 'vector', 'metadata', and
            optional '_id'.
        container_url: Azure Blob container SAS URL.

    Returns:
        Tuple of (response dict, status code).
    """
    outcomes = []

    try:
        for entry in records:
            vec = entry.get("vector")
            meta = entry.get("metadata", {})
            entry_id = entry.get("_id")

            # Reject anything that is not a flat list of numbers; invalid
            # entries are reported per-record and do not abort the batch.
            vector_ok = isinstance(vec, list) and all(
                isinstance(component, (float, int)) for component in vec
            )
            if not vector_ok:
                outcomes.append({
                    "_id": entry_id,
                    "status": "KO",
                    "message": "Invalid vector format. Must be a list of floats.",
                })
                continue

            document = {
                "_id": entry_id or str(uuid.uuid4()),
                "vector": vec,
                "metadata": meta,
            }

            # Store each record via the single-record insert path.
            response, status_code = handle_insert_one(
                user_id_cont,
                database,
                collection,
                document,
                container_url,
            )

            if isinstance(response, dict):
                detail = response.get("message", "")
            else:
                detail = str(response)
            outcomes.append({
                "_id": document["_id"],
                "status": "OK" if status_code == 200 else "KO",
                "message": detail,
            })

        # One async log entry summarising the whole batch.
        async_log(
            user_id_cont,
            database,
            collection,
            "POST",
            "SUCCESS",
            outcomes,
            len(records),
            container_url,
        )

        return {"success": "Bulk vector insert completed.", "details": outcomes}, 200

    except Exception as e:
        error_msg = f"Unexpected error during bulk vector insert: {str(e)}"
        async_log(user_id_cont, database, collection, "POST", "ERROR", error_msg, 1, container_url)
        return {"status": "KO", "message": error_msg}, 500
|
@@ -2,6 +2,7 @@ import io
|
|
2
2
|
import msgpack
|
3
3
|
from azure.storage.blob import ContainerClient
|
4
4
|
from erioon.functions import async_log
|
5
|
+
from sklearn.neighbors import KNeighborsClassifier
|
5
6
|
|
6
7
|
|
7
8
|
def handle_get_all(user_id, db_id, coll_id, limit, container_url):
|
@@ -181,3 +182,60 @@ def handle_get_data(user_id, db_id, coll_id, search_criteria, limit, container_u
|
|
181
182
|
|
182
183
|
async_log(user_id, db_id, coll_id, "GET", "ERROR", "No matching record found", 1, container_url)
|
183
184
|
return {"status": "KO", "count": 0, "error": "No matching record found"}, 404
|
185
|
+
|
186
|
+
|
187
|
+
def handle_classify_vector(user_id, db_id, coll_id, container_url, k=3):
    """
    Retrieve all vector records from the collection and classify the unlabeled
    ones with a k-NN model trained on the labeled ones.

    A record is "labeled" when its metadata carries a "class" value other than
    None or the literal string "unknown"; all other records (with a non-empty
    vector) are treated as unlabeled and get a predicted class.

    Args:
        user_id: User identifier or context.
        db_id: Database name.
        coll_id: Collection name.
        container_url: Azure Blob container SAS URL.
        k (int): Number of neighbors for the k-NN classifier.

    Returns:
        Tuple of (response dict, status code). On success the dict maps each
        unlabeled record's _id to its predicted class under "predictions".
    """
    # 1. Retrieve all data from the collection (capped at 10000 records).
    response, status = handle_get_all(user_id, db_id, coll_id, limit=10000, container_url=container_url)
    if status != 200:
        return {"status": "KO", "message": "Failed to fetch data for classification", "error": response.get("error", "")}, status

    records = response.get("results", [])
    if not records:
        return {"status": "KO", "message": "No data found for classification"}, 404

    # 2. Partition into labeled (training) and unlabeled (to classify) sets in
    # a single pass. BUG FIX: the previous version collected unknown_ids in a
    # second loop over zip(vectors, labels) where `rec` was a stale leftover
    # from the first loop, so every unknown vector was tagged with the LAST
    # record's _id. Carrying the id alongside each vector here keeps ids and
    # vectors aligned.
    known_vectors = []
    known_labels = []
    unknown_vectors = []
    unknown_ids = []

    for rec in records:
        vec = rec.get("vector")
        if not vec:
            # Records without a vector cannot participate in classification.
            continue
        label = rec.get("metadata", {}).get("class", "unknown")
        if label != "unknown" and label is not None:
            known_vectors.append(vec)
            known_labels.append(label)
        else:
            unknown_vectors.append(vec)
            unknown_ids.append(rec.get("_id"))

    if not known_vectors:
        return {"status": "KO", "message": "No labeled data for training classification"}, 404

    # 3. Train the k-NN classifier on the labeled vectors.
    # NOTE(review): sklearn raises if k > len(known_vectors); preserved as-is
    # from the original — confirm whether k should be clamped.
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(known_vectors, known_labels)

    # 4. Predict a class for each unlabeled vector, keyed by its own _id.
    predictions = {}
    if unknown_vectors:
        predicted_labels = knn.predict(unknown_vectors)
        for _id, pred in zip(unknown_ids, predicted_labels):
            predictions[_id] = pred

    # 5. Return the prediction mapping.
    return {
        "status": "OK",
        "message": f"Classification done on {len(unknown_vectors)} unknown vectors",
        "predictions": predictions,
    }, 200
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|