erioon 0.0.8__tar.gz → 0.0.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: erioon
3
- Version: 0.0.8
3
+ Version: 0.0.9
4
4
  Summary: Erioon SDF for Python
5
5
  Author: Zyber Pireci
6
6
  Author-email: zyber.pireci@erioon.com
@@ -1,7 +1,7 @@
1
1
  import json
2
2
  from urllib.parse import urlparse
3
- from erioon.read import handle_get_all, handle_get_data
4
- from erioon.create import handle_insert_one, handle_insert_many
3
+ from erioon.read import handle_get_all, handle_get_data, handle_classify_vector
4
+ from erioon.create import handle_insert_one, handle_insert_many, handle_vector, handle_insert_many_vectors
5
5
  from erioon.delete import handle_delete_one, handle_delete_many
6
6
  from erioon.update import handle_update_query
7
7
  from erioon.ping import handle_connection_ping
@@ -223,6 +223,70 @@ class Collection:
223
223
  coll_id=self.coll_id,
224
224
  container_url=self.container_url,
225
225
  )
226
+
227
def insert_one_vector(self, vector_data, metadata, _id=None):
    """
    Insert a single vector record into the collection.

    Args:
        vector_data (list): Embedding to store; forwarded to
            ``handle_vector`` as ``vector``.
        metadata (dict): Metadata stored alongside the vector.
        _id (optional): Custom record ID forwarded to ``handle_vector``.
            Defaults to None.

    Returns:
        tuple: (response message, HTTP status code)
    """
    # Short-circuit with the read-only response when the collection
    # reports itself as read-only.
    if self._is_read_only():
        return self._read_only_response()
    return handle_vector(
        user_id_cont=self.user_id,
        database=self.db_id,
        collection=self.coll_id,
        vector=vector_data,
        metadata=metadata,
        container_url=self.container_url,
        _id=_id,
    )
247
+
248
def insert_many_vectors(self, records):
    """
    Bulk-insert vector records into this collection.

    Args:
        records (list): Dicts carrying 'vector' and 'metadata', plus an
            optional '_id'.

    Returns:
        tuple: (response message, HTTP status code)
    """
    if not self._is_read_only():
        # Delegate to the bulk handler using this collection's identifiers.
        return handle_insert_many_vectors(
            user_id_cont=self.user_id,
            database=self.db_id,
            collection=self.coll_id,
            records=records,
            container_url=self.container_url,
        )
    return self._read_only_response()
268
+
269
def classify_vector(self, k=3):
    """
    Run k-NN classification over this collection's vector records.

    The work is delegated to ``handle_classify_vector``.

    Args:
        k (int): Number of neighbors to use for classification.

    Returns:
        tuple: (response message, HTTP status code)
    """
    # NOTE(review): this guard blocks classification on read-only
    # collections even though the call looks like a read — confirm intent.
    if not self._is_read_only():
        return handle_classify_vector(
            user_id=self.user_id,
            db_id=self.db_id,
            coll_id=self.coll_id,
            container_url=self.container_url,
            k=k,
        )
    return self._read_only_response()
289
+
226
290
 
227
291
  def __str__(self):
228
292
  """Pretty print the collection metadata."""
@@ -157,3 +157,109 @@ def handle_insert_many(user_id_cont, database, collection, data, container_url):
157
157
  general_error_msg = f"Unexpected error during bulk insert: {str(e)}"
158
158
  async_log(user_id_cont, database, collection, "POST", "ERROR", general_error_msg, 1, container_url)
159
159
  return {"status": "KO", "message": general_error_msg}, 500
160
+
161
def handle_vector(user_id_cont, database, collection, vector, metadata, container_url, _id=None):
    """
    Insert one vector embedding with its metadata via ``handle_insert_one``.

    Args:
        user_id_cont: User identifier or context.
        database: Database name.
        collection: Collection name.
        vector: Non-empty list of numbers (int/float, not bool) representing
            the embedding.
        metadata: Dictionary of associated metadata; a falsy value is stored
            as an empty dict.
        container_url: Azure Blob container SAS URL.
        _id: Optional custom record ID. When falsy, a UUID4 string is
            generated.

    Returns:
        Tuple of (response dict, status code). An invalid vector yields a
        400 response; any unexpected exception is logged through
        ``async_log`` and reported as a 500 response.
    """
    # Imported locally so this block does not depend on a module-level
    # import that is outside the visible part of the file.
    import uuid

    try:
        # bool is a subclass of int, so it must be excluded explicitly;
        # an empty list is rejected because all() of nothing is True.
        is_valid_vector = (
            isinstance(vector, list)
            and len(vector) > 0
            and all(
                isinstance(v, (float, int)) and not isinstance(v, bool)
                for v in vector
            )
        )
        if not is_valid_vector:
            return {"status": "KO", "message": "Invalid vector format. Must be a list of floats."}, 400

        record = {
            "_id": _id or str(uuid.uuid4()),
            "vector": vector,
            "metadata": metadata or {},
        }

        return handle_insert_one(user_id_cont, database, collection, record, container_url)

    except Exception as e:
        error_msg = f"Error in handle_vector: {str(e)}"
        async_log(user_id_cont, database, collection, "POST", "ERROR", error_msg, 1, container_url)
        return {"status": "KO", "message": "Failed to insert vector record.", "error": str(e)}, 500
193
+
194
+
195
def handle_insert_many_vectors(user_id_cont, database, collection, records, container_url):
    """
    Bulk insert vector embedding records, one ``handle_insert_one`` call each.

    Args:
        user_id_cont: User identifier or context.
        database: Database name.
        collection: Collection name.
        records: Iterable of dicts, each with keys 'vector', 'metadata', and
            optional '_id'.
        container_url: Azure Blob container SAS URL.

    Returns:
        Tuple of (response dict, status code). The status is 200 whenever the
        loop completes, even if individual records failed; the per-record
        outcome is listed under "details". A record is reported "OK" only
        when ``handle_insert_one`` returned status code 200. An unexpected
        exception is logged and reported as a 500 response.
    """
    # Imported locally so this block does not depend on a module-level
    # import that is outside the visible part of the file.
    import uuid

    insert_results = []

    try:
        for record in records:
            # A malformed entry must only fail itself, not abort the batch
            # after earlier records were already inserted.
            if not isinstance(record, dict):
                insert_results.append({
                    "_id": None,
                    "status": "KO",
                    "message": "Invalid record format. Must be a dict.",
                })
                continue

            vector = record.get("vector")
            # Normalise an explicit None to {} to match handle_vector.
            metadata = record.get("metadata") or {}
            _id = record.get("_id")

            # bool is a subclass of int, so exclude it explicitly; an empty
            # list is rejected because all() of nothing is True.
            is_valid_vector = (
                isinstance(vector, list)
                and len(vector) > 0
                and all(
                    isinstance(v, (float, int)) and not isinstance(v, bool)
                    for v in vector
                )
            )
            if not is_valid_vector:
                insert_results.append({
                    "_id": _id,
                    "status": "KO",
                    "message": "Invalid vector format. Must be a list of floats.",
                })
                continue

            rec_to_insert = {
                "_id": _id or str(uuid.uuid4()),
                "vector": vector,
                "metadata": metadata,
            }

            response, status_code = handle_insert_one(
                user_id_cont,
                database,
                collection,
                rec_to_insert,
                container_url,
            )

            insert_results.append({
                "_id": rec_to_insert["_id"],
                "status": "OK" if status_code == 200 else "KO",
                "message": response.get("message", "") if isinstance(response, dict) else str(response),
            })

        # Exactly one result is appended per processed record, so this count
        # equals the number of records without requiring len(records).
        async_log(
            user_id_cont,
            database,
            collection,
            "POST",
            "SUCCESS",
            insert_results,
            len(insert_results),
            container_url,
        )

        return {"success": "Bulk vector insert completed.", "details": insert_results}, 200

    except Exception as e:
        error_msg = f"Unexpected error during bulk vector insert: {str(e)}"
        async_log(user_id_cont, database, collection, "POST", "ERROR", error_msg, 1, container_url)
        return {"status": "KO", "message": error_msg}, 500
@@ -2,6 +2,7 @@ import io
2
2
  import msgpack
3
3
  from azure.storage.blob import ContainerClient
4
4
  from erioon.functions import async_log
5
+ from sklearn.neighbors import KNeighborsClassifier
5
6
 
6
7
 
7
8
  def handle_get_all(user_id, db_id, coll_id, limit, container_url):
@@ -181,3 +182,60 @@ def handle_get_data(user_id, db_id, coll_id, search_criteria, limit, container_u
181
182
 
182
183
  async_log(user_id, db_id, coll_id, "GET", "ERROR", "No matching record found", 1, container_url)
183
184
  return {"status": "KO", "count": 0, "error": "No matching record found"}, 404
185
+
186
+
187
def handle_classify_vector(user_id, db_id, coll_id, container_url, k=3):
    """
    Predict a class for every unlabeled vector in a collection using k-NN.

    Records are fetched with ``handle_get_all`` (limit 10000). Records
    without a truthy "vector" are skipped. A record whose metadata "class"
    is present and neither None nor "unknown" joins the training set; every
    other record with a vector is treated as unlabeled and gets a prediction.

    Args:
        user_id: User identifier, forwarded to ``handle_get_all``.
        db_id: Database identifier, forwarded to ``handle_get_all``.
        coll_id: Collection identifier, forwarded to ``handle_get_all``.
        container_url: Container URL, forwarded to ``handle_get_all``.
        k (int): Number of neighbors. It is capped at the number of labeled
            vectors so a small training set does not make prediction fail.

    Returns:
        Tuple of (response dict, status code). On success the dict holds a
        "predictions" mapping of record "_id" to predicted class. Fetch
        failures propagate the status from ``handle_get_all``; missing data
        or missing labeled data yields 404; a classifier failure (for
        example an invalid ``k`` or vectors of differing length) yields 400.
    """
    # 1. Retrieve all data from the collection.
    response, status = handle_get_all(user_id, db_id, coll_id, limit=10000, container_url=container_url)
    if status != 200:
        return {"status": "KO", "message": "Failed to fetch data for classification", "error": response.get("error", "")}, status

    records = response.get("results", [])
    if not records:
        return {"status": "KO", "message": "No data found for classification"}, 404

    # 2. Split records into labeled (training) and unlabeled (to predict).
    # The id is captured in the same pass as its vector so each prediction
    # is keyed by the record it belongs to.
    known_vectors = []
    known_labels = []
    unknown_vectors = []
    unknown_ids = []

    for rec in records:
        vec = rec.get("vector")
        if not vec:
            continue
        meta = rec.get("metadata")
        # Metadata may be None or a non-dict; treat that as unlabeled.
        label = meta.get("class", "unknown") if isinstance(meta, dict) else "unknown"
        if label is None or label == "unknown":
            unknown_vectors.append(vec)
            unknown_ids.append(rec.get("_id"))
        else:
            known_vectors.append(vec)
            known_labels.append(label)

    if not known_vectors:
        return {"status": "KO", "message": "No labeled data for training classification"}, 404

    # 3/4. Train k-NN on the labeled vectors and predict the unlabeled ones.
    predictions = {}
    if unknown_vectors:
        try:
            knn = KNeighborsClassifier(n_neighbors=min(k, len(known_vectors)))
            knn.fit(known_vectors, known_labels)
            # tolist() converts numpy scalars into plain Python values.
            predicted_labels = knn.predict(unknown_vectors).tolist()
        except (TypeError, ValueError) as e:
            return {"status": "KO", "message": "Classification failed", "error": str(e)}, 400
        predictions = dict(zip(unknown_ids, predicted_labels))

    # 5. Return predictions.
    return {
        "status": "OK",
        "message": f"Classification done on {len(unknown_vectors)} unknown vectors",
        "predictions": predictions,
    }, 200
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: erioon
3
- Version: 0.0.8
3
+ Version: 0.0.9
4
4
  Summary: Erioon SDF for Python
5
5
  Author: Zyber Pireci
6
6
  Author-email: zyber.pireci@erioon.com
@@ -2,7 +2,7 @@ from setuptools import setup
2
2
 
3
3
  setup(
4
4
  name='erioon',
5
- version='0.0.8',
5
+ version='0.0.9',
6
6
  author='Zyber Pireci',
7
7
  author_email='zyber.pireci@erioon.com',
8
8
  description='Erioon SDF for Python',
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes