erioon 0.0.6__py3-none-any.whl → 0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
erioon/create.py ADDED
@@ -0,0 +1,159 @@
1
+ from azure.storage.blob import ContainerClient
2
+ import uuid
3
+ import json
4
+ from erioon.functions import (
5
+ create_msgpack_file,
6
+ update_index_file_insert,
7
+ calculate_shard_number,
8
+ async_log
9
+ )
10
+
11
def get_index_data(user_id_cont, database, collection, container_url):
    """
    Load the parsed content of a collection's index.json blob.

    Args:
        user_id_cont: User identifier or context (not used by this function).
        database: Database name; first path segment of the blob name.
        collection: Collection name; second path segment of the blob name.
        container_url: Blob Storage container SAS URL.

    Returns:
        The JSON-decoded content of "<database>/<collection>/index.json".
        An empty list is returned when the blob is empty or when downloading
        or decoding it raises any exception.
    """
    index_path = f"{database}/{collection}/index.json"
    client = ContainerClient.from_container_url(container_url)
    index_blob = client.get_blob_client(blob=index_path)

    try:
        raw_index = index_blob.download_blob().readall()
        if not raw_index:
            return []
        return json.loads(raw_index)
    except Exception:
        # Best-effort read: any failure is reported as "no index entries".
        return []
32
+
33
def is_duplicate_id(user_id_cont, database, collection, _id, container_url):
    """
    Tell whether a record _id is already listed in the collection's index.json.

    Args:
        user_id_cont: User identifier (forwarded to get_index_data).
        database: Database name.
        collection: Collection name.
        _id: Record ID to look for.
        container_url: Blob Storage container SAS URL.

    Returns:
        True if _id appears in the id collection of any shard entry, else False.
    """
    shards = get_index_data(user_id_cont, database, collection, container_url)
    # Each index entry is a mapping of shard name -> ids stored in that shard.
    return any(_id in ids for shard in shards for ids in shard.values())
54
+
55
def handle_insert_one(user_id_cont, database, collection, record, container_url):
    """
    Insert a single record into the collection.

    - If no '_id' is provided (or it is falsy), a new UUID is generated.
    - If the provided '_id' is already present in the index, a new UUID is
      generated and written into the record.
    - The record is written to a shard file and index.json is updated.
    - The SUCCESS log entry is emitted only after both writes have completed,
      so a failed insert produces an ERROR entry only.

    Args:
        user_id_cont: User identifier.
        database: Database name.
        collection: Collection name.
        record: Dict representing the record to insert. It is modified in
            place when an '_id' has to be generated.
        container_url: Blob Storage container SAS URL.

    Returns:
        Tuple (response dict, status code): status 200 with the stored record
        on success, status 500 with the error text on any exception.
    """
    try:
        if "_id" not in record or not record["_id"]:
            record["_id"] = str(uuid.uuid4())

        rec_id = record["_id"]

        if is_duplicate_id(user_id_cont, database, collection, rec_id, container_url):
            rec_id = str(uuid.uuid4())
            record["_id"] = rec_id
            msg = f"Record inserted successfully in {collection} with a new _id {rec_id} because the provided _id was already present."
        else:
            msg = f"Record inserted successfully in {collection} with _id {rec_id}"

        create_msgpack_file(user_id_cont, database, collection, record, container_url)

        shard_number = calculate_shard_number(user_id_cont, database, collection, container_url)
        update_index_file_insert(user_id_cont, database, collection, rec_id, shard_number, container_url)

        # Log success only once the record and its index entry are persisted.
        async_log(user_id_cont, database, collection, "POST", "SUCCESS", msg, 1, container_url)

        return {"status": "OK", "message": msg, "record": record}, 200

    except Exception as e:
        error_msg = f"An error occurred during insert in {collection}: {str(e)}"
        async_log(user_id_cont, database, collection, "POST", "ERROR", error_msg, 1, container_url)
        return {"status": "KO", "message": "Failed to insert record.", "error": str(e)}, 500
102
+
103
def handle_insert_many(user_id_cont, database, collection, data, container_url):
    """
    Insert multiple records, one after another.

    For each record:
      - Ensure it has an '_id' (a new UUID is generated if it is missing,
        falsy, or already present in the index).
      - Write the record to a shard file.
      - Update index.json with the _id-to-shard mapping.

    The whole batch is logged once. Processing stops at the first exception;
    records handled before the failure remain stored, and they are reported
    under "details" in the error response so the caller can see the partial
    state.

    Args:
        user_id_cont: User identifier.
        database: Database name.
        collection: Collection name.
        data: Dict with key "records" containing a list of record dicts.
            Records are modified in place when an '_id' has to be generated.
        container_url: Blob Storage container SAS URL.

    Returns:
        Tuple (response dict, status code): status 200 with per-record details
        on success; status 500 with the error message and the details of the
        records inserted before the failure otherwise.
    """
    insert_results = []
    records = data.get("records", [])
    count = len(records)

    try:
        for record in records:
            if "_id" not in record or not record["_id"]:
                record["_id"] = str(uuid.uuid4())

            rec_id = record["_id"]

            if is_duplicate_id(user_id_cont, database, collection, rec_id, container_url):
                rec_id = str(uuid.uuid4())
                record["_id"] = rec_id
                msg = f"Inserted with new _id {rec_id} (original _id was already present)."
            else:
                msg = f"Inserted with _id {rec_id}."

            create_msgpack_file(user_id_cont, database, collection, record, container_url)

            shard_number = calculate_shard_number(user_id_cont, database, collection, container_url)
            update_index_file_insert(
                user_id_cont, database, collection, rec_id, shard_number, container_url
            )

            insert_results.append({"_id": rec_id, "message": msg})

        async_log(user_id_cont, database, collection, "POST", "SUCCESS", insert_results, count, container_url)
        return {"success": "Records inserted successfully", "details": insert_results}, 200

    except Exception as e:
        general_error_msg = f"Unexpected error during bulk insert: {str(e)}"
        async_log(user_id_cont, database, collection, "POST", "ERROR", general_error_msg, 1, container_url)
        # "details" lists the records that were fully inserted before the failure.
        return {"status": "KO", "message": general_error_msg, "details": insert_results}, 500
erioon/database.py CHANGED
@@ -2,35 +2,66 @@ import json
2
2
  from erioon.collection import Collection
3
3
 
4
4
class Database:
    """Handle on one database: holds its metadata and hands out Collection objects."""

    def __init__(self, user_id, metadata, database=None, cluster=None, sas_url=None):
        """
        Store the connection context of a database.

        Args:
            user_id (str): The ID of the authenticated user.
            metadata (dict): Metadata about the database; the database id is read
                from metadata["database_info"]["_id"] when present.
            database (str, optional): The name or identifier of the database.
            cluster (str, optional): The cluster where the database is hosted.
            sas_url (str, optional): SAS URL for accessing the Azure Blob Storage container.
        """
        database_info = metadata.get("database_info", {})

        self.user_id = user_id
        self.metadata = metadata
        self.db_id = database_info.get("_id")
        self.database = database
        self.cluster = cluster
        self.sas_url = sas_url

    def __getitem__(self, collection_id):
        """
        Look up a collection with dictionary-style access.

        Args:
            collection_id (str): Identifier of the collection to retrieve.

        Returns:
            Collection: A Collection built from the stored context and the
                collection's metadata entry.
            str: "No collection found" if the metadata has no (non-empty)
                entry for collection_id.
        """
        database_info = self.metadata.get("database_info", {})
        coll_meta = database_info.get("collections", {}).get(collection_id)

        if coll_meta:
            return Collection(
                user_id=self.user_id,
                db_id=self.db_id,
                coll_id=collection_id,
                metadata=coll_meta,
                database=self.database,
                cluster=self.cluster,
                sas_url=self.sas_url,
            )

        return "No collection found"

    def __str__(self):
        """
        Render the database metadata as pretty-printed JSON.

        Returns:
            str: The metadata serialized with a 4-space indent.
        """
        return json.dumps(self.metadata, indent=4)

    def __repr__(self):
        """
        Build a short description of this Database instance.

        Returns:
            str: Text showing the database ID, cluster, and database name.
        """
        return f"<Database db_id={self.db_id}, cluster={self.cluster}, database={self.database}>"
erioon/delete.py ADDED
@@ -0,0 +1,257 @@
1
+ import json
2
+ import io
3
+ import msgpack
4
+ from azure.storage.blob import ContainerClient
5
+ from erioon.functions import update_index_file_delete, check_nested_key, async_log
6
+
7
def handle_delete_one(user_id, db_id, coll_id, data_to_delete, container_url):
    """
    Delete a record from a collection, dispatching on how it is identified.

    A query containing '_id' is deleted by id; any other query is treated as a
    key-value match and delegated to handle_delete_without_id.

    Args:
        user_id: Identifier of the user performing the operation.
        db_id: Database ID containing the collection.
        coll_id: Collection ID.
        data_to_delete: Dictionary containing either '_id' or a key-value pair to match.
        container_url: SAS URL pointing to the storage container.

    Returns:
        The (response dict, status code) tuple produced by the delegated handler.
    """
    if "_id" not in data_to_delete:
        return handle_delete_without_id(user_id, db_id, coll_id, data_to_delete, container_url)

    return handle_delete_with_id(user_id, db_id, coll_id, data_to_delete["_id"], container_url)
28
+
29
def handle_delete_with_id(user_id, db_id, coll_id, record_id, container_url):
    """
    Delete the record whose '_id' equals record_id.

    Steps:
    - Create a ContainerClient from the container URL.
    - Load index.json, which maps shard names to record IDs.
    - Locate the first shard listing record_id; the shard number is the
      integer after the last "_" of the shard name.
    - Download and unpack the shard blob.
    - Drop every record whose '_id' matches.
    - If something was removed, repack and upload the shard, then update
      index.json.
    - Log success or errors via async_log.

    Args:
        user_id, db_id, coll_id: Identifiers for user, database, and collection.
        record_id: The unique '_id' of the record to delete.
        container_url: Azure Blob Storage container SAS URL.

    Returns:
        Tuple (response dict, status code): 200 on deletion, 404 when the index
        file, the index entry, or the record in the shard is missing, 500 when
        reading or rewriting the shard raises.
    """
    container_client = ContainerClient.from_container_url(container_url)

    index_blob_client = container_client.get_blob_client(f"{db_id}/{coll_id}/index.json")

    if not index_blob_client.exists():
        return {"error": "Index file does not exist"}, 404

    index_data = json.loads(index_blob_client.download_blob().readall())
    shard_number = None

    for shard in index_data:
        for shard_key, ids in shard.items():
            if record_id in ids:
                shard_number = int(shard_key.split("_")[-1])
                break
        # Compare against None: a plain truthiness test would treat shard 0
        # as "not found" and keep scanning the remaining shards.
        if shard_number is not None:
            break

    if shard_number is None:
        async_log(user_id, db_id, coll_id, "DELETE", "ERROR", f"Record with _id {record_id} not found", 1, container_url)
        return {"error": f"Record with _id {record_id} not found"}, 404

    msgpack_blob_client = container_client.get_blob_client(f"{db_id}/{coll_id}/{coll_id}_{shard_number}.msgpack")

    try:
        unpacked_data = msgpack.unpackb(msgpack_blob_client.download_blob().readall(), raw=False)

        # A shard that does not unpack to a list is treated as holding no records.
        shard_records = unpacked_data if isinstance(unpacked_data, list) else []
        records = [record for record in shard_records if record.get("_id") != record_id]

        if len(records) < len(shard_records):
            msgpack_blob_client.upload_blob(msgpack.packb(records), overwrite=True)

            update_index_file_delete(user_id, db_id, coll_id, record_id, shard_number, container_url)
            async_log(user_id, db_id, coll_id, "DELETE", "SUCCESS", f"Record with _id {record_id} deleted successfully", 1, container_url)
            return {"success": f"Record with _id {record_id} deleted successfully"}, 200

        async_log(user_id, db_id, coll_id, "DELETE", "ERROR", f"Record with _id {record_id} not found in shard", 1, container_url)
        return {"error": f"Record with _id {record_id} not found in shard"}, 404

    except Exception as e:
        async_log(user_id, db_id, coll_id, "DELETE", "ERROR", f"Error deleting record {record_id}: {str(e)}", 1, container_url)
        return {"error": f"Error deleting record {record_id}: {str(e)}"}, 500
109
+
110
def handle_delete_without_id(user_id, db_id, coll_id, data_to_delete, container_url):
    """
    Delete records matching a key-value pair when '_id' is not provided.

    Steps:
    - Reject an empty query with a 400 response.
    - Take the first key-value pair of the query as the match criterion.
    - List all ".msgpack" blobs under "<db_id>/<coll_id>/".
    - Download and unpack each shard and test every record with
      check_nested_key (presumably a nested-key lookup; its exact matching
      rules are defined in erioon.functions).
    - Collect the '_id' of every matching record that has one.
    - If nothing matched, return a 404 error.
    - Delete each matched '_id' through handle_delete_with_id and count only
      the deletions that returned status 200.

    Args:
        user_id, db_id, coll_id: Identifiers for user, database, and collection.
        data_to_delete: Dict whose first key-value pair is the match criterion.
        container_url: Blob storage container SAS URL.

    Returns:
        Tuple (response dict, status code): 200 with the number of records
        actually deleted, 400 for an empty query, 404 when nothing matches,
        500 when records matched but none could be deleted.
    """
    if not data_to_delete:
        # Without this guard an empty query would raise instead of answering.
        return {"error": "No key-value pair provided for deletion"}, 400

    container_client = ContainerClient.from_container_url(container_url)

    key, value = next(iter(data_to_delete.items()))

    matching_ids = []
    directory_path = f"{db_id}/{coll_id}/"

    for blob in container_client.list_blobs(name_starts_with=directory_path):
        if not blob.name.endswith(".msgpack"):
            continue
        try:
            blob_client = container_client.get_blob_client(blob.name)
            unpacked_data = msgpack.unpackb(blob_client.download_blob().readall(), raw=False)
            if not isinstance(unpacked_data, list):
                continue
            for record in unpacked_data:
                if not check_nested_key(record, key, value):
                    continue
                # A matching record without '_id' cannot be deleted by id;
                # skip it rather than abandoning the rest of the shard.
                matched_id = record.get("_id")
                if matched_id is not None:
                    matching_ids.append(matched_id)
        except Exception:
            # Best-effort scan: an unreadable shard must not block the others.
            continue

    if not matching_ids:
        async_log(user_id, db_id, coll_id, "DELETE", "ERROR", "No matching records found for key-value pair", 1, container_url)
        return {"error": "No matching records found for the specified key-value pair"}, 404

    deleted_count = 0
    for record_id in matching_ids:
        try:
            _, status_code = handle_delete_with_id(user_id, db_id, coll_id, record_id, container_url)
        except Exception as e:
            async_log(user_id, db_id, coll_id, "DELETE", "ERROR", f"Error deleting record {record_id}: {str(e)}", 1, container_url)
            continue
        if status_code == 200:
            deleted_count += 1

    if deleted_count == 0:
        return {"error": f"Matching records for '{key}':'{value}' could not be deleted"}, 500

    return (
        {"success": f"{deleted_count} record(s) '{key}':'{value}' deleted successfully"},
        200
    )
170
+
171
def handle_delete_many(user_id, db_id, coll_id, data_to_delete_list, container_url, batch_size=10):
    """
    Run a list of delete queries, grouped into fixed-size batches.

    Every query is executed individually: queries containing '_id' go through
    handle_delete_with_id, all others through handle_delete_without_id. The
    outcome of each query is recorded as a success or an error in its batch.

    Args:
        user_id, db_id, coll_id: Identifiers for user, database, collection.
        data_to_delete_list: List of dicts, each with '_id' or a key-value pair.
        container_url: storage container SAS URL.
        batch_size: Number of queries grouped into one batch result.

    Returns:
        Tuple (response dict, status code): status 200 when no query produced
        an error, status 500 otherwise; "details" holds the per-batch results.
    """
    batch_results = []

    for start in range(0, len(data_to_delete_list), batch_size):
        chunk = data_to_delete_list[start : start + batch_size]
        succeeded = []
        failed = []

        for query in chunk:
            try:
                if "_id" in query:
                    outcome = handle_delete_with_id(
                        user_id, db_id, coll_id, query["_id"], container_url
                    )
                else:
                    outcome = handle_delete_without_id(
                        user_id, db_id, coll_id, query, container_url
                    )

                if outcome is None:
                    failed.append(
                        {
                            "delete_query": query,
                            "error": "No result returned from delete function",
                        }
                    )
                    continue

                response, status_code = outcome
                if status_code == 200:
                    message = response.get("success", "Record deleted successfully")
                    succeeded.append({"delete_query": query, "message": message})
                else:
                    error_text = response.get(
                        "error",
                        f"Failed to delete record - Status code {status_code}",
                    )
                    failed.append({"delete_query": query, "error": error_text})

            except Exception as e:
                # Any failure while running or unpacking a query counts as an error.
                failed.append({"delete_query": query, "error": str(e)})

        batch_results.append(
            {"queries": len(chunk), "success": succeeded, "errors": failed}
        )

    total_errors = sum(len(result["errors"]) for result in batch_results)

    if total_errors:
        return {"error": "Error deleting selected records", "details": batch_results}, 500

    return {"success": "Selected records deleted successfully", "details": batch_results}, 200