erioon 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
erioon/functions.py ADDED
@@ -0,0 +1,422 @@
+ # Copyright 2025-present Erioon, Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # Visit www.erioon.com/dev-docs for more information about the python SDK
+
+ import msgpack
+ from azure.storage.blob import ContainerClient
+ import json
+ import uuid
+ from io import BytesIO
+ import datetime
+ from threading import Thread
+
+ # CREATE BLOB CONTAINER
+ def create_container_if_not_exists(container_name, container_url):
+     """
+     Checks if the Blob Storage container exists; if not, creates it.
+
+     Args:
+         container_name: Name of the container to check/create.
+         container_url: SAS URL to the blob container.
+     """
+     container_client = ContainerClient.from_container_url(container_url)
+     if not container_client.exists():
+         container_client.create_container()
+
+ # GET SHARD FILENAME
+ def get_shard_file_name(user_id_cont, database, collection, container_url, next_shard_number=False):
+     """
+     Determines the filename of the current (or next) shard MessagePack file for writing data.
+
+     The filename format is: {database}/{collection}/{collection}_{shard_number}.msgpack
+
+     Args:
+         user_id_cont: User identifier/context.
+         database: Database name.
+         collection: Collection name.
+         container_url: Blob Storage container SAS URL.
+         next_shard_number: If True, returns filename for the next shard (increment shard number).
+
+     Returns:
+         Filename string of the shard to be used.
+     """
+     container_client = ContainerClient.from_container_url(container_url)
+
+     base_shard_name = f"{database}/{collection}/{collection}"
+
+     files = container_client.list_blobs(name_starts_with=base_shard_name)
+     existing_shards = [int(blob.name.split('_')[-1].split('.')[0]) for blob in files if blob.name.endswith('.msgpack')]
+
+     if existing_shards:
+         next_shard = max(existing_shards) + 1 if next_shard_number else max(existing_shards)
+     else:
+         next_shard = 1
+
+     return f"{base_shard_name}_{next_shard}.msgpack"
+
+ # GET SHARD LIMIT
+ def get_shard_limit(user_id_cont, database, collection, container_url):
+     """
+     Retrieves the maximum number of records allowed in a single shard from the
+     collection_settings.json file, or returns the default limit if the file doesn't exist.
+
+     Args:
+         user_id_cont: User identifier/context.
+         database: Database name.
+         collection: Collection name.
+         container_url: Blob Storage container SAS URL.
+
+     Returns:
+         Integer shard limit (default 100000).
+     """
+     container_client = ContainerClient.from_container_url(container_url)
+     config_blob_client = container_client.get_blob_client(blob=f"{database}/{collection}/collection_settings.json")
+
+     if not config_blob_client.exists():
+         return 100000
+
+     config_data = json.loads(config_blob_client.download_blob().readall())
+     return config_data.get("shard_limit", 100000)
+
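For illustration, the per-collection cap can be raised by uploading a collection_settings.json like the sketch below. The SAS URL, database, and collection names are placeholders, and shard_limit is the only key get_shard_limit reads.

import json
from azure.storage.blob import ContainerClient

container_url = "https://<account>.blob.core.windows.net/<container>?<sas-token>"  # placeholder SAS URL
container_client = ContainerClient.from_container_url(container_url)

# Write the settings blob that get_shard_limit looks for; 50000 replaces the 100000 default.
settings_blob = container_client.get_blob_client(blob="mydb/mycoll/collection_settings.json")
settings_blob.upload_blob(json.dumps({"shard_limit": 50000}), overwrite=True)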
+ # CREATE MSGPACK FILE
+ def create_msgpack_file(user_id_cont, database, collection, data, container_url):
+     """
+     Writes the given record data into the appropriate MessagePack shard file.
+     Automatically manages shard rollover based on the shard size limit.
+
+     Args:
+         user_id_cont: User identifier/context.
+         database: Database name.
+         collection: Collection name.
+         data: The record data dict to store.
+         container_url: Blob Storage container SAS URL.
+
+     Returns:
+         The filename of the shard where the record was stored.
+     """
+     container_client = ContainerClient.from_container_url(container_url)
+
+     msgpack_filename = get_shard_file_name(user_id_cont, database, collection, container_url)
+
+     msgpack_blob_client = container_client.get_blob_client(blob=msgpack_filename)
+
+     existing_records = []
+     max_records_per_shard = get_shard_limit(user_id_cont, database, collection, container_url)
+
+     if msgpack_blob_client.exists():
+         with BytesIO(msgpack_blob_client.download_blob().readall()) as existing_file:
+             existing_records = msgpack.unpackb(existing_file.read(), raw=False)
+
+         if len(existing_records) >= max_records_per_shard:
+             msgpack_filename = get_shard_file_name(user_id_cont, database, collection, container_url, next_shard_number=True)
+             msgpack_blob_client = container_client.get_blob_client(blob=msgpack_filename)
+             existing_records = []
+
+     existing_records.append(data)
+
+     with BytesIO() as out_file:
+         out_file.write(msgpack.packb(existing_records, use_bin_type=True))
+         out_file.seek(0)
+         msgpack_blob_client.upload_blob(out_file, overwrite=True)
+
+     return msgpack_filename
+
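As a rough sketch of how these helpers fit together, the snippet below inserts one record using only functions from this file (some are defined further down). The user, database, collection, and SAS URL values are placeholders, not anything prescribed by the SDK.

from erioon.functions import (
    calculate_shard_number,
    create_msgpack_file,
    generate_unique_id,
    get_index_data,
    update_index_file_insert,
)

container_url = "https://<account>.blob.core.windows.net/<container>?<sas-token>"  # placeholder

# Collect every _id already tracked in index.json so the new one cannot collide.
existing_ids = [
    rid
    for shard in get_index_data("user-1", "mydb", "mycoll", container_url)
    for ids in shard.values()
    for rid in ids
]

record = {"_id": generate_unique_id(existing_ids), "name": "Ada", "role": "admin"}

# Write the record into the current shard (rolling over when full),
# then register its _id in index.json under that shard.
create_msgpack_file("user-1", "mydb", "mycoll", record, container_url)
shard_number = calculate_shard_number("user-1", "mydb", "mycoll", container_url)
update_index_file_insert("user-1", "mydb", "mycoll", record["_id"], shard_number, container_url)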
+ # GET INDEX OF DOCUMENTS
+ def get_index_data(user_id_cont, database, collection, container_url):
+     """
+     Retrieves the content of the index.json file that tracks which records are stored in which shards.
+
+     Args:
+         user_id_cont: User identifier or context.
+         database: Database name.
+         collection: Collection name.
+         container_url: Blob Storage container SAS URL.
+
+     Returns:
+         List of shard mappings (list of dicts), or an empty list if the file is missing or unreadable.
+     """
+     container_client = ContainerClient.from_container_url(container_url)
+     index_blob_client = container_client.get_blob_client(blob=f"{database}/{collection}/index.json")
+
+     try:
+         index_data = index_blob_client.download_blob().readall()
+         return json.loads(index_data) if index_data else []
+     except Exception:
+         return []
+
+ # CHECK DUPLICATE IDs
+ def is_duplicate_id(user_id_cont, database, collection, _id, container_url):
+     """
+     Checks if the given record _id is already present in the index.json across shards.
+
+     Args:
+         user_id_cont: User identifier.
+         database: Database name.
+         collection: Collection name.
+         _id: Record ID to check.
+         container_url: Blob Storage container SAS URL.
+
+     Returns:
+         True if _id exists in any shard, else False.
+     """
+     index_data = get_index_data(user_id_cont, database, collection, container_url)
+
+     for shard in index_data:
+         for shard_name, ids in shard.items():
+             if _id in ids:
+                 return True
+     return False
+
+ # SAVE LOGS
+ def save_logs(user_id_cont, database, collection, method, log_type, log_message, count, container_url):
+     """
+     Saves an individual log entry into logs.json inside the container.
+     Each log entry is keyed by a UUID and includes metadata and a timestamp.
+
+     Args:
+         user_id_cont: User identifier/context.
+         database: Database name.
+         collection: Collection name.
+         method: HTTP method or operation type (e.g. POST, GET).
+         log_type: Log type, e.g. SUCCESS or ERROR.
+         log_message: Detailed message or data for the log.
+         count: Number of records affected or relevant.
+         container_url: Blob Storage container SAS URL.
+     """
+     container_client = ContainerClient.from_container_url(container_url)
+     blob_path = f"{database}/{collection}/logs.json"
+     index_blob_client = container_client.get_blob_client(blob=blob_path)
+
+     try:
+         existing_blob = index_blob_client.download_blob().readall()
+         logs_data = json.loads(existing_blob)
+     except Exception:
+         logs_data = {}
+
+     log_id = str(uuid.uuid4())
+
+     logs_data[log_id] = {
+         "timestamp": datetime.datetime.now().isoformat(),
+         "method": method.upper(),
+         "type": log_type.upper(),
+         "log": log_message,
+         "count": count
+     }
+
+     # Upload updated logs
+     index_blob_client.upload_blob(
+         data=json.dumps(logs_data, indent=2),
+         overwrite=True
+     )
+
+ # ASYNC LOG SAVING
+ def async_log(user_id, db, collection, method, status, message, count, container_url):
+     """
+     Executes the save_logs function asynchronously in a separate thread,
+     allowing non-blocking log operations.
+
+     Args:
+         user_id: User identifier/context.
+         db: Database name.
+         collection: Collection name.
+         method: Operation method.
+         status: Log status (SUCCESS, ERROR, etc.).
+         message: Log message or data.
+         count: Number of affected records.
+         container_url: Blob Storage container SAS URL.
+     """
+     Thread(target=save_logs, args=(user_id, db, collection, method, status, message, count, container_url)).start()
+
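For example, an operation can record its outcome without waiting for the logs.json upload to finish (placeholders as above):

from erioon.functions import async_log

container_url = "https://<account>.blob.core.windows.net/<container>?<sas-token>"  # placeholder

# Fire-and-forget: save_logs runs on a background thread while the caller continues.
async_log("user-1", "mydb", "mycoll", "POST", "SUCCESS", "Inserted 3 records", 3, container_url)

Since save_logs does a read-modify-write of a single logs.json blob, two entries written at the same moment can overwrite each other; callers that need every entry preserved would have to serialize these calls themselves.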
+ # GENERATE UNIQUE ID KEY
+ def generate_unique_id(existing_ids):
+     """
+     Generates a new UUID string that does not collide with any IDs in existing_ids.
+
+     Args:
+         existing_ids: Iterable of already existing _id strings.
+
+     Returns:
+         Unique UUID string not in existing_ids.
+     """
+     while True:
+         new_id = str(uuid.uuid4())
+         if new_id not in existing_ids:
+             return new_id
+
+ # UPDATE INDEX DURING INSERT
+ def update_index_file_insert(user_id_cont, database, collection, record_id, shard_number, container_url):
+     """
+     Updates index.json to register a newly inserted record_id under the appropriate shard.
+
+     The index.json structure is a list of dicts mapping shard names to lists of record IDs:
+     [
+         { "collection_1": ["id1", "id2", ...] },
+         { "collection_2": ["id3", "id4", ...] }
+     ]
+
+     Args:
+         user_id_cont: User identifier/context.
+         database: Database name.
+         collection: Collection name.
+         record_id: The _id of the inserted record.
+         shard_number: The shard number where the record was stored.
+         container_url: Blob Storage container SAS URL.
+
+     Returns:
+         The record_id inserted.
+     """
+     container_client = ContainerClient.from_container_url(container_url)
+     index_blob_client = container_client.get_blob_client(blob=f"{database}/{collection}/index.json")
+
+     index_data = []
+
+     if index_blob_client.exists():
+         try:
+             index_data = json.loads(index_blob_client.download_blob().readall())
+         except Exception:
+             index_data = []
+
+     shard_key = f"{collection}_{shard_number}"
+     shard_found = False
+
+     for shard in index_data:
+         if shard_key in shard:
+             shard[shard_key].append(record_id)
+             shard_found = True
+             break
+
+     if not shard_found:
+         index_data.append({shard_key: [record_id]})
+
+     index_blob_client.upload_blob(json.dumps(index_data), overwrite=True)
+
+     return record_id
+
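Concretely, after two records land in shard 1 and one in shard 2 of a collection named mycoll, index.json would hold something like this (the IDs are invented):

[
    {"mycoll_1": ["9f1c3e2a-...", "2b7ad4c9-..."]},
    {"mycoll_2": ["c44d81f0-..."]}
]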
+ # UPDATE INDEX FILE DURING DELETING
+ def update_index_file_delete(user_id_cont, database, collection, record_id, shard_number, container_url):
+     """
+     Removes a record_id from the index.json under the correct shard upon deletion.
+
+     Cleans up empty shard entries after removal.
+
+     Args:
+         user_id_cont: User identifier/context.
+         database: Database name.
+         collection: Collection name.
+         record_id: The _id of the deleted record.
+         shard_number: The shard number from which to remove the record.
+         container_url: Blob Storage container SAS URL.
+
+     Returns:
+         The record_id deleted.
+     """
+     container_client = ContainerClient.from_container_url(container_url)
+     index_blob_client = container_client.get_blob_client(blob=f"{database}/{collection}/index.json")
+
+     index_data = []
+
+     if index_blob_client.exists():
+         try:
+             index_data = json.loads(index_blob_client.download_blob().readall())
+         except Exception:
+             index_data = []
+
+     shard_key = f"{collection}_{shard_number}"
+
+     for shard in index_data:
+         if shard_key in shard:
+             if record_id in shard[shard_key]:
+                 shard[shard_key].remove(record_id)
+                 if not shard[shard_key]:
+                     index_data.remove(shard)
+             break
+
+     index_blob_client.upload_blob(json.dumps(index_data), overwrite=True)
+
+     return record_id
+
+ # CALCULATE SHARD RECORDS
+ def calculate_shard_number(user_id_cont, database, collection, container_url):
+     """
+     Determines the shard number for storing a new record.
+
+     Logic:
+     - Lists existing shard files in the collection directory.
+     - Extracts shard numbers from filenames.
+     - Returns the highest shard number found, or 1 if none found.
+
+     Args:
+         user_id_cont: User identifier/context.
+         database: Database name.
+         collection: Collection name.
+         container_url: Blob Storage container SAS URL.
+
+     Returns:
+         Integer shard number to use.
+     """
+     container_client = ContainerClient.from_container_url(container_url)
+
+     directory_path = f"{database}/{collection}/"
+     blob_list = container_client.list_blobs(name_starts_with=directory_path)
+
+     shard_numbers = []
+     for blob in blob_list:
+         try:
+             parts = blob.name.split("_")
+             if blob.name.endswith(".msgpack"):
+                 num = int(parts[1].split(".")[0])
+                 shard_numbers.append(num)
+         except Exception:
+             continue
+     if shard_numbers:
+         next_shard = max(shard_numbers)
+     else:
+         next_shard = 1
+     return next_shard
+
+ # CHECK NESTED KEYS
+ def check_nested_key(data, key_path, value):
+     """
+     Recursively checks whether a nested key in a dictionary or list of dictionaries
+     matches the specified value.
+
+     Args:
+         data (dict or list): The data structure (dict or list of dicts) to search.
+         key_path (str): Dot-separated path to the nested key (e.g. "a.b.c").
+         value: The value to compare against.
+
+     Returns:
+         bool: True if the key exists at the nested path and equals the value, else False.
+     """
+     keys = key_path.split('.')
+
+     if not keys:
+         return False
+
+     current_key = keys[0]
+     remaining_keys = keys[1:]
+
+     if isinstance(data, dict):
+         if current_key in data:
+             if not remaining_keys:
+                 if data[current_key] == value:
+                     return True
+             else:
+                 return check_nested_key(data[current_key], '.'.join(remaining_keys), value)
+     elif isinstance(data, list):
+         for item in data:
+             if isinstance(item, dict):
+                 if check_nested_key(item, key_path, value):  # pass the full path: current_key is not yet consumed for list items
+                     return True
+     return False
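A quick sketch of how check_nested_key resolves dot-separated paths, including the list-of-dicts case handled by the recursion above; the document and values are invented:

from erioon.functions import check_nested_key

doc = {
    "profile": {"address": {"city": "Tirana"}},
    "tags": [{"name": "alpha"}, {"name": "beta"}],
}

check_nested_key(doc, "profile.address.city", "Tirana")  # True: exact nested match
check_nested_key(doc, "profile.address.city", "Rome")    # False: key exists, value differs
check_nested_key(doc, "tags.name", "beta")               # True: each dict in the list is checked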
erioon/ping.py ADDED
@@ -0,0 +1,53 @@
+ # Copyright 2025-present Erioon, Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # Visit www.erioon.com/dev-docs for more information about the python SDK
+
+ from erioon.functions import async_log
+ from azure.storage.blob import ContainerClient
+
+ # PING CONNECTION VERIFICATION
+ def handle_connection_ping(user_id, db_id, coll_id, container_url):
+     """
+     Checks if a specific collection exists within a Blob Storage container
+     and logs the status of the connection attempt asynchronously.
+
+     Parameters:
+     - user_id (str): Identifier of the user making the request.
+     - db_id (str): Database identifier (used as a folder prefix).
+     - coll_id (str): Collection identifier (used as a folder prefix).
+     - container_url (str): URL of the Blob Storage container.
+
+     Returns:
+     - tuple(dict, int): A tuple containing a status dictionary and an HTTP status code.
+         - If the collection is found, returns status "OK" with HTTP 200.
+         - If the collection is missing, returns status "KO" with HTTP 404.
+         - On any exception, returns status "KO" with HTTP 500.
+     """
+     try:
+         container_client = ContainerClient.from_container_url(container_url)
+         directory_path = f"{db_id}/{coll_id}/"
+
+         blobs = container_client.list_blobs(name_starts_with=directory_path)
+         blob_names = [blob.name for blob in blobs]
+
+         if not blob_names:
+             async_log(user_id, db_id, coll_id, "PING", "ERROR", f"No collection {coll_id} found.", 1, container_url)
+             return {"status": "KO", "error": f"No collection {coll_id} found."}, 404
+
+         async_log(user_id, db_id, coll_id, "PING", "SUCCESS", "Connection successful", 1, container_url)
+         return {"status": "OK", "message": "Connection successful"}, 200
+
+     except Exception as e:
+         async_log(user_id, db_id, coll_id, "PING", "ERROR", f"Connection failed: {str(e)}", 1, container_url)
+         return {"status": "KO", "error": "Connection failed", "message": str(e)}, 500