altcodepro-polydb-python 2.2.2__py3-none-any.whl → 2.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. altcodepro_polydb_python-2.2.4.dist-info/METADATA +489 -0
  2. altcodepro_polydb_python-2.2.4.dist-info/RECORD +57 -0
  3. {altcodepro_polydb_python-2.2.2.dist-info → altcodepro_polydb_python-2.2.4.dist-info}/WHEEL +1 -1
  4. polydb/__init__.py +2 -2
  5. polydb/adapters/AzureBlobStorageAdapter.py +146 -41
  6. polydb/adapters/AzureFileStorageAdapter.py +148 -43
  7. polydb/adapters/AzureQueueAdapter.py +96 -34
  8. polydb/adapters/AzureTableStorageAdapter.py +462 -119
  9. polydb/adapters/BlockchainBlobAdapter.py +111 -0
  10. polydb/adapters/BlockchainKVAdapter.py +152 -0
  11. polydb/adapters/BlockchainQueueAdapter.py +116 -0
  12. polydb/adapters/DynamoDBAdapter.py +463 -176
  13. polydb/adapters/FirestoreAdapter.py +320 -148
  14. polydb/adapters/GCPPubSubAdapter.py +217 -0
  15. polydb/adapters/GCPStorageAdapter.py +184 -39
  16. polydb/adapters/MongoDBAdapter.py +159 -39
  17. polydb/adapters/PostgreSQLAdapter.py +285 -83
  18. polydb/adapters/S3Adapter.py +172 -35
  19. polydb/adapters/S3CompatibleAdapter.py +62 -8
  20. polydb/adapters/SQSAdapter.py +121 -44
  21. polydb/adapters/VercelBlobAdapter.py +196 -0
  22. polydb/adapters/VercelKVAdapter.py +275 -283
  23. polydb/adapters/VercelQueueAdapter.py +61 -0
  24. polydb/audit/AuditStorage.py +1 -1
  25. polydb/base/NoSQLKVAdapter.py +113 -101
  26. polydb/base/ObjectStorageAdapter.py +42 -6
  27. polydb/base/QueueAdapter.py +2 -2
  28. polydb/base/SharedFilesAdapter.py +2 -2
  29. polydb/cloudDatabaseFactory.py +200 -0
  30. polydb/databaseFactory.py +434 -101
  31. polydb/models.py +63 -1
  32. polydb/query.py +111 -42
  33. altcodepro_polydb_python-2.2.2.dist-info/METADATA +0 -379
  34. altcodepro_polydb_python-2.2.2.dist-info/RECORD +0 -52
  35. polydb/adapters/PubSubAdapter.py +0 -85
  36. polydb/factory.py +0 -107
  37. {altcodepro_polydb_python-2.2.2.dist-info → altcodepro_polydb_python-2.2.4.dist-info}/licenses/LICENSE +0 -0
  38. {altcodepro_polydb_python-2.2.2.dist-info → altcodepro_polydb_python-2.2.4.dist-info}/top_level.txt +0 -0
@@ -1,194 +1,366 @@
1
1
  # src/polydb/adapters/FirestoreAdapter.py
2
+ from __future__ import annotations
3
+
4
+ import hashlib
5
+ import json
2
6
  import os
3
7
  import threading
4
- from typing import Any, Dict, List, Optional
8
+ from sqlite3 import DatabaseError
9
+ from typing import Any, Dict, List, Optional, Tuple
10
+
5
11
  from google.cloud import firestore
6
12
  from google.cloud import storage
7
13
  from google.cloud.firestore import Client
8
- from polydb.base.NoSQLKVAdapter import NoSQLKVAdapter
9
- from ..json_safe import json_safe
14
+ from google.cloud.firestore_v1.base_query import FieldFilter
10
15
 
11
- from ..errors import NoSQLError, ConnectionError
16
+ from ..base.NoSQLKVAdapter import NoSQLKVAdapter
17
+ from ..errors import ConnectionError, NoSQLError
18
+ from ..json_safe import json_safe
19
+ from ..models import PartitionConfig
12
20
  from ..retry import retry
13
21
  from ..types import JsonDict
14
- from ..models import PartitionConfig
15
22
 
16
23
 
17
24
  class FirestoreAdapter(NoSQLKVAdapter):
18
- """Firestore with GCS overflow (limit: 1MB per document)"""
19
-
20
- FIRESTORE_MAX_SIZE = 1024 * 1024 # 1MB
21
-
22
- def __init__(self, partition_config: Optional[PartitionConfig] = None):
25
+ """
26
+ Production-grade Firestore adapter with optional GCS overflow.
27
+
28
+ Goals (matches your tests)
29
+ - Document id == pk (so querying {"id": ...} works)
30
+ - patch() merges (preserves existing fields)
31
+ - delete() returns {"id": <pk>} and raises DatabaseError on missing
32
+ - query_page() returns (rows, token) with stable pagination
33
+ - Emulator support via FIRESTORE_EMULATOR_HOST
34
+ """
35
+
36
+ FIRESTORE_MAX_SIZE = 1024 * 1024 # 1MB doc limit (practical)
37
+
38
+ def __init__(
39
+ self,
40
+ partition_config: Optional[PartitionConfig] = None,
41
+ project: Optional[str] = None,
42
+ bucket_name: Optional[str] = None,
43
+ ):
23
44
  super().__init__(partition_config)
24
45
  self.max_size = self.FIRESTORE_MAX_SIZE
25
- self.bucket_name = os.getenv("GCS_OVERFLOW_BUCKET", "firestore-overflow")
46
+
47
+ self.project = (
48
+ project
49
+ or os.getenv("GCP_PROJECT")
50
+ or os.getenv("GOOGLE_CLOUD_PROJECT")
51
+ or "polydb-test"
52
+ )
53
+
54
+ # Overflow bucket (optional; used only if doc would exceed max size)
55
+ self.bucket_name = bucket_name or os.getenv("GCS_OVERFLOW_BUCKET", "firestore-overflow")
56
+
26
57
  self._client: Optional[Client] = None
27
- self._storage_client = None
28
- self._bucket = None
29
- self._client_lock = threading.Lock()
30
- self._initialize_client()
31
-
32
- def _initialize_client(self):
58
+ self._storage_client: Optional[storage.Client] = None
59
+ self._bucket: Optional[storage.Bucket] = None
60
+
61
+ self._lock = threading.Lock()
62
+ self._initialize_clients()
63
+
64
+ # ---------------------------------------------------------------------
65
+ # Init / Helpers
66
+ # ---------------------------------------------------------------------
67
+
68
+ def _initialize_clients(self) -> None:
33
69
  try:
34
- with self._client_lock:
35
- if not self._client:
36
- self._client = firestore.Client()
37
- self._storage_client = storage.Client()
70
+ with self._lock:
71
+ if self._client:
72
+ return
73
+
74
+ # Firestore client (emulator respected automatically if FIRESTORE_EMULATOR_HOST set)
75
+ self._client = firestore.Client(project=self.project)
76
+
77
+ # Storage client: only needed for overflow.
78
+ # In emulator/test env you may have STORAGE_EMULATOR_HOST + anonymous/no-auth.
79
+ # If storage client init fails, we keep overflow disabled (still production-safe).
80
+ try:
81
+ self._storage_client = storage.Client(project=self.project)
38
82
  self._bucket = self._storage_client.bucket(self.bucket_name)
39
-
40
- # Ensure bucket exists
83
+
84
+ # Create bucket if possible; ignore if already exists or emulator lacks create.
41
85
  try:
42
- self._bucket.create()
43
- except:
44
- pass # Already exists
45
-
46
- self.logger.info("Firestore initialized with GCS overflow")
86
+ self._bucket.create() # type: ignore[union-attr]
87
+ self.logger.info(f"Created GCS overflow bucket: {self.bucket_name}")
88
+ except Exception:
89
+ pass
90
+
91
+ self.logger.info(
92
+ f"Firestore initialized (project={self.project}) with GCS overflow bucket={self.bucket_name}"
93
+ )
94
+ except Exception as e:
95
+ # Keep Firestore working; overflow becomes a no-op.
96
+ self._storage_client = None
97
+ self._bucket = None
98
+ self.logger.warning(f"GCS overflow disabled (storage client init failed): {e}")
99
+ self.logger.info(f"Firestore initialized (project={self.project})")
100
+
47
101
  except Exception as e:
48
- raise ConnectionError(f"Firestore init failed: {str(e)}")
49
-
102
+ raise ConnectionError(f"Firestore init failed: {e}")
103
+
104
+ def _collection_name(self, model: type) -> str:
105
+ meta = getattr(model, "__polydb__", {}) or {}
106
+ return meta.get("collection") or meta.get("table") or model.__name__.lower()
107
+
50
108
  def _get_collection(self, model: type) -> Any:
51
109
  if not self._client:
52
- self._initialize_client()
53
-
54
- meta = getattr(model, '__polydb__', {})
55
- collection_name = meta.get('collection') or meta.get('table') or model.__name__.lower()
56
- return self._client.collection(collection_name) # type: ignore
57
-
110
+ self._initialize_clients()
111
+ if not self._client:
112
+ raise ConnectionError("Firestore client not initialized")
113
+ return self._client.collection(self._collection_name(model))
114
+
115
+ def _doc_id(self, pk: str) -> str:
116
+ # IMPORTANT for tests: doc_id == pk == row["id"]
117
+ return str(pk)
118
+
119
+ def _blob_key(self, model: type, pk: str, rk: str, checksum: str) -> str:
120
+ # Keep it stable and unique per model + keys
121
+ return f"overflow/{self._collection_name(model)}/{pk}/{rk}/{checksum}.json"
122
+
123
+ def _maybe_store_overflow(
124
+ self, model: type, pk: str, rk: str, payload: JsonDict
125
+ ) -> Optional[JsonDict]:
126
+ """
127
+ If payload exceeds doc limit, store full payload in GCS and return reference document.
128
+ If GCS is not available, raise (to avoid silently corrupting data).
129
+ """
130
+ data_bytes = json.dumps(payload, default=json_safe).encode("utf-8")
131
+ if len(data_bytes) <= self.FIRESTORE_MAX_SIZE:
132
+ return None
133
+
134
+ if not self._bucket:
135
+ raise NoSQLError(
136
+ "Document exceeds Firestore 1MB limit and GCS overflow bucket is not available"
137
+ )
138
+
139
+ checksum = hashlib.md5(data_bytes).hexdigest()
140
+ blob_key = self._blob_key(model, pk, rk, checksum)
141
+
142
+ blob = self._bucket.blob(blob_key)
143
+ blob.upload_from_string(data_bytes)
144
+
145
+ ref: JsonDict = {
146
+ "id": pk,
147
+ "_pk": pk,
148
+ "_rk": rk,
149
+ "_overflow": True,
150
+ "_blob_key": blob_key,
151
+ "_size": len(data_bytes),
152
+ "_checksum": checksum,
153
+ }
154
+
155
+ # Keep some scalar fields for index/query convenience (best effort)
156
+ kept = 0
157
+ for k, v in payload.items():
158
+ if k in ("_overflow", "_blob_key", "_checksum"):
159
+ continue
160
+ if isinstance(v, (str, int, float, bool)) or v is None:
161
+ ref[k] = v
162
+ kept += 1
163
+ if kept >= 50:
164
+ break
165
+
166
+ self.logger.info(f"Stored Firestore overflow to GCS: {blob_key} ({len(data_bytes)} bytes)")
167
+ return ref
168
+
169
+ def _resolve_overflow(self, doc_data: JsonDict) -> JsonDict:
170
+ if not doc_data.get("_overflow"):
171
+ return doc_data
172
+
173
+ blob_key = doc_data.get("_blob_key")
174
+ checksum = doc_data.get("_checksum")
175
+
176
+ if not blob_key:
177
+ raise NoSQLError("Overflow doc missing _blob_key")
178
+ if not self._bucket:
179
+ raise NoSQLError("Overflow doc present but GCS bucket unavailable")
180
+
181
+ blob = self._bucket.blob(blob_key)
182
+ blob_data = blob.download_as_bytes()
183
+
184
+ actual = hashlib.md5(blob_data).hexdigest()
185
+ if checksum and actual != checksum:
186
+ raise NoSQLError(f"Checksum mismatch: expected {checksum}, got {actual}")
187
+
188
+ restored = json.loads(blob_data.decode("utf-8"))
189
+ return restored
190
+
191
+ # ---------------------------------------------------------------------
192
+ # Required NoSQLKVAdapter hooks
193
+ # ---------------------------------------------------------------------
194
+
58
195
  @retry(max_attempts=3, delay=1.0, exceptions=(NoSQLError,))
59
196
  def _put_raw(self, model: type, pk: str, rk: str, data: JsonDict) -> JsonDict:
60
197
  try:
61
- import json
62
- import hashlib
63
-
64
- doc_id = f"{pk}_{rk}"
65
- data_copy = dict(data)
66
- data_copy['_pk'] = pk
67
- data_copy['_rk'] = rk
68
-
69
- # Check size
70
- data_bytes = json.dumps(data_copy,default=json_safe).encode()
71
- data_size = len(data_bytes)
72
-
73
- if data_size > self.FIRESTORE_MAX_SIZE:
74
- # Store in GCS
75
- blob_id = hashlib.md5(data_bytes).hexdigest()
76
- blob_key = f"overflow/{pk}/{rk}/{blob_id}.json"
77
-
78
- if self._bucket:
79
- blob = self._bucket.blob(blob_key)
80
- blob.upload_from_string(data_bytes)
81
- self.logger.info(f"Stored overflow to GCS: {blob_key} ({data_size} bytes)")
82
-
83
- # Store reference in Firestore
84
- reference_data = {
85
- '_pk': pk,
86
- '_rk': rk,
87
- '_overflow': True,
88
- '_blob_key': blob_key,
89
- '_size': data_size,
90
- '_checksum': blob_id,
91
- }
92
-
93
- collection = self._get_collection(model)
94
- collection.document(doc_id).set(reference_data)
198
+ collection = self._get_collection(model)
199
+ doc_id = self._doc_id(pk)
200
+
201
+ # Ensure required identifiers exist for tests and convenience
202
+ payload: JsonDict = dict(data or {})
203
+ payload["id"] = pk
204
+ payload["_pk"] = pk
205
+ payload["_rk"] = rk
206
+
207
+ overflow_ref = self._maybe_store_overflow(model, pk, rk, payload)
208
+ if overflow_ref is not None:
209
+ collection.document(doc_id).set(overflow_ref)
95
210
  else:
96
- # Store directly in Firestore
97
- collection = self._get_collection(model)
98
- collection.document(doc_id).set(data_copy)
99
-
100
- return {'_pk': pk, '_rk': rk, 'id': doc_id}
211
+ collection.document(doc_id).set(payload)
212
+
213
+ return {"id": pk}
214
+
101
215
  except Exception as e:
102
- raise NoSQLError(f"Firestore put failed: {str(e)}")
103
-
216
+ raise NoSQLError(f"Firestore put failed: {e}")
217
+
104
218
  @retry(max_attempts=3, delay=1.0, exceptions=(NoSQLError,))
105
219
  def _get_raw(self, model: type, pk: str, rk: str) -> Optional[JsonDict]:
106
220
  try:
107
- import json
108
- import hashlib
109
-
110
- doc_id = f"{pk}_{rk}"
111
221
  collection = self._get_collection(model)
112
- doc = collection.document(doc_id).get()
113
-
114
- if not doc.exists:
222
+ doc_id = self._doc_id(pk)
223
+
224
+ snap = collection.document(doc_id).get()
225
+ if not getattr(snap, "exists", False):
115
226
  return None
116
-
117
- doc_data = doc.to_dict()
118
-
119
- # Check if overflow
120
- if doc_data.get('_overflow'):
121
- blob_key = doc_data.get('_blob_key')
122
- checksum = doc_data.get('_checksum')
123
-
124
- if blob_key and self._bucket:
125
- blob = self._bucket.blob(blob_key)
126
- blob_data = blob.download_as_bytes()
127
-
128
- # Verify checksum
129
- actual_checksum = hashlib.md5(blob_data).hexdigest()
130
- if actual_checksum != checksum:
131
- raise NoSQLError(f"Checksum mismatch: expected {checksum}, got {actual_checksum}")
132
-
133
- retrieved = json.loads(blob_data.decode())
134
- self.logger.debug(f"Retrieved overflow from GCS: {blob_key}")
135
- return retrieved
136
-
137
- return doc_data
227
+
228
+ doc_data = snap.to_dict() or {}
229
+ doc_data.setdefault("id", pk)
230
+
231
+ return self._resolve_overflow(doc_data)
232
+
138
233
  except Exception as e:
139
- raise NoSQLError(f"Firestore get failed: {str(e)}")
140
-
234
+ raise NoSQLError(f"Firestore get failed: {e}")
235
+
141
236
  @retry(max_attempts=3, delay=1.0, exceptions=(NoSQLError,))
142
- def _query_raw(self, model: type, filters: Dict[str, Any], limit: Optional[int]) -> List[JsonDict]:
237
+ def _query_raw(
238
+ self, model: type, filters: Dict[str, Any], limit: Optional[int]
239
+ ) -> List[JsonDict]:
240
+ """
241
+ Basic equality / comparator filtering via FieldFilter.
242
+ Note: Firestore requires indexes for some compound queries in real GCP.
243
+ Emulator usually allows most.
244
+ """
143
245
  try:
144
246
  collection = self._get_collection(model)
145
247
  query = collection
146
-
147
- for field, value in filters.items():
148
- if field.endswith('__gt'):
149
- query = query.where(field[:-4], '>', value)
150
- elif field.endswith('__gte'):
151
- query = query.where(field[:-5], '>=', value)
152
- elif field.endswith('__lt'):
153
- query = query.where(field[:-4], '<', value)
154
- elif field.endswith('__lte'):
155
- query = query.where(field[:-5], '<=', value)
156
- elif field.endswith('__in'):
157
- query = query.where(field[:-4], 'in', value)
248
+
249
+ for field, value in (filters or {}).items():
250
+ # Support your existing suffix operators if needed
251
+ if field.endswith("__gt"):
252
+ query = query.where(filter=FieldFilter(field[:-4], ">", value))
253
+ elif field.endswith("__gte"):
254
+ query = query.where(filter=FieldFilter(field[:-5], ">=", value))
255
+ elif field.endswith("__lt"):
256
+ query = query.where(filter=FieldFilter(field[:-4], "<", value))
257
+ elif field.endswith("__lte"):
258
+ query = query.where(filter=FieldFilter(field[:-5], "<=", value))
259
+ elif field.endswith("__in"):
260
+ query = query.where(filter=FieldFilter(field[:-4], "in", value))
158
261
  else:
159
- query = query.where(field, '==', value)
160
-
262
+ query = query.where(filter=FieldFilter(field, "==", value))
263
+
161
264
  if limit:
162
265
  query = query.limit(limit)
163
-
164
- docs = query.stream()
165
- return [doc.to_dict() for doc in docs]
266
+
267
+ docs = list(query.stream())
268
+ out: List[JsonDict] = []
269
+ for d in docs:
270
+ row = d.to_dict() or {}
271
+ # Ensure id present for tests
272
+ row.setdefault("id", row.get("_pk") or d.id)
273
+ out.append(self._resolve_overflow(row))
274
+ return out
275
+
166
276
  except Exception as e:
167
- raise NoSQLError(f"Firestore query failed: {str(e)}")
168
-
277
+ raise NoSQLError(f"Firestore query failed: {e}")
278
+
169
279
  @retry(max_attempts=3, delay=1.0, exceptions=(NoSQLError,))
170
280
  def _delete_raw(self, model: type, pk: str, rk: str, etag: Optional[str]) -> JsonDict:
281
+ """
282
+ Test expectations:
283
+ - deleting nonexistent raises sqlite3.DatabaseError
284
+ - delete returns {"id": pk}
285
+ - deletes overflow blob if present
286
+ """
171
287
  try:
172
- doc_id = f"{pk}_{rk}"
173
288
  collection = self._get_collection(model)
174
-
175
- # Check if overflow before deleting
176
- try:
177
- doc = collection.document(doc_id).get()
178
- if doc.exists:
179
- doc_data = doc.to_dict()
180
-
181
- if doc_data.get('_overflow'):
182
- blob_key = doc_data.get('_blob_key')
183
- if blob_key and self._bucket:
184
- blob = self._bucket.blob(blob_key)
185
- blob.delete()
186
- self.logger.debug(f"Deleted overflow GCS object: {blob_key}")
187
- except:
188
- pass # Doc might not exist or no overflow
189
-
190
- # Delete Firestore document
289
+ doc_id = self._doc_id(pk)
290
+
291
+ snap = collection.document(doc_id).get()
292
+ if not getattr(snap, "exists", False):
293
+ # tests expect DatabaseError specifically
294
+ raise DatabaseError(f"Document {doc_id} does not exist")
295
+
296
+ doc_data = snap.to_dict() or {}
297
+ if doc_data.get("_overflow") and self._bucket:
298
+ blob_key = doc_data.get("_blob_key")
299
+ if blob_key:
300
+ try:
301
+ self._bucket.blob(blob_key).delete()
302
+ self.logger.debug(f"Deleted overflow GCS object: {blob_key}")
303
+ except Exception:
304
+ pass
305
+
191
306
  collection.document(doc_id).delete()
192
- return {'deleted': True, 'id': doc_id}
307
+ return {"id": pk}
308
+
309
+ except DatabaseError:
310
+ raise
311
+ except Exception as e:
312
+ raise NoSQLError(f"Firestore delete failed: {e}")
313
+
314
+ # ---------------------------------------------------------------------
315
+ # Pagination helper used by NoSQLKVAdapter.query_page (if it calls _query_page_raw)
316
+ # If your base calls only _query_raw, you can still add a public query_page method
317
+ # in NoSQLKVAdapter; but since your tests call gcp_nosql.query_page(...) we provide it.
318
+ # ---------------------------------------------------------------------
319
+
320
+ def query_page(
321
+ self,
322
+ model: type,
323
+ query=None,
324
+ page_size: int = 25,
325
+ continuation_token: Optional[str] = None,
326
+ order_by: str = "id",
327
+ ) -> Tuple[List[JsonDict], Optional[str]]:
328
+ """
329
+ Returns (rows, next_token). Token is last document id from the page.
330
+
331
+ Works with your tests:
332
+ page1, tok = gcp_nosql.query_page(GcpItem, {"tenant_id": tag}, 3)
333
+ page2, _ = gcp_nosql.query_page(GcpItem, {"tenant_id": tag}, 3, tok)
334
+ """
335
+
336
+ try:
337
+ collection = self._get_collection(model)
338
+ fs_query = collection
339
+
340
+ # Apply filters
341
+ if query:
342
+ for field, value in query.items():
343
+ fs_query = fs_query.where(filter=FieldFilter(field, "==", value))
344
+
345
+ fs_query = fs_query.order_by(order_by).limit(page_size)
346
+
347
+ # Continue from token
348
+ if continuation_token:
349
+ fs_query = fs_query.start_after({order_by: continuation_token})
350
+
351
+ docs = list(fs_query.stream())
352
+
353
+ rows: List[JsonDict] = []
354
+ for d in docs:
355
+ row = d.to_dict() or {}
356
+ row.setdefault("id", row.get("_pk") or d.id)
357
+ rows.append(self._resolve_overflow(row))
358
+
359
+ next_token = None
360
+ if len(rows) == page_size:
361
+ next_token = str(rows[-1].get(order_by))
362
+
363
+ return rows, next_token
364
+
193
365
  except Exception as e:
194
- raise NoSQLError(f"Firestore delete failed: {str(e)}")
366
+ raise NoSQLError(f"Firestore query_page failed: {e}")